1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/TargetParser.h" 39 40 using namespace llvm; 41 using namespace llvm::AMDGPU; 42 using namespace llvm::amdhsa; 43 44 namespace { 45 46 class AMDGPUAsmParser; 47 48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 49 50 //===----------------------------------------------------------------------===// 51 // Operand 52 //===----------------------------------------------------------------------===// 53 54 class AMDGPUOperand : public MCParsedAsmOperand { 55 enum KindTy { 56 Token, 57 Immediate, 58 Register, 59 Expression 60 } Kind; 61 62 SMLoc StartLoc, EndLoc; 63 const AMDGPUAsmParser *AsmParser; 64 65 public: 66 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 67 : Kind(Kind_), AsmParser(AsmParser_) {} 68 69 using Ptr = std::unique_ptr<AMDGPUOperand>; 70 71 struct Modifiers { 72 bool Abs = false; 73 bool Neg = false; 74 bool Sext = false; 75 76 bool hasFPModifiers() const { return Abs || Neg; } 77 bool hasIntModifiers() const { return Sext; } 78 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 79 80 int64_t getFPModifiersOperand() const { 81 int64_t Operand = 0; 82 Operand |= Abs ? SISrcMods::ABS : 0u; 83 Operand |= Neg ? SISrcMods::NEG : 0u; 84 return Operand; 85 } 86 87 int64_t getIntModifiersOperand() const { 88 int64_t Operand = 0; 89 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 90 return Operand; 91 } 92 93 int64_t getModifiersOperand() const { 94 assert(!(hasFPModifiers() && hasIntModifiers()) 95 && "fp and int modifiers should not be used simultaneously"); 96 if (hasFPModifiers()) { 97 return getFPModifiersOperand(); 98 } else if (hasIntModifiers()) { 99 return getIntModifiersOperand(); 100 } else { 101 return 0; 102 } 103 } 104 105 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 106 }; 107 108 enum ImmTy { 109 ImmTyNone, 110 ImmTyGDS, 111 ImmTyLDS, 112 ImmTyOffen, 113 ImmTyIdxen, 114 ImmTyAddr64, 115 ImmTyOffset, 116 ImmTyInstOffset, 117 ImmTyOffset0, 118 ImmTyOffset1, 119 ImmTyCPol, 120 ImmTySWZ, 121 ImmTyTFE, 122 ImmTyD16, 123 ImmTyClampSI, 124 ImmTyOModSI, 125 ImmTyDPP8, 126 ImmTyDppCtrl, 127 ImmTyDppRowMask, 128 ImmTyDppBankMask, 129 ImmTyDppBoundCtrl, 130 ImmTyDppFi, 131 ImmTySdwaDstSel, 132 ImmTySdwaSrc0Sel, 133 ImmTySdwaSrc1Sel, 134 ImmTySdwaDstUnused, 135 ImmTyDMask, 136 ImmTyDim, 137 ImmTyUNorm, 138 ImmTyDA, 139 ImmTyR128A16, 140 ImmTyA16, 141 ImmTyLWE, 142 ImmTyExpTgt, 143 ImmTyExpCompr, 144 ImmTyExpVM, 145 ImmTyFORMAT, 146 ImmTyHwreg, 147 ImmTyOff, 148 ImmTySendMsg, 149 ImmTyInterpSlot, 150 ImmTyInterpAttr, 151 ImmTyAttrChan, 152 ImmTyOpSel, 153 ImmTyOpSelHi, 154 ImmTyNegLo, 155 ImmTyNegHi, 156 ImmTySwizzle, 157 ImmTyGprIdxMode, 158 ImmTyHigh, 159 ImmTyBLGP, 160 ImmTyCBSZ, 161 ImmTyABID, 162 ImmTyEndpgm, 163 }; 164 165 enum ImmKindTy { 166 ImmKindTyNone, 167 ImmKindTyLiteral, 168 ImmKindTyConst, 169 }; 170 171 private: 172 struct TokOp { 173 const char *Data; 174 unsigned Length; 175 }; 176 177 struct ImmOp { 178 int64_t Val; 179 ImmTy Type; 180 bool IsFPImm; 181 mutable ImmKindTy Kind; 182 Modifiers Mods; 183 }; 184 185 struct RegOp { 186 unsigned RegNo; 187 Modifiers Mods; 188 }; 189 190 union { 191 TokOp Tok; 192 ImmOp Imm; 193 RegOp Reg; 194 const MCExpr *Expr; 195 }; 196 197 public: 198 bool isToken() const override { 199 if (Kind == Token) 200 return true; 201 202 // When parsing operands, we can't always tell if something was meant to be 203 // a token, like 'gds', or an expression that references a global variable. 204 // In this case, we assume the string is an expression, and if we need to 205 // interpret it as a token, then we treat the symbol name as the token.
206 return isSymbolRefExpr(); 207 } 208 209 bool isSymbolRefExpr() const { 210 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 211 } 212 213 bool isImm() const override { 214 return Kind == Immediate; 215 } 216 217 void setImmKindNone() const { 218 assert(isImm()); 219 Imm.Kind = ImmKindTyNone; 220 } 221 222 void setImmKindLiteral() const { 223 assert(isImm()); 224 Imm.Kind = ImmKindTyLiteral; 225 } 226 227 void setImmKindConst() const { 228 assert(isImm()); 229 Imm.Kind = ImmKindTyConst; 230 } 231 232 bool IsImmKindLiteral() const { 233 return isImm() && Imm.Kind == ImmKindTyLiteral; 234 } 235 236 bool isImmKindConst() const { 237 return isImm() && Imm.Kind == ImmKindTyConst; 238 } 239 240 bool isInlinableImm(MVT type) const; 241 bool isLiteralImm(MVT type) const; 242 243 bool isRegKind() const { 244 return Kind == Register; 245 } 246 247 bool isReg() const override { 248 return isRegKind() && !hasModifiers(); 249 } 250 251 bool isRegOrInline(unsigned RCID, MVT type) const { 252 return isRegClass(RCID) || isInlinableImm(type); 253 } 254 255 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 256 return isRegOrInline(RCID, type) || isLiteralImm(type); 257 } 258 259 bool isRegOrImmWithInt16InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 261 } 262 263 bool isRegOrImmWithInt32InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 265 } 266 267 bool isRegOrImmWithInt64InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 269 } 270 271 bool isRegOrImmWithFP16InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 273 } 274 275 bool isRegOrImmWithFP32InputMods() const { 276 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 277 } 278 279 bool isRegOrImmWithFP64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 281 } 282 283 bool isVReg() const { 284 return isRegClass(AMDGPU::VGPR_32RegClassID) || 285 isRegClass(AMDGPU::VReg_64RegClassID) || 286 isRegClass(AMDGPU::VReg_96RegClassID) || 287 isRegClass(AMDGPU::VReg_128RegClassID) || 288 isRegClass(AMDGPU::VReg_160RegClassID) || 289 isRegClass(AMDGPU::VReg_192RegClassID) || 290 isRegClass(AMDGPU::VReg_256RegClassID) || 291 isRegClass(AMDGPU::VReg_512RegClassID) || 292 isRegClass(AMDGPU::VReg_1024RegClassID); 293 } 294 295 bool isVReg32() const { 296 return isRegClass(AMDGPU::VGPR_32RegClassID); 297 } 298 299 bool isVReg32OrOff() const { 300 return isOff() || isVReg32(); 301 } 302 303 bool isNull() const { 304 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 305 } 306 307 bool isVRegWithInputMods() const; 308 309 bool isSDWAOperand(MVT type) const; 310 bool isSDWAFP16Operand() const; 311 bool isSDWAFP32Operand() const; 312 bool isSDWAInt16Operand() const; 313 bool isSDWAInt32Operand() const; 314 315 bool isImmTy(ImmTy ImmT) const { 316 return isImm() && Imm.Type == ImmT; 317 } 318 319 bool isImmModifier() const { 320 return isImm() && Imm.Type != ImmTyNone; 321 } 322 323 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 324 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 325 bool isDMask() const { return isImmTy(ImmTyDMask); } 326 bool isDim() const { return isImmTy(ImmTyDim); } 327 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 328 bool isDA() const { return isImmTy(ImmTyDA); } 329 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 330 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 331 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 332 bool isOff() const { return isImmTy(ImmTyOff); } 333 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 334 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 335 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 336 bool isOffen() const { return isImmTy(ImmTyOffen); } 337 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 338 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 339 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 340 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 341 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 342 343 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 344 bool isGDS() const { return isImmTy(ImmTyGDS); } 345 bool isLDS() const { return isImmTy(ImmTyLDS); } 346 bool isCPol() const { return isImmTy(ImmTyCPol); } 347 bool isSWZ() const { return isImmTy(ImmTySWZ); } 348 bool isTFE() const { return isImmTy(ImmTyTFE); } 349 bool isD16() const { return isImmTy(ImmTyD16); } 350 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 351 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 352 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 353 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 354 bool isFI() const { return isImmTy(ImmTyDppFi); } 355 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 356 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 357 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 358 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 359 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 360 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 361 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 362 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 363 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 364 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 365 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 366 bool isHigh() const { return isImmTy(ImmTyHigh); } 367 368 bool isMod() const { 369 return isClampSI() || isOModSI(); 370 } 371 372 bool isRegOrImm() const { 373 return isReg() || isImm(); 374 } 375 376 bool isRegClass(unsigned RCID) const; 377 378 bool isInlineValue() const; 379 380 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 381 return isRegOrInline(RCID, type) && !hasModifiers(); 382 } 383 384 bool isSCSrcB16() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 386 } 387 388 bool isSCSrcV2B16() const { 389 return isSCSrcB16(); 390 } 391 392 bool isSCSrcB32() const { 393 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 394 } 395 396 bool isSCSrcB64() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 398 } 399 400 bool isBoolReg() const; 401 402 bool isSCSrcF16() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 404 } 405 406 bool isSCSrcV2F16() const { 407 return isSCSrcF16(); 408 } 409 410 bool isSCSrcF32() const { 411 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 412 } 413 414 bool isSCSrcF64() const { 415 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 416 } 417 418 bool isSSrcB32() const { 419 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 420 } 421 422 bool isSSrcB16() const { 423 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 424 } 425 426 bool isSSrcV2B16() const { 427 llvm_unreachable("cannot happen"); 428 return isSSrcB16(); 429 } 430 431 bool isSSrcB64() const { 432 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 433 // See isVSrc64(). 434 return isSCSrcB64() || isLiteralImm(MVT::i64); 435 } 436 437 bool isSSrcF32() const { 438 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 439 } 440 441 bool isSSrcF64() const { 442 return isSCSrcB64() || isLiteralImm(MVT::f64); 443 } 444 445 bool isSSrcF16() const { 446 return isSCSrcB16() || isLiteralImm(MVT::f16); 447 } 448 449 bool isSSrcV2F16() const { 450 llvm_unreachable("cannot happen"); 451 return isSSrcF16(); 452 } 453 454 bool isSSrcV2FP32() const { 455 llvm_unreachable("cannot happen"); 456 return isSSrcF32(); 457 } 458 459 bool isSCSrcV2FP32() const { 460 llvm_unreachable("cannot happen"); 461 return isSCSrcF32(); 462 } 463 464 bool isSSrcV2INT32() const { 465 llvm_unreachable("cannot happen"); 466 return isSSrcB32(); 467 } 468 469 bool isSCSrcV2INT32() const { 470 llvm_unreachable("cannot happen"); 471 return isSCSrcB32(); 472 } 473 474 bool isSSrcOrLdsB32() const { 475 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 476 isLiteralImm(MVT::i32) || isExpr(); 477 } 478 479 bool isVCSrcB32() const { 480 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 481 } 482 483 bool isVCSrcB64() const { 484 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 485 } 486 487 bool isVCSrcB16() const { 488 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 489 } 490 491 bool isVCSrcV2B16() const { 492 return isVCSrcB16(); 493 } 494 495 bool isVCSrcF32() const { 496 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 497 } 498 499 bool isVCSrcF64() const { 500 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 501 } 502 503 bool isVCSrcF16() const { 504 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 505 } 506 507 bool isVCSrcV2F16() const { 508 return isVCSrcF16(); 509 } 510 511 bool isVSrcB32() const { 512 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 513 } 514 515 bool isVSrcB64() const { 516 return isVCSrcF64() || isLiteralImm(MVT::i64); 517 } 518 519 bool isVSrcB16() const { 520 return isVCSrcB16() || isLiteralImm(MVT::i16); 521 } 522 523 bool isVSrcV2B16() const { 524 return isVSrcB16() || isLiteralImm(MVT::v2i16); 525 } 526 527 bool isVCSrcV2FP32() const { 528 return isVCSrcF64(); 529 } 530 531 bool isVSrcV2FP32() const { 532 return isVSrcF64() || isLiteralImm(MVT::v2f32); 533 } 534 535 bool isVCSrcV2INT32() const { 536 return isVCSrcB64(); 537 } 538 539 bool isVSrcV2INT32() const { 540 return isVSrcB64() || isLiteralImm(MVT::v2i32); 541 } 542 543 bool isVSrcF32() const { 544 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 545 } 546 547 bool isVSrcF64() const { 548 return isVCSrcF64() || isLiteralImm(MVT::f64); 549 } 550 551 bool isVSrcF16() const { 552 return isVCSrcF16() || isLiteralImm(MVT::f16); 553 } 554 555 bool isVSrcV2F16() const { 556 return isVSrcF16() || isLiteralImm(MVT::v2f16); 557 } 558 559 bool isVISrcB32() const { 560 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 561 } 562 563 bool isVISrcB16() const { 564 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 565 } 566 567 bool isVISrcV2B16() const { 568 return isVISrcB16(); 569 } 570 571 bool isVISrcF32() const { 572 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 573 } 574 575 
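// Note (editorial summary of the predicates defined above and below): these
// source-operand checks follow a common naming scheme. isSCSrc* accepts an
// SGPR or an inline constant; isSSrc* additionally accepts a literal (or an
// expression). isVCSrc* accepts a VGPR/SGPR or an inline constant; isVSrc*
// additionally accepts a literal. isVISrc* and isAISrc* restrict the register
// operand to VGPRs and AGPRs respectively, plus inline constants. The
// B<N>/F<N> suffix gives the operand's integer or floating-point bit width.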
bool isVISrcF16() const { 576 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 577 } 578 579 bool isVISrcV2F16() const { 580 return isVISrcF16() || isVISrcB32(); 581 } 582 583 bool isVISrc_64B64() const { 584 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 585 } 586 587 bool isVISrc_64F64() const { 588 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 589 } 590 591 bool isVISrc_64V2FP32() const { 592 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 593 } 594 595 bool isVISrc_64V2INT32() const { 596 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 597 } 598 599 bool isVISrc_256B64() const { 600 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 601 } 602 603 bool isVISrc_256F64() const { 604 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 605 } 606 607 bool isVISrc_128B16() const { 608 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 609 } 610 611 bool isVISrc_128V2B16() const { 612 return isVISrc_128B16(); 613 } 614 615 bool isVISrc_128B32() const { 616 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 617 } 618 619 bool isVISrc_128F32() const { 620 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 621 } 622 623 bool isVISrc_256V2FP32() const { 624 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 625 } 626 627 bool isVISrc_256V2INT32() const { 628 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 629 } 630 631 bool isVISrc_512B32() const { 632 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 633 } 634 635 bool isVISrc_512B16() const { 636 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 637 } 638 639 bool isVISrc_512V2B16() const { 640 return isVISrc_512B16(); 641 } 642 643 bool isVISrc_512F32() const { 644 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 645 } 646 647 bool isVISrc_512F16() const { 648 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 649 } 650 651 bool isVISrc_512V2F16() const { 652 return isVISrc_512F16() || isVISrc_512B32(); 653 } 654 655 bool isVISrc_1024B32() const { 656 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 657 } 658 659 bool isVISrc_1024B16() const { 660 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 661 } 662 663 bool isVISrc_1024V2B16() const { 664 return isVISrc_1024B16(); 665 } 666 667 bool isVISrc_1024F32() const { 668 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 669 } 670 671 bool isVISrc_1024F16() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 673 } 674 675 bool isVISrc_1024V2F16() const { 676 return isVISrc_1024F16() || isVISrc_1024B32(); 677 } 678 679 bool isAISrcB32() const { 680 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 681 } 682 683 bool isAISrcB16() const { 684 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 685 } 686 687 bool isAISrcV2B16() const { 688 return isAISrcB16(); 689 } 690 691 bool isAISrcF32() const { 692 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 693 } 694 695 bool isAISrcF16() const { 696 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 697 } 698 699 bool isAISrcV2F16() const { 700 return isAISrcF16() || isAISrcB32(); 701 } 702 703 bool isAISrc_64B64() const { 704 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 705 } 706 707 bool isAISrc_64F64() const { 708 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 709 } 710 711 bool isAISrc_128B32() const { 712 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 713 } 714 715 bool isAISrc_128B16() const { 716 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 717 } 718 719 bool isAISrc_128V2B16() const { 720 return isAISrc_128B16(); 721 } 722 723 bool isAISrc_128F32() const { 724 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 725 } 726 727 bool isAISrc_128F16() const { 728 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 729 } 730 731 bool isAISrc_128V2F16() const { 732 return isAISrc_128F16() || isAISrc_128B32(); 733 } 734 735 bool isVISrc_128F16() const { 736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 737 } 738 739 bool isVISrc_128V2F16() const { 740 return isVISrc_128F16() || isVISrc_128B32(); 741 } 742 743 bool isAISrc_256B64() const { 744 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 745 } 746 747 bool isAISrc_256F64() const { 748 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 749 } 750 751 bool isAISrc_512B32() const { 752 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 753 } 754 755 bool isAISrc_512B16() const { 756 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 757 } 758 759 bool isAISrc_512V2B16() const { 760 return isAISrc_512B16(); 761 } 762 763 bool isAISrc_512F32() const { 764 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 765 } 766 767 bool isAISrc_512F16() const { 768 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 769 } 770 771 bool isAISrc_512V2F16() const { 772 return isAISrc_512F16() || isAISrc_512B32(); 773 } 774 775 bool isAISrc_1024B32() const { 776 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 777 } 778 779 bool isAISrc_1024B16() const { 780 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 781 } 782 783 bool isAISrc_1024V2B16() const { 784 return isAISrc_1024B16(); 785 } 786 787 bool isAISrc_1024F32() const { 788 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 789 } 790 791 bool isAISrc_1024F16() const { 792 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 793 } 794 795 bool isAISrc_1024V2F16() const { 796 return isAISrc_1024F16() || isAISrc_1024B32(); 797 } 798 799 bool isKImmFP32() const { 800 return isLiteralImm(MVT::f32); 801 } 802 803 bool isKImmFP16() const { 804 return isLiteralImm(MVT::f16); 805 } 806 807 bool isMem() const override { 808 return false; 809 } 810 811 bool isExpr() const { 812 return Kind == Expression; 813 } 814 815 bool isSoppBrTarget() const { 816 return isExpr() || isImm(); 817 } 818 819 bool isSWaitCnt() const; 820 bool isHwreg() const; 821 bool isSendMsg() const; 822 bool isSwizzle() const; 823 bool isSMRDOffset8() const; 824 bool isSMEMOffset() const; 825 bool isSMRDLiteralOffset() const; 826 bool isDPP8() const; 827 bool isDPPCtrl() const; 828 bool isBLGP() const; 829 bool isCBSZ() const; 830 bool isABID() const; 831 bool isGPRIdxMode() const; 832 bool isS16Imm() const; 833 bool isU16Imm() const; 834 bool isEndpgm() const; 835 836 StringRef getExpressionAsToken() const { 837 assert(isExpr()); 838 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 839 return S->getSymbol().getName(); 840 } 841 842 StringRef getToken() const { 843 assert(isToken()); 844 845 if (Kind == Expression) 846 return getExpressionAsToken(); 847 848 return StringRef(Tok.Data, Tok.Length); 
849 } 850 851 int64_t getImm() const { 852 assert(isImm()); 853 return Imm.Val; 854 } 855 856 void setImm(int64_t Val) { 857 assert(isImm()); 858 Imm.Val = Val; 859 } 860 861 ImmTy getImmTy() const { 862 assert(isImm()); 863 return Imm.Type; 864 } 865 866 unsigned getReg() const override { 867 assert(isRegKind()); 868 return Reg.RegNo; 869 } 870 871 SMLoc getStartLoc() const override { 872 return StartLoc; 873 } 874 875 SMLoc getEndLoc() const override { 876 return EndLoc; 877 } 878 879 SMRange getLocRange() const { 880 return SMRange(StartLoc, EndLoc); 881 } 882 883 Modifiers getModifiers() const { 884 assert(isRegKind() || isImmTy(ImmTyNone)); 885 return isRegKind() ? Reg.Mods : Imm.Mods; 886 } 887 888 void setModifiers(Modifiers Mods) { 889 assert(isRegKind() || isImmTy(ImmTyNone)); 890 if (isRegKind()) 891 Reg.Mods = Mods; 892 else 893 Imm.Mods = Mods; 894 } 895 896 bool hasModifiers() const { 897 return getModifiers().hasModifiers(); 898 } 899 900 bool hasFPModifiers() const { 901 return getModifiers().hasFPModifiers(); 902 } 903 904 bool hasIntModifiers() const { 905 return getModifiers().hasIntModifiers(); 906 } 907 908 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 909 910 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 911 912 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 913 914 template <unsigned Bitwidth> 915 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 916 917 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 918 addKImmFPOperands<16>(Inst, N); 919 } 920 921 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 922 addKImmFPOperands<32>(Inst, N); 923 } 924 925 void addRegOperands(MCInst &Inst, unsigned N) const; 926 927 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 928 addRegOperands(Inst, N); 929 } 930 931 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 932 if (isRegKind()) 933 addRegOperands(Inst, N); 934 else if (isExpr()) 935 Inst.addOperand(MCOperand::createExpr(Expr)); 936 else 937 addImmOperands(Inst, N); 938 } 939 940 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 941 Modifiers Mods = getModifiers(); 942 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 943 if (isRegKind()) { 944 addRegOperands(Inst, N); 945 } else { 946 addImmOperands(Inst, N, false); 947 } 948 } 949 950 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 951 assert(!hasIntModifiers()); 952 addRegOrImmWithInputModsOperands(Inst, N); 953 } 954 955 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 956 assert(!hasFPModifiers()); 957 addRegOrImmWithInputModsOperands(Inst, N); 958 } 959 960 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 961 Modifiers Mods = getModifiers(); 962 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 963 assert(isRegKind()); 964 addRegOperands(Inst, N); 965 } 966 967 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 968 assert(!hasIntModifiers()); 969 addRegWithInputModsOperands(Inst, N); 970 } 971 972 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 973 assert(!hasFPModifiers()); 974 addRegWithInputModsOperands(Inst, N); 975 } 976 977 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 978 if (isImm()) 979 addImmOperands(Inst, N); 980 else { 981 assert(isExpr()); 982 Inst.addOperand(MCOperand::createExpr(Expr)); 983 } 984 } 985 986 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 987 switch (Type) { 988 case ImmTyNone: OS << "None"; break; 989 case ImmTyGDS: OS << "GDS"; break; 990 case ImmTyLDS: OS << "LDS"; break; 991 case ImmTyOffen: OS << "Offen"; break; 992 case ImmTyIdxen: OS << "Idxen"; break; 993 case ImmTyAddr64: OS << "Addr64"; break; 994 case ImmTyOffset: OS << "Offset"; break; 995 case ImmTyInstOffset: OS << "InstOffset"; break; 996 case ImmTyOffset0: OS << "Offset0"; break; 997 case ImmTyOffset1: OS << "Offset1"; break; 998 case ImmTyCPol: OS << "CPol"; break; 999 case ImmTySWZ: OS << "SWZ"; break; 1000 case ImmTyTFE: OS << "TFE"; break; 1001 case ImmTyD16: OS << "D16"; break; 1002 case ImmTyFORMAT: OS << "FORMAT"; break; 1003 case ImmTyClampSI: OS << "ClampSI"; break; 1004 case ImmTyOModSI: OS << "OModSI"; break; 1005 case ImmTyDPP8: OS << "DPP8"; break; 1006 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1007 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1008 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1009 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1010 case ImmTyDppFi: OS << "FI"; break; 1011 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1012 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1013 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1014 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1015 case ImmTyDMask: OS << "DMask"; break; 1016 case ImmTyDim: OS << "Dim"; break; 1017 case ImmTyUNorm: OS << "UNorm"; break; 1018 case ImmTyDA: OS << "DA"; break; 1019 case ImmTyR128A16: OS << "R128A16"; break; 1020 case ImmTyA16: OS << "A16"; break; 1021 case ImmTyLWE: OS << "LWE"; break; 1022 case ImmTyOff: OS << "Off"; break; 1023 case ImmTyExpTgt: OS << "ExpTgt"; break; 1024 case ImmTyExpCompr: OS << "ExpCompr"; break; 1025 case ImmTyExpVM: OS << "ExpVM"; break; 1026 case ImmTyHwreg: OS << "Hwreg"; break; 1027 case ImmTySendMsg: OS << "SendMsg"; break; 1028 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1029 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1030 case ImmTyAttrChan: OS << "AttrChan"; break; 1031 case ImmTyOpSel: OS << "OpSel"; break; 1032 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1033 case ImmTyNegLo: OS << "NegLo"; break; 1034 case ImmTyNegHi: OS << "NegHi"; break; 1035 case ImmTySwizzle: OS << "Swizzle"; break; 1036 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1037 case ImmTyHigh: OS << "High"; break; 1038 case ImmTyBLGP: OS << "BLGP"; break; 1039 case ImmTyCBSZ: OS << "CBSZ"; break; 1040 case ImmTyABID: OS << "ABID"; break; 1041 case ImmTyEndpgm: OS << "Endpgm"; break; 1042 } 1043 } 1044 1045 void print(raw_ostream &OS) const override { 1046 switch (Kind) { 1047 case Register: 1048 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1049 break; 1050 case Immediate: 1051 OS << '<' << getImm(); 1052 if (getImmTy() != ImmTyNone) { 1053 OS << " type: "; printImmTy(OS, getImmTy()); 1054 } 1055 OS << " mods: " << Imm.Mods << '>'; 1056 break; 1057 case Token: 1058 OS << '\'' << getToken() << '\''; 1059 break; 1060 case Expression: 1061 OS << "<expr " << *Expr << '>'; 1062 break; 1063 } 1064 } 1065 1066 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1067 int64_t Val, SMLoc Loc, 1068 ImmTy Type = ImmTyNone, 1069 bool IsFPImm = false) { 1070 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1071 Op->Imm.Val = Val; 1072 Op->Imm.IsFPImm = IsFPImm; 1073 Op->Imm.Kind = ImmKindTyNone; 1074 Op->Imm.Type = Type; 1075 Op->Imm.Mods = Modifiers(); 1076 Op->StartLoc = Loc; 1077 Op->EndLoc = Loc; 1078 return Op; 1079 } 1080 1081 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1082 StringRef Str, SMLoc Loc, 1083 bool HasExplicitEncodingSize = true) { 1084 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1085 Res->Tok.Data = Str.data(); 1086 Res->Tok.Length = Str.size(); 1087 Res->StartLoc = Loc; 1088 Res->EndLoc = Loc; 1089 return Res; 1090 } 1091 1092 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1093 unsigned RegNo, SMLoc S, 1094 SMLoc E) { 1095 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1096 Op->Reg.RegNo = RegNo; 1097 Op->Reg.Mods = Modifiers(); 1098 Op->StartLoc = S; 1099 Op->EndLoc = E; 1100 return Op; 1101 } 1102 1103 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1104 const class MCExpr *Expr, SMLoc S) { 1105 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1106 Op->Expr = Expr; 1107 Op->StartLoc = S; 1108 Op->EndLoc = S; 1109 return Op; 1110 } 1111 }; 1112 1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1114 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1115 return OS; 1116 } 1117 1118 //===----------------------------------------------------------------------===// 1119 // AsmParser 1120 //===----------------------------------------------------------------------===// 1121 1122 // Holds info related to the current kernel, e.g. count of SGPRs used. 1123 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1124 // .amdgpu_hsa_kernel or at EOF. 1125 class KernelScopeInfo { 1126 int SgprIndexUnusedMin = -1; 1127 int VgprIndexUnusedMin = -1; 1128 MCContext *Ctx = nullptr; 1129 1130 void usesSgprAt(int i) { 1131 if (i >= SgprIndexUnusedMin) { 1132 SgprIndexUnusedMin = ++i; 1133 if (Ctx) { 1134 MCSymbol* const Sym = 1135 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1136 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1137 } 1138 } 1139 } 1140 1141 void usesVgprAt(int i) { 1142 if (i >= VgprIndexUnusedMin) { 1143 VgprIndexUnusedMin = ++i; 1144 if (Ctx) { 1145 MCSymbol* const Sym = 1146 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1147 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1148 } 1149 } 1150 } 1151 1152 public: 1153 KernelScopeInfo() = default; 1154 1155 void initialize(MCContext &Context) { 1156 Ctx = &Context; 1157 usesSgprAt(SgprIndexUnusedMin = -1); 1158 usesVgprAt(VgprIndexUnusedMin = -1); 1159 } 1160 1161 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1162 switch (RegKind) { 1163 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1164 case IS_AGPR: // fall through 1165 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1166 default: break; 1167 } 1168 } 1169 }; 1170 1171 class AMDGPUAsmParser : public MCTargetAsmParser { 1172 MCAsmParser &Parser; 1173 1174 // Number of extra operands parsed after the first optional operand. 1175 // This may be necessary to skip hardcoded mandatory operands. 
1176 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1177 1178 unsigned ForcedEncodingSize = 0; 1179 bool ForcedDPP = false; 1180 bool ForcedSDWA = false; 1181 KernelScopeInfo KernelScope; 1182 unsigned CPolSeen; 1183 1184 /// @name Auto-generated Match Functions 1185 /// { 1186 1187 #define GET_ASSEMBLER_HEADER 1188 #include "AMDGPUGenAsmMatcher.inc" 1189 1190 /// } 1191 1192 private: 1193 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1194 bool OutOfRangeError(SMRange Range); 1195 /// Calculate VGPR/SGPR blocks required for given target, reserved 1196 /// registers, and user-specified NextFreeXGPR values. 1197 /// 1198 /// \param Features [in] Target features, used for bug corrections. 1199 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1200 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1201 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1202 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1203 /// descriptor field, if valid. 1204 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1205 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1206 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1207 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1208 /// \param VGPRBlocks [out] Result VGPR block count. 1209 /// \param SGPRBlocks [out] Result SGPR block count. 1210 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1211 bool FlatScrUsed, bool XNACKUsed, 1212 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1213 SMRange VGPRRange, unsigned NextFreeSGPR, 1214 SMRange SGPRRange, unsigned &VGPRBlocks, 1215 unsigned &SGPRBlocks); 1216 bool ParseDirectiveAMDGCNTarget(); 1217 bool ParseDirectiveAMDHSAKernel(); 1218 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1219 bool ParseDirectiveHSACodeObjectVersion(); 1220 bool ParseDirectiveHSACodeObjectISA(); 1221 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1222 bool ParseDirectiveAMDKernelCodeT(); 1223 // TODO: Possibly make subtargetHasRegister const. 1224 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1225 bool ParseDirectiveAMDGPUHsaKernel(); 1226 1227 bool ParseDirectiveISAVersion(); 1228 bool ParseDirectiveHSAMetadata(); 1229 bool ParseDirectivePALMetadataBegin(); 1230 bool ParseDirectivePALMetadata(); 1231 bool ParseDirectiveAMDGPULDS(); 1232 1233 /// Common code to parse out a block of text (typically YAML) between start and 1234 /// end directives. 
1235 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1236 const char *AssemblerDirectiveEnd, 1237 std::string &CollectString); 1238 1239 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1240 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1241 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1242 unsigned &RegNum, unsigned &RegWidth, 1243 bool RestoreOnFailure = false); 1244 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1245 unsigned &RegNum, unsigned &RegWidth, 1246 SmallVectorImpl<AsmToken> &Tokens); 1247 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1248 unsigned &RegWidth, 1249 SmallVectorImpl<AsmToken> &Tokens); 1250 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1251 unsigned &RegWidth, 1252 SmallVectorImpl<AsmToken> &Tokens); 1253 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1254 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1255 bool ParseRegRange(unsigned& Num, unsigned& Width); 1256 unsigned getRegularReg(RegisterKind RegKind, 1257 unsigned RegNum, 1258 unsigned RegWidth, 1259 SMLoc Loc); 1260 1261 bool isRegister(); 1262 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1263 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1264 void initializeGprCountSymbol(RegisterKind RegKind); 1265 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1266 unsigned RegWidth); 1267 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1268 bool IsAtomic, bool IsLds = false); 1269 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1270 bool IsGdsHardcoded); 1271 1272 public: 1273 enum AMDGPUMatchResultTy { 1274 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1275 }; 1276 enum OperandMode { 1277 OperandMode_Default, 1278 OperandMode_NSA, 1279 }; 1280 1281 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1282 1283 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1284 const MCInstrInfo &MII, 1285 const MCTargetOptions &Options) 1286 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1287 MCAsmParserExtension::Initialize(Parser); 1288 1289 if (getFeatureBits().none()) { 1290 // Set default features. 1291 copySTI().ToggleFeature("southern-islands"); 1292 } 1293 1294 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1295 1296 { 1297 // TODO: make those pre-defined variables read-only. 1298 // Currently there is no suitable machinery in the core llvm-mc for this. 1299 // MCSymbol::isRedefinable is intended for another purpose, and 1300 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
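// The block below pre-defines assembler symbols describing the target ISA
// version: .amdgcn.gfx_generation_number/_minor/_stepping when targeting the
// HSA ABI v3 or newer, and .option.machine_version_major/_minor/_stepping
// otherwise, so that assembly sources can test the version being built for.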
1301 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1302 MCContext &Ctx = getContext(); 1303 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } else { 1312 MCSymbol *Sym = 1313 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1314 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1315 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1316 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1317 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1318 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1319 } 1320 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1321 initializeGprCountSymbol(IS_VGPR); 1322 initializeGprCountSymbol(IS_SGPR); 1323 } else 1324 KernelScope.initialize(getContext()); 1325 } 1326 } 1327 1328 bool hasMIMG_R128() const { 1329 return AMDGPU::hasMIMG_R128(getSTI()); 1330 } 1331 1332 bool hasPackedD16() const { 1333 return AMDGPU::hasPackedD16(getSTI()); 1334 } 1335 1336 bool hasGFX10A16() const { 1337 return AMDGPU::hasGFX10A16(getSTI()); 1338 } 1339 1340 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1341 1342 bool isSI() const { 1343 return AMDGPU::isSI(getSTI()); 1344 } 1345 1346 bool isCI() const { 1347 return AMDGPU::isCI(getSTI()); 1348 } 1349 1350 bool isVI() const { 1351 return AMDGPU::isVI(getSTI()); 1352 } 1353 1354 bool isGFX9() const { 1355 return AMDGPU::isGFX9(getSTI()); 1356 } 1357 1358 bool isGFX90A() const { 1359 return AMDGPU::isGFX90A(getSTI()); 1360 } 1361 1362 bool isGFX9Plus() const { 1363 return AMDGPU::isGFX9Plus(getSTI()); 1364 } 1365 1366 bool isGFX10() const { 1367 return AMDGPU::isGFX10(getSTI()); 1368 } 1369 1370 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1371 1372 bool isGFX10_BEncoding() const { 1373 return AMDGPU::isGFX10_BEncoding(getSTI()); 1374 } 1375 1376 bool hasInv2PiInlineImm() const { 1377 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1378 } 1379 1380 bool hasFlatOffsets() const { 1381 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1382 } 1383 1384 bool hasArchitectedFlatScratch() const { 1385 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1386 } 1387 1388 bool hasSGPR102_SGPR103() const { 1389 return !isVI() && !isGFX9(); 1390 } 1391 1392 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1393 1394 bool hasIntClamp() const { 1395 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1396 } 1397 1398 AMDGPUTargetStreamer &getTargetStreamer() { 1399 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1400 return static_cast<AMDGPUTargetStreamer &>(TS); 1401 } 1402 1403 const MCRegisterInfo *getMRI() const { 1404 // We need this const_cast because for some reason getContext() is not const 1405 // in MCAsmParser. 
1406 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1407 } 1408 1409 const MCInstrInfo *getMII() const { 1410 return &MII; 1411 } 1412 1413 const FeatureBitset &getFeatureBits() const { 1414 return getSTI().getFeatureBits(); 1415 } 1416 1417 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1418 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1419 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1420 1421 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1422 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1423 bool isForcedDPP() const { return ForcedDPP; } 1424 bool isForcedSDWA() const { return ForcedSDWA; } 1425 ArrayRef<unsigned> getMatchedVariants() const; 1426 StringRef getMatchedVariantName() const; 1427 1428 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1429 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1430 bool RestoreOnFailure); 1431 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1432 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1433 SMLoc &EndLoc) override; 1434 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1435 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1436 unsigned Kind) override; 1437 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1438 OperandVector &Operands, MCStreamer &Out, 1439 uint64_t &ErrorInfo, 1440 bool MatchingInlineAsm) override; 1441 bool ParseDirective(AsmToken DirectiveID) override; 1442 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1443 OperandMode Mode = OperandMode_Default); 1444 StringRef parseMnemonicSuffix(StringRef Name); 1445 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1446 SMLoc NameLoc, OperandVector &Operands) override; 1447 //bool ProcessInstruction(MCInst &Inst); 1448 1449 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1450 1451 OperandMatchResultTy 1452 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1453 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1454 bool (*ConvertResult)(int64_t &) = nullptr); 1455 1456 OperandMatchResultTy 1457 parseOperandArrayWithPrefix(const char *Prefix, 1458 OperandVector &Operands, 1459 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1460 bool (*ConvertResult)(int64_t&) = nullptr); 1461 1462 OperandMatchResultTy 1463 parseNamedBit(StringRef Name, OperandVector &Operands, 1464 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1465 OperandMatchResultTy parseCPol(OperandVector &Operands); 1466 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1467 StringRef &Value, 1468 SMLoc &StringLoc); 1469 1470 bool isModifier(); 1471 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1472 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1473 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1474 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1475 bool parseSP3NegModifier(); 1476 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1477 OperandMatchResultTy parseReg(OperandVector &Operands); 1478 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1479 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1480 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1481 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1482 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1483 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1484 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1485 OperandMatchResultTy parseUfmt(int64_t &Format); 1486 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1487 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1488 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1489 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1490 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1491 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1492 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1493 1494 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1495 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1496 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1497 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1498 1499 bool parseCnt(int64_t &IntVal); 1500 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1501 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1502 1503 private: 1504 struct OperandInfoTy { 1505 SMLoc Loc; 1506 int64_t Id; 1507 bool IsSymbolic = false; 1508 bool IsDefined = false; 1509 1510 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1511 }; 1512 1513 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1514 bool validateSendMsg(const OperandInfoTy &Msg, 1515 const OperandInfoTy &Op, 1516 const OperandInfoTy &Stream); 1517 1518 bool parseHwregBody(OperandInfoTy &HwReg, 1519 OperandInfoTy &Offset, 1520 OperandInfoTy &Width); 1521 bool validateHwreg(const OperandInfoTy &HwReg, 1522 const OperandInfoTy &Offset, 1523 const OperandInfoTy &Width); 1524 1525 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1526 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1527 1528 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1529 const OperandVector &Operands) const; 1530 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1531 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1532 SMLoc getLitLoc(const OperandVector &Operands) const; 1533 SMLoc getConstLoc(const OperandVector &Operands) const; 1534 1535 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1536 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1538 bool validateSOPLiteral(const MCInst &Inst) const; 1539 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1540 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1541 bool validateIntClampSupported(const MCInst &Inst); 1542 bool validateMIMGAtomicDMask(const MCInst &Inst); 1543 bool validateMIMGGatherDMask(const MCInst &Inst); 1544 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1545 bool validateMIMGDataSize(const MCInst &Inst); 1546 bool validateMIMGAddrSize(const 
MCInst &Inst); 1547 bool validateMIMGD16(const MCInst &Inst); 1548 bool validateMIMGDim(const MCInst &Inst); 1549 bool validateMIMGMSAA(const MCInst &Inst); 1550 bool validateOpSel(const MCInst &Inst); 1551 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1552 bool validateVccOperand(unsigned Reg) const; 1553 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1554 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1555 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1556 bool validateAGPRLdSt(const MCInst &Inst) const; 1557 bool validateVGPRAlign(const MCInst &Inst) const; 1558 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1559 bool validateDivScale(const MCInst &Inst); 1560 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1561 const SMLoc &IDLoc); 1562 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1563 unsigned getConstantBusLimit(unsigned Opcode) const; 1564 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1565 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1566 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1567 1568 bool isSupportedMnemo(StringRef Mnemo, 1569 const FeatureBitset &FBS); 1570 bool isSupportedMnemo(StringRef Mnemo, 1571 const FeatureBitset &FBS, 1572 ArrayRef<unsigned> Variants); 1573 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1574 1575 bool isId(const StringRef Id) const; 1576 bool isId(const AsmToken &Token, const StringRef Id) const; 1577 bool isToken(const AsmToken::TokenKind Kind) const; 1578 bool trySkipId(const StringRef Id); 1579 bool trySkipId(const StringRef Pref, const StringRef Id); 1580 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1581 bool trySkipToken(const AsmToken::TokenKind Kind); 1582 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1583 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1584 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1585 1586 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1587 AsmToken::TokenKind getTokenKind() const; 1588 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1589 bool parseExpr(OperandVector &Operands); 1590 StringRef getTokenStr() const; 1591 AsmToken peekToken(); 1592 AsmToken getToken() const; 1593 SMLoc getLoc() const; 1594 void lex(); 1595 1596 public: 1597 void onBeginOfFile() override; 1598 1599 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1600 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1601 1602 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1603 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1604 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1605 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1606 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1607 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1608 1609 bool parseSwizzleOperand(int64_t &Op, 1610 const unsigned MinVal, 1611 const unsigned MaxVal, 1612 const StringRef ErrMsg, 1613 SMLoc &Loc); 1614 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1615 const unsigned MinVal, 1616 const unsigned MaxVal, 1617 const StringRef ErrMsg); 1618 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1619 bool parseSwizzleOffset(int64_t &Imm); 1620 bool parseSwizzleMacro(int64_t &Imm); 1621 bool 
parseSwizzleQuadPerm(int64_t &Imm); 1622 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1623 bool parseSwizzleBroadcast(int64_t &Imm); 1624 bool parseSwizzleSwap(int64_t &Imm); 1625 bool parseSwizzleReverse(int64_t &Imm); 1626 1627 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1628 int64_t parseGPRIdxMacro(); 1629 1630 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1631 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1632 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1633 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1634 1635 AMDGPUOperand::Ptr defaultCPol() const; 1636 1637 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1638 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1639 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1640 AMDGPUOperand::Ptr defaultFlatOffset() const; 1641 1642 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1643 1644 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1645 OptionalImmIndexMap &OptionalIdx); 1646 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1647 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1648 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1649 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1650 OptionalImmIndexMap &OptionalIdx); 1651 1652 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1653 1654 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1655 bool IsAtomic = false); 1656 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1657 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1658 1659 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1660 1661 bool parseDimId(unsigned &Encoding); 1662 OperandMatchResultTy parseDim(OperandVector &Operands); 1663 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1664 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1665 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1666 int64_t parseDPPCtrlSel(StringRef Ctrl); 1667 int64_t parseDPPCtrlPerm(); 1668 AMDGPUOperand::Ptr defaultRowMask() const; 1669 AMDGPUOperand::Ptr defaultBankMask() const; 1670 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1671 AMDGPUOperand::Ptr defaultFI() const; 1672 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1673 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1674 1675 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1676 AMDGPUOperand::ImmTy Type); 1677 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1678 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1679 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1680 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1681 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1682 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1683 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1684 uint64_t BasicInstType, 1685 bool SkipDstVcc = false, 1686 bool SkipSrcVcc = false); 1687 1688 AMDGPUOperand::Ptr defaultBLGP() const; 1689 AMDGPUOperand::Ptr defaultCBSZ() const; 1690 AMDGPUOperand::Ptr defaultABID() const; 1691 1692 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1693 AMDGPUOperand::Ptr 
defaultEndpgmImmOperands() const; 1694 }; 1695 1696 struct OptionalOperand { 1697 const char *Name; 1698 AMDGPUOperand::ImmTy Type; 1699 bool IsBit; 1700 bool (*ConvertResult)(int64_t&); 1701 }; 1702 1703 } // end anonymous namespace 1704 1705 // May be called with an integer type of equivalent bitwidth. 1706 static const fltSemantics *getFltSemantics(unsigned Size) { 1707 switch (Size) { 1708 case 4: 1709 return &APFloat::IEEEsingle(); 1710 case 8: 1711 return &APFloat::IEEEdouble(); 1712 case 2: 1713 return &APFloat::IEEEhalf(); 1714 default: 1715 llvm_unreachable("unsupported fp type"); 1716 } 1717 } 1718 1719 static const fltSemantics *getFltSemantics(MVT VT) { 1720 return getFltSemantics(VT.getSizeInBits() / 8); 1721 } 1722 1723 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1724 switch (OperandType) { 1725 case AMDGPU::OPERAND_REG_IMM_INT32: 1726 case AMDGPU::OPERAND_REG_IMM_FP32: 1727 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1728 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1729 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1730 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1731 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1732 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1733 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1734 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1735 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1736 case AMDGPU::OPERAND_KIMM32: 1737 return &APFloat::IEEEsingle(); 1738 case AMDGPU::OPERAND_REG_IMM_INT64: 1739 case AMDGPU::OPERAND_REG_IMM_FP64: 1740 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1741 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1742 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1743 return &APFloat::IEEEdouble(); 1744 case AMDGPU::OPERAND_REG_IMM_INT16: 1745 case AMDGPU::OPERAND_REG_IMM_FP16: 1746 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1747 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1748 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1749 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1750 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1751 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1752 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1753 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1754 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1755 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1756 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1757 case AMDGPU::OPERAND_KIMM16: 1758 return &APFloat::IEEEhalf(); 1759 default: 1760 llvm_unreachable("unsupported fp type"); 1761 } 1762 } 1763 1764 //===----------------------------------------------------------------------===// 1765 // Operand 1766 //===----------------------------------------------------------------------===// 1767 1768 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1769 bool Lost; 1770 1771 // Convert literal to the semantics of the target type 1772 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1773 APFloat::rmNearestTiesToEven, 1774 &Lost); 1775 // We allow precision loss but not overflow or underflow 1776 if (Status != APFloat::opOK && 1777 Lost && 1778 ((Status & APFloat::opOverflow) != 0 || 1779 (Status & APFloat::opUnderflow) != 0)) { 1780 return false; 1781 } 1782 1783 return true; 1784 } 1785 1786 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1787 return isUIntN(Size, Val) || isIntN(Size, Val); 1788 } 1789 1790 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1791 if (VT.getScalarType() == MVT::i16) { 1792 // FP immediate values are broken. 1793 return isInlinableIntLiteral(Val); 1794 } 1795 1796 // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal will be set to zero, but we still
    // accept these literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
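    // For example, a token such as 1.5 is accepted above for an f64
    // operand (it is encoded via the high 32 bits of the literal), but is
    // rejected here for an i64 operand.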
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ?
sizeof(double) : getOperandSize(InstDesc, OpNum); 1992 Val = applyInputFPModifiers(Val, Size); 1993 } 1994 1995 APInt Literal(64, Val); 1996 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1997 1998 if (Imm.IsFPImm) { // We got fp literal token 1999 switch (OpTy) { 2000 case AMDGPU::OPERAND_REG_IMM_INT64: 2001 case AMDGPU::OPERAND_REG_IMM_FP64: 2002 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2003 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2004 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2005 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2006 AsmParser->hasInv2PiInlineImm())) { 2007 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2008 setImmKindConst(); 2009 return; 2010 } 2011 2012 // Non-inlineable 2013 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2014 // For fp operands we check if low 32 bits are zeros 2015 if (Literal.getLoBits(32) != 0) { 2016 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2017 "Can't encode literal as exact 64-bit floating-point operand. " 2018 "Low 32-bits will be set to zero"); 2019 } 2020 2021 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2022 setImmKindLiteral(); 2023 return; 2024 } 2025 2026 // We don't allow fp literals in 64-bit integer instructions. It is 2027 // unclear how we should encode them. This case should be checked earlier 2028 // in predicate methods (isLiteralImm()) 2029 llvm_unreachable("fp literal in 64-bit integer instruction."); 2030 2031 case AMDGPU::OPERAND_REG_IMM_INT32: 2032 case AMDGPU::OPERAND_REG_IMM_FP32: 2033 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2034 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2035 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2036 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2037 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2038 case AMDGPU::OPERAND_REG_IMM_INT16: 2039 case AMDGPU::OPERAND_REG_IMM_FP16: 2040 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2041 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2042 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2043 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2044 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2045 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2046 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2047 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2048 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2049 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2050 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2051 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2052 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2053 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2054 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2055 case AMDGPU::OPERAND_KIMM32: 2056 case AMDGPU::OPERAND_KIMM16: { 2057 bool lost; 2058 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2059 // Convert literal to single precision 2060 FPLiteral.convert(*getOpFltSemantics(OpTy), 2061 APFloat::rmNearestTiesToEven, &lost); 2062 // We allow precision lost but not overflow or underflow. This should be 2063 // checked earlier in isLiteralImm() 2064 2065 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2066 Inst.addOperand(MCOperand::createImm(ImmVal)); 2067 setImmKindLiteral(); 2068 return; 2069 } 2070 default: 2071 llvm_unreachable("invalid operand size"); 2072 } 2073 2074 return; 2075 } 2076 2077 // We got int literal token. 2078 // Only sign extend inline immediates. 
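  // Illustrative effect of the switch below: for a 32-bit operand an int
  // token such as -4 is inlinable and is kept sign-extended as an inline
  // constant, whereas 0x12345678 is not inlinable and is truncated to its
  // low 32 bits and emitted as a literal.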
2079 switch (OpTy) { 2080 case AMDGPU::OPERAND_REG_IMM_INT32: 2081 case AMDGPU::OPERAND_REG_IMM_FP32: 2082 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2083 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2084 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2085 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2086 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2087 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2088 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2089 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2090 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2091 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2092 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2093 if (isSafeTruncation(Val, 32) && 2094 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2095 AsmParser->hasInv2PiInlineImm())) { 2096 Inst.addOperand(MCOperand::createImm(Val)); 2097 setImmKindConst(); 2098 return; 2099 } 2100 2101 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2102 setImmKindLiteral(); 2103 return; 2104 2105 case AMDGPU::OPERAND_REG_IMM_INT64: 2106 case AMDGPU::OPERAND_REG_IMM_FP64: 2107 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2108 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2109 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2110 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2111 Inst.addOperand(MCOperand::createImm(Val)); 2112 setImmKindConst(); 2113 return; 2114 } 2115 2116 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2117 setImmKindLiteral(); 2118 return; 2119 2120 case AMDGPU::OPERAND_REG_IMM_INT16: 2121 case AMDGPU::OPERAND_REG_IMM_FP16: 2122 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2123 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2124 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2126 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2127 if (isSafeTruncation(Val, 16) && 2128 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2129 AsmParser->hasInv2PiInlineImm())) { 2130 Inst.addOperand(MCOperand::createImm(Val)); 2131 setImmKindConst(); 2132 return; 2133 } 2134 2135 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2136 setImmKindLiteral(); 2137 return; 2138 2139 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2140 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2141 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2142 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2143 assert(isSafeTruncation(Val, 16)); 2144 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2145 AsmParser->hasInv2PiInlineImm())); 2146 2147 Inst.addOperand(MCOperand::createImm(Val)); 2148 return; 2149 } 2150 case AMDGPU::OPERAND_KIMM32: 2151 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2152 setImmKindNone(); 2153 return; 2154 case AMDGPU::OPERAND_KIMM16: 2155 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2156 setImmKindNone(); 2157 return; 2158 default: 2159 llvm_unreachable("invalid operand size"); 2160 } 2161 } 2162 2163 template <unsigned Bitwidth> 2164 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2165 APInt Literal(64, Imm.Val); 2166 setImmKindNone(); 2167 2168 if (!Imm.IsFPImm) { 2169 // We got int literal token. 
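    // Integer tokens are passed through as the low <Bitwidth> bits; fp
    // tokens are converted below. Illustrative example (assuming a KIMM32
    // operand such as the one used by v_madmk_f32):
    //   v_madmk_f32 v0, v1, 0x42280000, v2
    //   v_madmk_f32 v0, v1, 42.0, v2
    // both produce the same 32-bit K value.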
2170 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2171 return; 2172 } 2173 2174 bool Lost; 2175 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2176 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2177 APFloat::rmNearestTiesToEven, &Lost); 2178 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2179 } 2180 2181 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2182 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2183 } 2184 2185 static bool isInlineValue(unsigned Reg) { 2186 switch (Reg) { 2187 case AMDGPU::SRC_SHARED_BASE: 2188 case AMDGPU::SRC_SHARED_LIMIT: 2189 case AMDGPU::SRC_PRIVATE_BASE: 2190 case AMDGPU::SRC_PRIVATE_LIMIT: 2191 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2192 return true; 2193 case AMDGPU::SRC_VCCZ: 2194 case AMDGPU::SRC_EXECZ: 2195 case AMDGPU::SRC_SCC: 2196 return true; 2197 case AMDGPU::SGPR_NULL: 2198 return true; 2199 default: 2200 return false; 2201 } 2202 } 2203 2204 bool AMDGPUOperand::isInlineValue() const { 2205 return isRegKind() && ::isInlineValue(getReg()); 2206 } 2207 2208 //===----------------------------------------------------------------------===// 2209 // AsmParser 2210 //===----------------------------------------------------------------------===// 2211 2212 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2213 if (Is == IS_VGPR) { 2214 switch (RegWidth) { 2215 default: return -1; 2216 case 1: return AMDGPU::VGPR_32RegClassID; 2217 case 2: return AMDGPU::VReg_64RegClassID; 2218 case 3: return AMDGPU::VReg_96RegClassID; 2219 case 4: return AMDGPU::VReg_128RegClassID; 2220 case 5: return AMDGPU::VReg_160RegClassID; 2221 case 6: return AMDGPU::VReg_192RegClassID; 2222 case 7: return AMDGPU::VReg_224RegClassID; 2223 case 8: return AMDGPU::VReg_256RegClassID; 2224 case 16: return AMDGPU::VReg_512RegClassID; 2225 case 32: return AMDGPU::VReg_1024RegClassID; 2226 } 2227 } else if (Is == IS_TTMP) { 2228 switch (RegWidth) { 2229 default: return -1; 2230 case 1: return AMDGPU::TTMP_32RegClassID; 2231 case 2: return AMDGPU::TTMP_64RegClassID; 2232 case 4: return AMDGPU::TTMP_128RegClassID; 2233 case 8: return AMDGPU::TTMP_256RegClassID; 2234 case 16: return AMDGPU::TTMP_512RegClassID; 2235 } 2236 } else if (Is == IS_SGPR) { 2237 switch (RegWidth) { 2238 default: return -1; 2239 case 1: return AMDGPU::SGPR_32RegClassID; 2240 case 2: return AMDGPU::SGPR_64RegClassID; 2241 case 3: return AMDGPU::SGPR_96RegClassID; 2242 case 4: return AMDGPU::SGPR_128RegClassID; 2243 case 5: return AMDGPU::SGPR_160RegClassID; 2244 case 6: return AMDGPU::SGPR_192RegClassID; 2245 case 7: return AMDGPU::SGPR_224RegClassID; 2246 case 8: return AMDGPU::SGPR_256RegClassID; 2247 case 16: return AMDGPU::SGPR_512RegClassID; 2248 } 2249 } else if (Is == IS_AGPR) { 2250 switch (RegWidth) { 2251 default: return -1; 2252 case 1: return AMDGPU::AGPR_32RegClassID; 2253 case 2: return AMDGPU::AReg_64RegClassID; 2254 case 3: return AMDGPU::AReg_96RegClassID; 2255 case 4: return AMDGPU::AReg_128RegClassID; 2256 case 5: return AMDGPU::AReg_160RegClassID; 2257 case 6: return AMDGPU::AReg_192RegClassID; 2258 case 7: return AMDGPU::AReg_224RegClassID; 2259 case 8: return AMDGPU::AReg_256RegClassID; 2260 case 16: return AMDGPU::AReg_512RegClassID; 2261 case 32: return AMDGPU::AReg_1024RegClassID; 2262 } 2263 } 2264 return -1; 2265 } 2266 2267 static unsigned getSpecialRegForName(StringRef RegName) { 2268 return StringSwitch<unsigned>(RegName) 2269 .Case("exec", 
AMDGPU::EXEC) 2270 .Case("vcc", AMDGPU::VCC) 2271 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2272 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2273 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2274 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2275 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2276 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2277 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2278 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2279 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2280 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2281 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2282 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2283 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2284 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2285 .Case("m0", AMDGPU::M0) 2286 .Case("vccz", AMDGPU::SRC_VCCZ) 2287 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2288 .Case("execz", AMDGPU::SRC_EXECZ) 2289 .Case("src_execz", AMDGPU::SRC_EXECZ) 2290 .Case("scc", AMDGPU::SRC_SCC) 2291 .Case("src_scc", AMDGPU::SRC_SCC) 2292 .Case("tba", AMDGPU::TBA) 2293 .Case("tma", AMDGPU::TMA) 2294 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2295 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2296 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2297 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2298 .Case("vcc_lo", AMDGPU::VCC_LO) 2299 .Case("vcc_hi", AMDGPU::VCC_HI) 2300 .Case("exec_lo", AMDGPU::EXEC_LO) 2301 .Case("exec_hi", AMDGPU::EXEC_HI) 2302 .Case("tma_lo", AMDGPU::TMA_LO) 2303 .Case("tma_hi", AMDGPU::TMA_HI) 2304 .Case("tba_lo", AMDGPU::TBA_LO) 2305 .Case("tba_hi", AMDGPU::TBA_HI) 2306 .Case("pc", AMDGPU::PC_REG) 2307 .Case("null", AMDGPU::SGPR_NULL) 2308 .Default(AMDGPU::NoRegister); 2309 } 2310 2311 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2312 SMLoc &EndLoc, bool RestoreOnFailure) { 2313 auto R = parseRegister(); 2314 if (!R) return true; 2315 assert(R->isReg()); 2316 RegNo = R->getReg(); 2317 StartLoc = R->getStartLoc(); 2318 EndLoc = R->getEndLoc(); 2319 return false; 2320 } 2321 2322 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2323 SMLoc &EndLoc) { 2324 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2325 } 2326 2327 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2328 SMLoc &StartLoc, 2329 SMLoc &EndLoc) { 2330 bool Result = 2331 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2332 bool PendingErrors = getParser().hasPendingError(); 2333 getParser().clearPendingErrors(); 2334 if (PendingErrors) 2335 return MatchOperand_ParseFail; 2336 if (Result) 2337 return MatchOperand_NoMatch; 2338 return MatchOperand_Success; 2339 } 2340 2341 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2342 RegisterKind RegKind, unsigned Reg1, 2343 SMLoc Loc) { 2344 switch (RegKind) { 2345 case IS_SPECIAL: 2346 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2347 Reg = AMDGPU::EXEC; 2348 RegWidth = 2; 2349 return true; 2350 } 2351 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2352 Reg = AMDGPU::FLAT_SCR; 2353 RegWidth = 2; 2354 return true; 2355 } 2356 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2357 Reg = AMDGPU::XNACK_MASK; 2358 RegWidth = 2; 2359 return true; 2360 } 2361 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2362 Reg = AMDGPU::VCC; 2363 RegWidth = 2; 2364 return true; 2365 } 2366 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2367 Reg = 
AMDGPU::TBA; 2368 RegWidth = 2; 2369 return true; 2370 } 2371 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2372 Reg = AMDGPU::TMA; 2373 RegWidth = 2; 2374 return true; 2375 } 2376 Error(Loc, "register does not fit in the list"); 2377 return false; 2378 case IS_VGPR: 2379 case IS_SGPR: 2380 case IS_AGPR: 2381 case IS_TTMP: 2382 if (Reg1 != Reg + RegWidth) { 2383 Error(Loc, "registers in a list must have consecutive indices"); 2384 return false; 2385 } 2386 RegWidth++; 2387 return true; 2388 default: 2389 llvm_unreachable("unexpected register kind"); 2390 } 2391 } 2392 2393 struct RegInfo { 2394 StringLiteral Name; 2395 RegisterKind Kind; 2396 }; 2397 2398 static constexpr RegInfo RegularRegisters[] = { 2399 {{"v"}, IS_VGPR}, 2400 {{"s"}, IS_SGPR}, 2401 {{"ttmp"}, IS_TTMP}, 2402 {{"acc"}, IS_AGPR}, 2403 {{"a"}, IS_AGPR}, 2404 }; 2405 2406 static bool isRegularReg(RegisterKind Kind) { 2407 return Kind == IS_VGPR || 2408 Kind == IS_SGPR || 2409 Kind == IS_TTMP || 2410 Kind == IS_AGPR; 2411 } 2412 2413 static const RegInfo* getRegularRegInfo(StringRef Str) { 2414 for (const RegInfo &Reg : RegularRegisters) 2415 if (Str.startswith(Reg.Name)) 2416 return &Reg; 2417 return nullptr; 2418 } 2419 2420 static bool getRegNum(StringRef Str, unsigned& Num) { 2421 return !Str.getAsInteger(10, Num); 2422 } 2423 2424 bool 2425 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2426 const AsmToken &NextToken) const { 2427 2428 // A list of consecutive registers: [s0,s1,s2,s3] 2429 if (Token.is(AsmToken::LBrac)) 2430 return true; 2431 2432 if (!Token.is(AsmToken::Identifier)) 2433 return false; 2434 2435 // A single register like s0 or a range of registers like s[0:1] 2436 2437 StringRef Str = Token.getString(); 2438 const RegInfo *Reg = getRegularRegInfo(Str); 2439 if (Reg) { 2440 StringRef RegName = Reg->Name; 2441 StringRef RegSuffix = Str.substr(RegName.size()); 2442 if (!RegSuffix.empty()) { 2443 unsigned Num; 2444 // A single register with an index: rXX 2445 if (getRegNum(RegSuffix, Num)) 2446 return true; 2447 } else { 2448 // A range of registers: r[XX:YY]. 2449 if (NextToken.is(AsmToken::LBrac)) 2450 return true; 2451 } 2452 } 2453 2454 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2455 } 2456 2457 bool 2458 AMDGPUAsmParser::isRegister() 2459 { 2460 return isRegister(getToken(), peekToken()); 2461 } 2462 2463 unsigned 2464 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2465 unsigned RegNum, 2466 unsigned RegWidth, 2467 SMLoc Loc) { 2468 2469 assert(isRegularReg(RegKind)); 2470 2471 unsigned AlignSize = 1; 2472 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2473 // SGPR and TTMP registers must be aligned. 2474 // Max required alignment is 4 dwords. 
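    // For example, "s[2:3]" is accepted (the 64-bit pair starts at an even
    // index), while "s[1:2]" is rejected below with
    // "invalid register alignment". VGPRs have no alignment restriction.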
2475 AlignSize = std::min(RegWidth, 4u); 2476 } 2477 2478 if (RegNum % AlignSize != 0) { 2479 Error(Loc, "invalid register alignment"); 2480 return AMDGPU::NoRegister; 2481 } 2482 2483 unsigned RegIdx = RegNum / AlignSize; 2484 int RCID = getRegClass(RegKind, RegWidth); 2485 if (RCID == -1) { 2486 Error(Loc, "invalid or unsupported register size"); 2487 return AMDGPU::NoRegister; 2488 } 2489 2490 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2491 const MCRegisterClass RC = TRI->getRegClass(RCID); 2492 if (RegIdx >= RC.getNumRegs()) { 2493 Error(Loc, "register index is out of range"); 2494 return AMDGPU::NoRegister; 2495 } 2496 2497 return RC.getRegister(RegIdx); 2498 } 2499 2500 bool 2501 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2502 int64_t RegLo, RegHi; 2503 if (!skipToken(AsmToken::LBrac, "missing register index")) 2504 return false; 2505 2506 SMLoc FirstIdxLoc = getLoc(); 2507 SMLoc SecondIdxLoc; 2508 2509 if (!parseExpr(RegLo)) 2510 return false; 2511 2512 if (trySkipToken(AsmToken::Colon)) { 2513 SecondIdxLoc = getLoc(); 2514 if (!parseExpr(RegHi)) 2515 return false; 2516 } else { 2517 RegHi = RegLo; 2518 } 2519 2520 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2521 return false; 2522 2523 if (!isUInt<32>(RegLo)) { 2524 Error(FirstIdxLoc, "invalid register index"); 2525 return false; 2526 } 2527 2528 if (!isUInt<32>(RegHi)) { 2529 Error(SecondIdxLoc, "invalid register index"); 2530 return false; 2531 } 2532 2533 if (RegLo > RegHi) { 2534 Error(FirstIdxLoc, "first register index should not exceed second index"); 2535 return false; 2536 } 2537 2538 Num = static_cast<unsigned>(RegLo); 2539 Width = (RegHi - RegLo) + 1; 2540 return true; 2541 } 2542 2543 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2544 unsigned &RegNum, unsigned &RegWidth, 2545 SmallVectorImpl<AsmToken> &Tokens) { 2546 assert(isToken(AsmToken::Identifier)); 2547 unsigned Reg = getSpecialRegForName(getTokenStr()); 2548 if (Reg) { 2549 RegNum = 0; 2550 RegWidth = 1; 2551 RegKind = IS_SPECIAL; 2552 Tokens.push_back(getToken()); 2553 lex(); // skip register name 2554 } 2555 return Reg; 2556 } 2557 2558 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2559 unsigned &RegNum, unsigned &RegWidth, 2560 SmallVectorImpl<AsmToken> &Tokens) { 2561 assert(isToken(AsmToken::Identifier)); 2562 StringRef RegName = getTokenStr(); 2563 auto Loc = getLoc(); 2564 2565 const RegInfo *RI = getRegularRegInfo(RegName); 2566 if (!RI) { 2567 Error(Loc, "invalid register name"); 2568 return AMDGPU::NoRegister; 2569 } 2570 2571 Tokens.push_back(getToken()); 2572 lex(); // skip register name 2573 2574 RegKind = RI->Kind; 2575 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2576 if (!RegSuffix.empty()) { 2577 // Single 32-bit register: vXX. 2578 if (!getRegNum(RegSuffix, RegNum)) { 2579 Error(Loc, "invalid register index"); 2580 return AMDGPU::NoRegister; 2581 } 2582 RegWidth = 1; 2583 } else { 2584 // Range of registers: v[XX:YY]. ":YY" is optional. 
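    // E.g. "v[4:7]" yields RegNum = 4 and RegWidth = 4, while "v[5]"
    // yields RegNum = 5 and RegWidth = 1.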
2585 if (!ParseRegRange(RegNum, RegWidth)) 2586 return AMDGPU::NoRegister; 2587 } 2588 2589 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2590 } 2591 2592 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2593 unsigned &RegWidth, 2594 SmallVectorImpl<AsmToken> &Tokens) { 2595 unsigned Reg = AMDGPU::NoRegister; 2596 auto ListLoc = getLoc(); 2597 2598 if (!skipToken(AsmToken::LBrac, 2599 "expected a register or a list of registers")) { 2600 return AMDGPU::NoRegister; 2601 } 2602 2603 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2604 2605 auto Loc = getLoc(); 2606 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2607 return AMDGPU::NoRegister; 2608 if (RegWidth != 1) { 2609 Error(Loc, "expected a single 32-bit register"); 2610 return AMDGPU::NoRegister; 2611 } 2612 2613 for (; trySkipToken(AsmToken::Comma); ) { 2614 RegisterKind NextRegKind; 2615 unsigned NextReg, NextRegNum, NextRegWidth; 2616 Loc = getLoc(); 2617 2618 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2619 NextRegNum, NextRegWidth, 2620 Tokens)) { 2621 return AMDGPU::NoRegister; 2622 } 2623 if (NextRegWidth != 1) { 2624 Error(Loc, "expected a single 32-bit register"); 2625 return AMDGPU::NoRegister; 2626 } 2627 if (NextRegKind != RegKind) { 2628 Error(Loc, "registers in a list must be of the same kind"); 2629 return AMDGPU::NoRegister; 2630 } 2631 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2632 return AMDGPU::NoRegister; 2633 } 2634 2635 if (!skipToken(AsmToken::RBrac, 2636 "expected a comma or a closing square bracket")) { 2637 return AMDGPU::NoRegister; 2638 } 2639 2640 if (isRegularReg(RegKind)) 2641 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2642 2643 return Reg; 2644 } 2645 2646 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2647 unsigned &RegNum, unsigned &RegWidth, 2648 SmallVectorImpl<AsmToken> &Tokens) { 2649 auto Loc = getLoc(); 2650 Reg = AMDGPU::NoRegister; 2651 2652 if (isToken(AsmToken::Identifier)) { 2653 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2654 if (Reg == AMDGPU::NoRegister) 2655 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2656 } else { 2657 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2658 } 2659 2660 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2661 if (Reg == AMDGPU::NoRegister) { 2662 assert(Parser.hasPendingError()); 2663 return false; 2664 } 2665 2666 if (!subtargetHasRegister(*TRI, Reg)) { 2667 if (Reg == AMDGPU::SGPR_NULL) { 2668 Error(Loc, "'null' operand is not supported on this GPU"); 2669 } else { 2670 Error(Loc, "register not available on this GPU"); 2671 } 2672 return false; 2673 } 2674 2675 return true; 2676 } 2677 2678 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2679 unsigned &RegNum, unsigned &RegWidth, 2680 bool RestoreOnFailure /*=false*/) { 2681 Reg = AMDGPU::NoRegister; 2682 2683 SmallVector<AsmToken, 1> Tokens; 2684 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2685 if (RestoreOnFailure) { 2686 while (!Tokens.empty()) { 2687 getLexer().UnLex(Tokens.pop_back_val()); 2688 } 2689 } 2690 return true; 2691 } 2692 return false; 2693 } 2694 2695 Optional<StringRef> 2696 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2697 switch (RegKind) { 2698 case IS_VGPR: 2699 return StringRef(".amdgcn.next_free_vgpr"); 2700 case IS_SGPR: 2701 return StringRef(".amdgcn.next_free_sgpr"); 2702 default: 2703 return None; 2704 } 2705 } 2706 2707 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2708 auto SymbolName = getGprCountSymbolName(RegKind); 2709 assert(SymbolName && "initializing invalid register kind"); 2710 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2711 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2712 } 2713 2714 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2715 unsigned DwordRegIndex, 2716 unsigned RegWidth) { 2717 // Symbols are only defined for GCN targets 2718 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2719 return true; 2720 2721 auto SymbolName = getGprCountSymbolName(RegKind); 2722 if (!SymbolName) 2723 return true; 2724 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2725 2726 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2727 int64_t OldCount; 2728 2729 if (!Sym->isVariable()) 2730 return !Error(getLoc(), 2731 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2732 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2733 return !Error( 2734 getLoc(), 2735 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2736 2737 if (OldCount <= NewMax) 2738 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2739 2740 return true; 2741 } 2742 2743 std::unique_ptr<AMDGPUOperand> 2744 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2745 const auto &Tok = getToken(); 2746 SMLoc StartLoc = Tok.getLoc(); 2747 SMLoc EndLoc = Tok.getEndLoc(); 2748 RegisterKind RegKind; 2749 unsigned Reg, RegNum, RegWidth; 2750 2751 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2752 return nullptr; 2753 } 2754 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2755 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2756 return nullptr; 2757 } else 2758 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2759 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2760 } 2761 2762 OperandMatchResultTy 2763 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2764 // TODO: add syntactic sugar for 1/(2*PI) 2765 2766 assert(!isRegister()); 2767 assert(!isModifier()); 2768 2769 const auto& Tok = getToken(); 2770 const auto& NextTok = peekToken(); 2771 bool IsReal = Tok.is(AsmToken::Real); 2772 SMLoc S = getLoc(); 2773 bool Negate = false; 2774 2775 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2776 lex(); 2777 IsReal = true; 2778 Negate = true; 2779 } 2780 2781 if (IsReal) { 2782 // Floating-point expressions are not supported. 2783 // Can only allow floating-point literals with an 2784 // optional sign. 2785 2786 StringRef Num = getTokenStr(); 2787 lex(); 2788 2789 APFloat RealVal(APFloat::IEEEdouble()); 2790 auto roundMode = APFloat::rmNearestTiesToEven; 2791 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2792 return MatchOperand_ParseFail; 2793 } 2794 if (Negate) 2795 RealVal.changeSign(); 2796 2797 Operands.push_back( 2798 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2799 AMDGPUOperand::ImmTyNone, true)); 2800 2801 return MatchOperand_Success; 2802 2803 } else { 2804 int64_t IntVal; 2805 const MCExpr *Expr; 2806 SMLoc S = getLoc(); 2807 2808 if (HasSP3AbsModifier) { 2809 // This is a workaround for handling expressions 2810 // as arguments of SP3 'abs' modifier, for example: 2811 // |1.0| 2812 // |-1| 2813 // |1+x| 2814 // This syntax is not compatible with syntax of standard 2815 // MC expressions (due to the trailing '|'). 
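      // parsePrimaryExpr stops before the trailing '|', whereas a full
      // parseExpression would try to consume it as a binary operator.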
2816 SMLoc EndLoc; 2817 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2818 return MatchOperand_ParseFail; 2819 } else { 2820 if (Parser.parseExpression(Expr)) 2821 return MatchOperand_ParseFail; 2822 } 2823 2824 if (Expr->evaluateAsAbsolute(IntVal)) { 2825 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2826 } else { 2827 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2828 } 2829 2830 return MatchOperand_Success; 2831 } 2832 2833 return MatchOperand_NoMatch; 2834 } 2835 2836 OperandMatchResultTy 2837 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2838 if (!isRegister()) 2839 return MatchOperand_NoMatch; 2840 2841 if (auto R = parseRegister()) { 2842 assert(R->isReg()); 2843 Operands.push_back(std::move(R)); 2844 return MatchOperand_Success; 2845 } 2846 return MatchOperand_ParseFail; 2847 } 2848 2849 OperandMatchResultTy 2850 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2851 auto res = parseReg(Operands); 2852 if (res != MatchOperand_NoMatch) { 2853 return res; 2854 } else if (isModifier()) { 2855 return MatchOperand_NoMatch; 2856 } else { 2857 return parseImm(Operands, HasSP3AbsMod); 2858 } 2859 } 2860 2861 bool 2862 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2863 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2864 const auto &str = Token.getString(); 2865 return str == "abs" || str == "neg" || str == "sext"; 2866 } 2867 return false; 2868 } 2869 2870 bool 2871 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2872 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2873 } 2874 2875 bool 2876 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2877 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2878 } 2879 2880 bool 2881 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2882 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2883 } 2884 2885 // Check if this is an operand modifier or an opcode modifier 2886 // which may look like an expression but it is not. We should 2887 // avoid parsing these modifiers as expressions. Currently 2888 // recognized sequences are: 2889 // |...| 2890 // abs(...) 2891 // neg(...) 2892 // sext(...) 2893 // -reg 2894 // -|...| 2895 // -abs(...) 2896 // name:... 2897 // Note that simple opcode modifiers like 'gds' may be parsed as 2898 // expressions; this is a special case. See getExpressionAsToken. 2899 // 2900 bool 2901 AMDGPUAsmParser::isModifier() { 2902 2903 AsmToken Tok = getToken(); 2904 AsmToken NextToken[2]; 2905 peekTokens(NextToken); 2906 2907 return isOperandModifier(Tok, NextToken[0]) || 2908 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2909 isOpcodeModifierWithVal(Tok, NextToken[0]); 2910 } 2911 2912 // Check if the current token is an SP3 'neg' modifier. 2913 // Currently this modifier is allowed in the following context: 2914 // 2915 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2916 // 2. Before an 'abs' modifier: -abs(...) 2917 // 3. Before an SP3 'abs' modifier: -|...| 2918 // 2919 // In all other cases "-" is handled as a part 2920 // of an expression that follows the sign. 
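// Illustrative examples of the accepted forms (not exhaustive):
//   v_add_f32 v0, -v1, v2        // case 1: NEG modifier on a register
//   v_add_f32 v0, -abs(v1), v2   // case 2
//   v_add_f32 v0, -|v1|, v2      // case 3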
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1  // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1  // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2994 } 2995 2996 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2997 return MatchOperand_ParseFail; 2998 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2999 return MatchOperand_ParseFail; 3000 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3001 return MatchOperand_ParseFail; 3002 3003 AMDGPUOperand::Modifiers Mods; 3004 Mods.Abs = Abs || SP3Abs; 3005 Mods.Neg = Neg || SP3Neg; 3006 3007 if (Mods.hasFPModifiers()) { 3008 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3009 if (Op.isExpr()) { 3010 Error(Op.getStartLoc(), "expected an absolute expression"); 3011 return MatchOperand_ParseFail; 3012 } 3013 Op.setModifiers(Mods); 3014 } 3015 return MatchOperand_Success; 3016 } 3017 3018 OperandMatchResultTy 3019 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3020 bool AllowImm) { 3021 bool Sext = trySkipId("sext"); 3022 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3023 return MatchOperand_ParseFail; 3024 3025 OperandMatchResultTy Res; 3026 if (AllowImm) { 3027 Res = parseRegOrImm(Operands); 3028 } else { 3029 Res = parseReg(Operands); 3030 } 3031 if (Res != MatchOperand_Success) { 3032 return Sext? MatchOperand_ParseFail : Res; 3033 } 3034 3035 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3036 return MatchOperand_ParseFail; 3037 3038 AMDGPUOperand::Modifiers Mods; 3039 Mods.Sext = Sext; 3040 3041 if (Mods.hasIntModifiers()) { 3042 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3043 if (Op.isExpr()) { 3044 Error(Op.getStartLoc(), "expected an absolute expression"); 3045 return MatchOperand_ParseFail; 3046 } 3047 Op.setModifiers(Mods); 3048 } 3049 3050 return MatchOperand_Success; 3051 } 3052 3053 OperandMatchResultTy 3054 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3055 return parseRegOrImmWithFPInputMods(Operands, false); 3056 } 3057 3058 OperandMatchResultTy 3059 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3060 return parseRegOrImmWithIntInputMods(Operands, false); 3061 } 3062 3063 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3064 auto Loc = getLoc(); 3065 if (trySkipId("off")) { 3066 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3067 AMDGPUOperand::ImmTyOff, false)); 3068 return MatchOperand_Success; 3069 } 3070 3071 if (!isRegister()) 3072 return MatchOperand_NoMatch; 3073 3074 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3075 if (Reg) { 3076 Operands.push_back(std::move(Reg)); 3077 return MatchOperand_Success; 3078 } 3079 3080 return MatchOperand_ParseFail; 3081 3082 } 3083 3084 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3085 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3086 3087 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3088 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3089 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3090 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3091 return Match_InvalidOperand; 3092 3093 if ((TSFlags & SIInstrFlags::VOP3) && 3094 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3095 getForcedEncodingSize() != 64) 3096 return Match_PreferE32; 3097 3098 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3099 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3100 // v_mac_f32/16 allow only dst_sel == DWORD; 3101 auto OpNum = 3102 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// Determine which asm variants we should check.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases where a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3186 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3187 unsigned OpIdx) const { 3188 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3189 3190 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3191 return false; 3192 } 3193 3194 const MCOperand &MO = Inst.getOperand(OpIdx); 3195 3196 int64_t Val = MO.getImm(); 3197 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3198 3199 switch (OpSize) { // expected operand size 3200 case 8: 3201 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3202 case 4: 3203 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3204 case 2: { 3205 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3206 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3207 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3208 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3209 return AMDGPU::isInlinableIntLiteral(Val); 3210 3211 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3212 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3213 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3214 return AMDGPU::isInlinableIntLiteralV216(Val); 3215 3216 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3217 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3218 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3219 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3220 3221 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3222 } 3223 default: 3224 llvm_unreachable("invalid operand size"); 3225 } 3226 } 3227 3228 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3229 if (!isGFX10Plus()) 3230 return 1; 3231 3232 switch (Opcode) { 3233 // 64-bit shift instructions can use only one scalar value input 3234 case AMDGPU::V_LSHLREV_B64_e64: 3235 case AMDGPU::V_LSHLREV_B64_gfx10: 3236 case AMDGPU::V_LSHRREV_B64_e64: 3237 case AMDGPU::V_LSHRREV_B64_gfx10: 3238 case AMDGPU::V_ASHRREV_I64_e64: 3239 case AMDGPU::V_ASHRREV_I64_gfx10: 3240 case AMDGPU::V_LSHL_B64_e64: 3241 case AMDGPU::V_LSHR_B64_e64: 3242 case AMDGPU::V_ASHR_I64_e64: 3243 return 1; 3244 default: 3245 return 2; 3246 } 3247 } 3248 3249 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3250 const MCOperand &MO = Inst.getOperand(OpIdx); 3251 if (MO.isImm()) { 3252 return !isInlineConstant(Inst, OpIdx); 3253 } else if (MO.isReg()) { 3254 auto Reg = MO.getReg(); 3255 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3256 auto PReg = mc2PseudoReg(Reg); 3257 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3258 } else { 3259 return true; 3260 } 3261 } 3262 3263 bool 3264 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3265 const OperandVector &Operands) { 3266 const unsigned Opcode = Inst.getOpcode(); 3267 const MCInstrDesc &Desc = MII.get(Opcode); 3268 unsigned LastSGPR = AMDGPU::NoRegister; 3269 unsigned ConstantBusUseCount = 0; 3270 unsigned NumLiterals = 0; 3271 unsigned LiteralSize; 3272 3273 if (Desc.TSFlags & 3274 (SIInstrFlags::VOPC | 3275 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3276 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3277 SIInstrFlags::SDWA)) { 3278 // Check special imm operands (used by madmk, etc) 3279 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3280 ++NumLiterals; 3281 LiteralSize = 4; 3282 } 3283 3284 SmallDenseSet<unsigned> SGPRsUsed; 3285 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3286 if (SGPRUsed != AMDGPU::NoRegister) { 3287 SGPRsUsed.insert(SGPRUsed); 3288 ++ConstantBusUseCount; 3289 } 3290 3291 
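    // Illustrative examples (assuming the VOP3 forms are selected):
    // "v_add_f32 v0, s0, s1" reads two different SGPRs and exceeds the
    // limit of 1 on pre-GFX10 targets but is accepted on GFX10+ (limit 2);
    // "v_add_f32 v0, s0, s0" counts as a single constant bus use on any
    // target.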
    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3349 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3350 return false; 3351 } 3352 3353 bool 3354 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3355 const OperandVector &Operands) { 3356 const unsigned Opcode = Inst.getOpcode(); 3357 const MCInstrDesc &Desc = MII.get(Opcode); 3358 3359 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3360 if (DstIdx == -1 || 3361 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3362 return true; 3363 } 3364 3365 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3366 3367 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3368 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3369 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3370 3371 assert(DstIdx != -1); 3372 const MCOperand &Dst = Inst.getOperand(DstIdx); 3373 assert(Dst.isReg()); 3374 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3375 3376 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3377 3378 for (int SrcIdx : SrcIndices) { 3379 if (SrcIdx == -1) break; 3380 const MCOperand &Src = Inst.getOperand(SrcIdx); 3381 if (Src.isReg()) { 3382 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3383 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3384 Error(getRegLoc(SrcReg, Operands), 3385 "destination must be different than all sources"); 3386 return false; 3387 } 3388 } 3389 } 3390 3391 return true; 3392 } 3393 3394 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3395 3396 const unsigned Opc = Inst.getOpcode(); 3397 const MCInstrDesc &Desc = MII.get(Opc); 3398 3399 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3400 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3401 assert(ClampIdx != -1); 3402 return Inst.getOperand(ClampIdx).getImm() == 0; 3403 } 3404 3405 return true; 3406 } 3407 3408 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3409 3410 const unsigned Opc = Inst.getOpcode(); 3411 const MCInstrDesc &Desc = MII.get(Opc); 3412 3413 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3414 return true; 3415 3416 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3417 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3418 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3419 3420 assert(VDataIdx != -1); 3421 3422 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3423 return true; 3424 3425 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3426 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3427 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3428 if (DMask == 0) 3429 DMask = 1; 3430 3431 unsigned DataSize = 3432 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3433 if (hasPackedD16()) { 3434 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3435 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3436 DataSize = (DataSize + 1) / 2; 3437 } 3438 3439 return (VDataSize / 4) == DataSize + TFESize; 3440 } 3441 3442 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3443 const unsigned Opc = Inst.getOpcode(); 3444 const MCInstrDesc &Desc = MII.get(Opc); 3445 3446 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3447 return true; 3448 3449 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3450 3451 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3452 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3453 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3454 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3455 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3456 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3457 3458 assert(VAddr0Idx != -1); 3459 assert(SrsrcIdx != -1); 3460 assert(SrsrcIdx > VAddr0Idx); 3461 3462 if (DimIdx == -1) 3463 return true; // intersect_ray 3464 3465 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3466 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3467 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3468 unsigned ActualAddrSize = 3469 IsNSA ? SrsrcIdx - VAddr0Idx 3470 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3471 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3472 3473 unsigned ExpectedAddrSize = 3474 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3475 3476 if (!IsNSA) { 3477 if (ExpectedAddrSize > 8) 3478 ExpectedAddrSize = 16; 3479 3480 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3481 // This provides backward compatibility for assembly created 3482 // before 160b/192b/224b types were directly supported. 3483 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3484 return true; 3485 } 3486 3487 return ActualAddrSize == ExpectedAddrSize; 3488 } 3489 3490 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3491 3492 const unsigned Opc = Inst.getOpcode(); 3493 const MCInstrDesc &Desc = MII.get(Opc); 3494 3495 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3496 return true; 3497 if (!Desc.mayLoad() || !Desc.mayStore()) 3498 return true; // Not atomic 3499 3500 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3501 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3502 3503 // This is an incomplete check because image_atomic_cmpswap 3504 // may only use 0x3 and 0xf while other atomic operations 3505 // may use 0x1 and 0x3. However these limitations are 3506 // verified when we check that dmask matches dst size. 3507 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3508 } 3509 3510 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3511 3512 const unsigned Opc = Inst.getOpcode(); 3513 const MCInstrDesc &Desc = MII.get(Opc); 3514 3515 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3516 return true; 3517 3518 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3519 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3520 3521 // GATHER4 instructions use dmask in a different fashion compared to 3522 // other MIMG instructions. The only useful DMASK values are 3523 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3524 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3525 // this. 3526 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3527 } 3528 3529 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3530 const unsigned Opc = Inst.getOpcode(); 3531 const MCInstrDesc &Desc = MII.get(Opc); 3532 3533 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3534 return true; 3535 3536 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3537 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3538 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3539 3540 if (!BaseOpcode->MSAA) 3541 return true; 3542 3543 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3544 assert(DimIdx != -1); 3545 3546 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3547 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3548 3549 return DimInfo->MSAA; 3550 } 3551 3552 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3553 { 3554 switch (Opcode) { 3555 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3556 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3557 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3558 return true; 3559 default: 3560 return false; 3561 } 3562 } 3563 3564 // movrels* opcodes should only allow VGPRS as src0. 3565 // This is specified in .td description for vop1/vop3, 3566 // but sdwa is handled differently. See isSDWAOperand. 3567 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3568 const OperandVector &Operands) { 3569 3570 const unsigned Opc = Inst.getOpcode(); 3571 const MCInstrDesc &Desc = MII.get(Opc); 3572 3573 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3574 return true; 3575 3576 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3577 assert(Src0Idx != -1); 3578 3579 SMLoc ErrLoc; 3580 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3581 if (Src0.isReg()) { 3582 auto Reg = mc2PseudoReg(Src0.getReg()); 3583 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3584 if (!isSGPR(Reg, TRI)) 3585 return true; 3586 ErrLoc = getRegLoc(Reg, Operands); 3587 } else { 3588 ErrLoc = getConstLoc(Operands); 3589 } 3590 3591 Error(ErrLoc, "source operand must be a VGPR"); 3592 return false; 3593 } 3594 3595 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3596 const OperandVector &Operands) { 3597 3598 const unsigned Opc = Inst.getOpcode(); 3599 3600 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3601 return true; 3602 3603 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3604 assert(Src0Idx != -1); 3605 3606 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3607 if (!Src0.isReg()) 3608 return true; 3609 3610 auto Reg = mc2PseudoReg(Src0.getReg()); 3611 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3612 if (isSGPR(Reg, TRI)) { 3613 Error(getRegLoc(Reg, Operands), 3614 "source operand must be either a VGPR or an inline constant"); 3615 return false; 3616 } 3617 3618 return true; 3619 } 3620 3621 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3622 const OperandVector &Operands) { 3623 const unsigned Opc = Inst.getOpcode(); 3624 const MCInstrDesc &Desc = MII.get(Opc); 3625 3626 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3627 return true; 3628 3629 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3630 if (Src2Idx == -1) 3631 return true; 3632 3633 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3634 if (!Src2.isReg()) 3635 return true; 3636 3637 MCRegister Src2Reg = Src2.getReg(); 3638 MCRegister DstReg = Inst.getOperand(0).getReg(); 3639 if 
(Src2Reg == DstReg)
3640 return true;
3641 
3642 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3643 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3644 return true;
3645 
3646 if (isRegIntersect(Src2Reg, DstReg, TRI)) {
3647 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3648 "source 2 operand must not partially overlap with dst");
3649 return false;
3650 }
3651 
3652 return true;
3653 }
3654 
3655 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3656 switch (Inst.getOpcode()) {
3657 default:
3658 return true;
3659 case V_DIV_SCALE_F32_gfx6_gfx7:
3660 case V_DIV_SCALE_F32_vi:
3661 case V_DIV_SCALE_F32_gfx10:
3662 case V_DIV_SCALE_F64_gfx6_gfx7:
3663 case V_DIV_SCALE_F64_vi:
3664 case V_DIV_SCALE_F64_gfx10:
3665 break;
3666 }
3667 
3668 // TODO: Check that src0 = src1 or src2.
3669 
3670 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3671 AMDGPU::OpName::src1_modifiers,
3672 AMDGPU::OpName::src2_modifiers}) {
3673 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3674 .getImm() &
3675 SISrcMods::ABS) {
3676 return false;
3677 }
3678 }
3679 
3680 return true;
3681 }
3682 
3683 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3684 
3685 const unsigned Opc = Inst.getOpcode();
3686 const MCInstrDesc &Desc = MII.get(Opc);
3687 
3688 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3689 return true;
3690 
3691 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3692 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3693 if (isCI() || isSI())
3694 return false;
3695 }
3696 
3697 return true;
3698 }
3699 
3700 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3701 const unsigned Opc = Inst.getOpcode();
3702 const MCInstrDesc &Desc = MII.get(Opc);
3703 
3704 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3705 return true;
3706 
3707 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3708 if (DimIdx < 0)
3709 return true;
3710 
3711 long Imm = Inst.getOperand(DimIdx).getImm();
3712 if (Imm < 0 || Imm >= 8)
3713 return false;
3714 
3715 return true;
3716 }
3717 
3718 static bool IsRevOpcode(const unsigned Opcode)
3719 {
3720 switch (Opcode) {
3721 case AMDGPU::V_SUBREV_F32_e32:
3722 case AMDGPU::V_SUBREV_F32_e64:
3723 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3724 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3725 case AMDGPU::V_SUBREV_F32_e32_vi:
3726 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3727 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3728 case AMDGPU::V_SUBREV_F32_e64_vi:
3729 
3730 case AMDGPU::V_SUBREV_CO_U32_e32:
3731 case AMDGPU::V_SUBREV_CO_U32_e64:
3732 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3733 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3734 
3735 case AMDGPU::V_SUBBREV_U32_e32:
3736 case AMDGPU::V_SUBBREV_U32_e64:
3737 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3738 case AMDGPU::V_SUBBREV_U32_e32_vi:
3739 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3740 case AMDGPU::V_SUBBREV_U32_e64_vi:
3741 
3742 case AMDGPU::V_SUBREV_U32_e32:
3743 case AMDGPU::V_SUBREV_U32_e64:
3744 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3745 case AMDGPU::V_SUBREV_U32_e32_vi:
3746 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3747 case AMDGPU::V_SUBREV_U32_e64_vi:
3748 
3749 case AMDGPU::V_SUBREV_F16_e32:
3750 case AMDGPU::V_SUBREV_F16_e64:
3751 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3752 case AMDGPU::V_SUBREV_F16_e32_vi:
3753 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3754 case AMDGPU::V_SUBREV_F16_e64_vi:
3755 
3756 case AMDGPU::V_SUBREV_U16_e32:
3757 case AMDGPU::V_SUBREV_U16_e64:
3758 case AMDGPU::V_SUBREV_U16_e32_vi:
3759 case
AMDGPU::V_SUBREV_U16_e64_vi: 3760 3761 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3762 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3763 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3764 3765 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3766 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3767 3768 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3769 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3770 3771 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3772 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3773 3774 case AMDGPU::V_LSHRREV_B32_e32: 3775 case AMDGPU::V_LSHRREV_B32_e64: 3776 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3777 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3778 case AMDGPU::V_LSHRREV_B32_e32_vi: 3779 case AMDGPU::V_LSHRREV_B32_e64_vi: 3780 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3781 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3782 3783 case AMDGPU::V_ASHRREV_I32_e32: 3784 case AMDGPU::V_ASHRREV_I32_e64: 3785 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3786 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3787 case AMDGPU::V_ASHRREV_I32_e32_vi: 3788 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3789 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3790 case AMDGPU::V_ASHRREV_I32_e64_vi: 3791 3792 case AMDGPU::V_LSHLREV_B32_e32: 3793 case AMDGPU::V_LSHLREV_B32_e64: 3794 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3795 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3796 case AMDGPU::V_LSHLREV_B32_e32_vi: 3797 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3798 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3799 case AMDGPU::V_LSHLREV_B32_e64_vi: 3800 3801 case AMDGPU::V_LSHLREV_B16_e32: 3802 case AMDGPU::V_LSHLREV_B16_e64: 3803 case AMDGPU::V_LSHLREV_B16_e32_vi: 3804 case AMDGPU::V_LSHLREV_B16_e64_vi: 3805 case AMDGPU::V_LSHLREV_B16_gfx10: 3806 3807 case AMDGPU::V_LSHRREV_B16_e32: 3808 case AMDGPU::V_LSHRREV_B16_e64: 3809 case AMDGPU::V_LSHRREV_B16_e32_vi: 3810 case AMDGPU::V_LSHRREV_B16_e64_vi: 3811 case AMDGPU::V_LSHRREV_B16_gfx10: 3812 3813 case AMDGPU::V_ASHRREV_I16_e32: 3814 case AMDGPU::V_ASHRREV_I16_e64: 3815 case AMDGPU::V_ASHRREV_I16_e32_vi: 3816 case AMDGPU::V_ASHRREV_I16_e64_vi: 3817 case AMDGPU::V_ASHRREV_I16_gfx10: 3818 3819 case AMDGPU::V_LSHLREV_B64_e64: 3820 case AMDGPU::V_LSHLREV_B64_gfx10: 3821 case AMDGPU::V_LSHLREV_B64_vi: 3822 3823 case AMDGPU::V_LSHRREV_B64_e64: 3824 case AMDGPU::V_LSHRREV_B64_gfx10: 3825 case AMDGPU::V_LSHRREV_B64_vi: 3826 3827 case AMDGPU::V_ASHRREV_I64_e64: 3828 case AMDGPU::V_ASHRREV_I64_gfx10: 3829 case AMDGPU::V_ASHRREV_I64_vi: 3830 3831 case AMDGPU::V_PK_LSHLREV_B16: 3832 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3833 case AMDGPU::V_PK_LSHLREV_B16_vi: 3834 3835 case AMDGPU::V_PK_LSHRREV_B16: 3836 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3837 case AMDGPU::V_PK_LSHRREV_B16_vi: 3838 case AMDGPU::V_PK_ASHRREV_I16: 3839 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3840 case AMDGPU::V_PK_ASHRREV_I16_vi: 3841 return true; 3842 default: 3843 return false; 3844 } 3845 } 3846 3847 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3848 3849 using namespace SIInstrFlags; 3850 const unsigned Opcode = Inst.getOpcode(); 3851 const MCInstrDesc &Desc = MII.get(Opcode); 3852 3853 // lds_direct register is defined so that it can be used 3854 // with 9-bit operands only. Ignore encodings which do not accept these. 
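// In short: lds_direct is only accepted as src0 of the VOP encodings checked
// below; uses as src1/src2, with a *rev* opcode, with SDWA, or on gfx90a are
// diagnosed by the checks that follow.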
3855 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3856 if ((Desc.TSFlags & Enc) == 0) 3857 return None; 3858 3859 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3860 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3861 if (SrcIdx == -1) 3862 break; 3863 const auto &Src = Inst.getOperand(SrcIdx); 3864 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3865 3866 if (isGFX90A()) 3867 return StringRef("lds_direct is not supported on this GPU"); 3868 3869 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3870 return StringRef("lds_direct cannot be used with this instruction"); 3871 3872 if (SrcName != OpName::src0) 3873 return StringRef("lds_direct may be used as src0 only"); 3874 } 3875 } 3876 3877 return None; 3878 } 3879 3880 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3881 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3882 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3883 if (Op.isFlatOffset()) 3884 return Op.getStartLoc(); 3885 } 3886 return getLoc(); 3887 } 3888 3889 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3890 const OperandVector &Operands) { 3891 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3892 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3893 return true; 3894 3895 auto Opcode = Inst.getOpcode(); 3896 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3897 assert(OpNum != -1); 3898 3899 const auto &Op = Inst.getOperand(OpNum); 3900 if (!hasFlatOffsets() && Op.getImm() != 0) { 3901 Error(getFlatOffsetLoc(Operands), 3902 "flat offset modifier is not supported on this GPU"); 3903 return false; 3904 } 3905 3906 // For FLAT segment the offset must be positive; 3907 // MSB is ignored and forced to zero. 3908 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3909 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3910 if (!isIntN(OffsetSize, Op.getImm())) { 3911 Error(getFlatOffsetLoc(Operands), 3912 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3913 return false; 3914 } 3915 } else { 3916 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3917 if (!isUIntN(OffsetSize, Op.getImm())) { 3918 Error(getFlatOffsetLoc(Operands), 3919 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3920 return false; 3921 } 3922 } 3923 3924 return true; 3925 } 3926 3927 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3928 // Start with second operand because SMEM Offset cannot be dst or src0. 
3929 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3930 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3931 if (Op.isSMEMOffset()) 3932 return Op.getStartLoc(); 3933 } 3934 return getLoc(); 3935 } 3936 3937 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3938 const OperandVector &Operands) { 3939 if (isCI() || isSI()) 3940 return true; 3941 3942 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3943 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3944 return true; 3945 3946 auto Opcode = Inst.getOpcode(); 3947 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3948 if (OpNum == -1) 3949 return true; 3950 3951 const auto &Op = Inst.getOperand(OpNum); 3952 if (!Op.isImm()) 3953 return true; 3954 3955 uint64_t Offset = Op.getImm(); 3956 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3957 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3958 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3959 return true; 3960 3961 Error(getSMEMOffsetLoc(Operands), 3962 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3963 "expected a 21-bit signed offset"); 3964 3965 return false; 3966 } 3967 3968 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3969 unsigned Opcode = Inst.getOpcode(); 3970 const MCInstrDesc &Desc = MII.get(Opcode); 3971 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3972 return true; 3973 3974 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3975 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3976 3977 const int OpIndices[] = { Src0Idx, Src1Idx }; 3978 3979 unsigned NumExprs = 0; 3980 unsigned NumLiterals = 0; 3981 uint32_t LiteralValue; 3982 3983 for (int OpIdx : OpIndices) { 3984 if (OpIdx == -1) break; 3985 3986 const MCOperand &MO = Inst.getOperand(OpIdx); 3987 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3988 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3989 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3990 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3991 if (NumLiterals == 0 || LiteralValue != Value) { 3992 LiteralValue = Value; 3993 ++NumLiterals; 3994 } 3995 } else if (MO.isExpr()) { 3996 ++NumExprs; 3997 } 3998 } 3999 } 4000 4001 return NumLiterals + NumExprs <= 1; 4002 } 4003 4004 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4005 const unsigned Opc = Inst.getOpcode(); 4006 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4007 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4008 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4009 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4010 4011 if (OpSel & ~3) 4012 return false; 4013 } 4014 return true; 4015 } 4016 4017 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4018 const OperandVector &Operands) { 4019 const unsigned Opc = Inst.getOpcode(); 4020 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4021 if (DppCtrlIdx < 0) 4022 return true; 4023 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4024 4025 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4026 // DPP64 is supported for row_newbcast only. 
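// The check below infers a 64-bit source from src0 having a sub1 subregister
// (e.g. a register pair such as v[0:1]); 32-bit sources have no sub1 and are
// not restricted here.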
4027 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4028 if (Src0Idx >= 0 && 4029 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4030 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4031 Error(S, "64 bit dpp only supports row_newbcast"); 4032 return false; 4033 } 4034 } 4035 4036 return true; 4037 } 4038 4039 // Check if VCC register matches wavefront size 4040 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4041 auto FB = getFeatureBits(); 4042 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4043 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4044 } 4045 4046 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4047 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4048 const OperandVector &Operands) { 4049 unsigned Opcode = Inst.getOpcode(); 4050 const MCInstrDesc &Desc = MII.get(Opcode); 4051 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4052 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4053 ImmIdx == -1) 4054 return true; 4055 4056 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4057 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4058 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4059 4060 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4061 4062 unsigned NumExprs = 0; 4063 unsigned NumLiterals = 0; 4064 uint32_t LiteralValue; 4065 4066 for (int OpIdx : OpIndices) { 4067 if (OpIdx == -1) 4068 continue; 4069 4070 const MCOperand &MO = Inst.getOperand(OpIdx); 4071 if (!MO.isImm() && !MO.isExpr()) 4072 continue; 4073 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4074 continue; 4075 4076 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4077 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4078 Error(getConstLoc(Operands), 4079 "inline constants are not allowed for this operand"); 4080 return false; 4081 } 4082 4083 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4084 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4085 if (NumLiterals == 0 || LiteralValue != Value) { 4086 LiteralValue = Value; 4087 ++NumLiterals; 4088 } 4089 } else if (MO.isExpr()) { 4090 ++NumExprs; 4091 } 4092 } 4093 NumLiterals += NumExprs; 4094 4095 if (!NumLiterals) 4096 return true; 4097 4098 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4099 Error(getLitLoc(Operands), "literal operands are not supported"); 4100 return false; 4101 } 4102 4103 if (NumLiterals > 1) { 4104 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4105 return false; 4106 } 4107 4108 return true; 4109 } 4110 4111 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4112 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4113 const MCRegisterInfo *MRI) { 4114 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4115 if (OpIdx < 0) 4116 return -1; 4117 4118 const MCOperand &Op = Inst.getOperand(OpIdx); 4119 if (!Op.isReg()) 4120 return -1; 4121 4122 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4123 auto Reg = Sub ? Sub : Op.getReg(); 4124 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4125 return AGPR32.contains(Reg) ? 
1 : 0; 4126 } 4127 4128 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4129 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4130 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4131 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4132 SIInstrFlags::DS)) == 0) 4133 return true; 4134 4135 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4136 : AMDGPU::OpName::vdata; 4137 4138 const MCRegisterInfo *MRI = getMRI(); 4139 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4140 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4141 4142 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4143 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4144 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4145 return false; 4146 } 4147 4148 auto FB = getFeatureBits(); 4149 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4150 if (DataAreg < 0 || DstAreg < 0) 4151 return true; 4152 return DstAreg == DataAreg; 4153 } 4154 4155 return DstAreg < 1 && DataAreg < 1; 4156 } 4157 4158 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4159 auto FB = getFeatureBits(); 4160 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4161 return true; 4162 4163 const MCRegisterInfo *MRI = getMRI(); 4164 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4165 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4166 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4167 const MCOperand &Op = Inst.getOperand(I); 4168 if (!Op.isReg()) 4169 continue; 4170 4171 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4172 if (!Sub) 4173 continue; 4174 4175 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4176 return false; 4177 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4178 return false; 4179 } 4180 4181 return true; 4182 } 4183 4184 // gfx90a has an undocumented limitation: 4185 // DS_GWS opcodes must use even aligned registers. 4186 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4187 const OperandVector &Operands) { 4188 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4189 return true; 4190 4191 int Opc = Inst.getOpcode(); 4192 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4193 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4194 return true; 4195 4196 const MCRegisterInfo *MRI = getMRI(); 4197 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4198 int Data0Pos = 4199 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4200 assert(Data0Pos != -1); 4201 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4202 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4203 if (RegIdx & 1) { 4204 SMLoc RegLoc = getRegLoc(Reg, Operands); 4205 Error(RegLoc, "vgpr must be even aligned"); 4206 return false; 4207 } 4208 4209 return true; 4210 } 4211 4212 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4213 const OperandVector &Operands, 4214 const SMLoc &IDLoc) { 4215 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4216 AMDGPU::OpName::cpol); 4217 if (CPolPos == -1) 4218 return true; 4219 4220 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4221 4222 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4223 if ((TSFlags & (SIInstrFlags::SMRD)) && 4224 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4225 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4226 return false; 4227 } 4228 4229 if (isGFX90A() && (CPol & CPol::SCC)) { 4230 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4231 StringRef CStr(S.getPointer()); 4232 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4233 Error(S, "scc is not supported on this GPU"); 4234 return false; 4235 } 4236 4237 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4238 return true; 4239 4240 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4241 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4242 Error(IDLoc, "instruction must use glc"); 4243 return false; 4244 } 4245 } else { 4246 if (CPol & CPol::GLC) { 4247 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4248 StringRef CStr(S.getPointer()); 4249 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4250 Error(S, "instruction must not use glc"); 4251 return false; 4252 } 4253 } 4254 4255 return true; 4256 } 4257 4258 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4259 const SMLoc &IDLoc, 4260 const OperandVector &Operands) { 4261 if (auto ErrMsg = validateLdsDirect(Inst)) { 4262 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4263 return false; 4264 } 4265 if (!validateSOPLiteral(Inst)) { 4266 Error(getLitLoc(Operands), 4267 "only one literal operand is allowed"); 4268 return false; 4269 } 4270 if (!validateVOPLiteral(Inst, Operands)) { 4271 return false; 4272 } 4273 if (!validateConstantBusLimitations(Inst, Operands)) { 4274 return false; 4275 } 4276 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4277 return false; 4278 } 4279 if (!validateIntClampSupported(Inst)) { 4280 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4281 "integer clamping is not supported on this GPU"); 4282 return false; 4283 } 4284 if (!validateOpSel(Inst)) { 4285 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4286 "invalid op_sel operand"); 4287 return false; 4288 } 4289 if (!validateDPP(Inst, Operands)) { 4290 return false; 4291 } 4292 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
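// The MIMG form, e.g. "image_load ... dmask:0x1 d16" (operands elided for
// illustration), is checked below and rejected on SI/CI where d16 image
// instructions are not available.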
4293 if (!validateMIMGD16(Inst)) { 4294 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4295 "d16 modifier is not supported on this GPU"); 4296 return false; 4297 } 4298 if (!validateMIMGDim(Inst)) { 4299 Error(IDLoc, "dim modifier is required on this GPU"); 4300 return false; 4301 } 4302 if (!validateMIMGMSAA(Inst)) { 4303 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4304 "invalid dim; must be MSAA type"); 4305 return false; 4306 } 4307 if (!validateMIMGDataSize(Inst)) { 4308 Error(IDLoc, 4309 "image data size does not match dmask and tfe"); 4310 return false; 4311 } 4312 if (!validateMIMGAddrSize(Inst)) { 4313 Error(IDLoc, 4314 "image address size does not match dim and a16"); 4315 return false; 4316 } 4317 if (!validateMIMGAtomicDMask(Inst)) { 4318 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4319 "invalid atomic image dmask"); 4320 return false; 4321 } 4322 if (!validateMIMGGatherDMask(Inst)) { 4323 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4324 "invalid image_gather dmask: only one bit must be set"); 4325 return false; 4326 } 4327 if (!validateMovrels(Inst, Operands)) { 4328 return false; 4329 } 4330 if (!validateFlatOffset(Inst, Operands)) { 4331 return false; 4332 } 4333 if (!validateSMEMOffset(Inst, Operands)) { 4334 return false; 4335 } 4336 if (!validateMAIAccWrite(Inst, Operands)) { 4337 return false; 4338 } 4339 if (!validateMFMA(Inst, Operands)) { 4340 return false; 4341 } 4342 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4343 return false; 4344 } 4345 4346 if (!validateAGPRLdSt(Inst)) { 4347 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4348 ? "invalid register class: data and dst should be all VGPR or AGPR" 4349 : "invalid register class: agpr loads and stores not supported on this GPU" 4350 ); 4351 return false; 4352 } 4353 if (!validateVGPRAlign(Inst)) { 4354 Error(IDLoc, 4355 "invalid register class: vgpr tuples must be 64 bit aligned"); 4356 return false; 4357 } 4358 if (!validateGWS(Inst, Operands)) { 4359 return false; 4360 } 4361 4362 if (!validateDivScale(Inst)) { 4363 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4364 return false; 4365 } 4366 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4367 return false; 4368 } 4369 4370 return true; 4371 } 4372 4373 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4374 const FeatureBitset &FBS, 4375 unsigned VariantID = 0); 4376 4377 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4378 const FeatureBitset &AvailableFeatures, 4379 unsigned VariantID); 4380 4381 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4382 const FeatureBitset &FBS) { 4383 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4384 } 4385 4386 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4387 const FeatureBitset &FBS, 4388 ArrayRef<unsigned> Variants) { 4389 for (auto Variant : Variants) { 4390 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4391 return true; 4392 } 4393 4394 return false; 4395 } 4396 4397 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4398 const SMLoc &IDLoc) { 4399 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4400 4401 // Check if requested instruction variant is supported. 4402 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4403 return false; 4404 4405 // This instruction is not supported. 4406 // Clear any other pending errors because they are no longer relevant. 4407 getParser().clearPendingErrors(); 4408 4409 // Requested instruction variant is not supported. 
4410 // Check if any other variants are supported. 4411 StringRef VariantName = getMatchedVariantName(); 4412 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4413 return Error(IDLoc, 4414 Twine(VariantName, 4415 " variant of this instruction is not supported")); 4416 } 4417 4418 // Finally check if this instruction is supported on any other GPU. 4419 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4420 return Error(IDLoc, "instruction not supported on this GPU"); 4421 } 4422 4423 // Instruction not supported on any GPU. Probably a typo. 4424 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4425 return Error(IDLoc, "invalid instruction" + Suggestion); 4426 } 4427 4428 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4429 OperandVector &Operands, 4430 MCStreamer &Out, 4431 uint64_t &ErrorInfo, 4432 bool MatchingInlineAsm) { 4433 MCInst Inst; 4434 unsigned Result = Match_Success; 4435 for (auto Variant : getMatchedVariants()) { 4436 uint64_t EI; 4437 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4438 Variant); 4439 // We order match statuses from least to most specific. We use most specific 4440 // status as resulting 4441 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4442 if ((R == Match_Success) || 4443 (R == Match_PreferE32) || 4444 (R == Match_MissingFeature && Result != Match_PreferE32) || 4445 (R == Match_InvalidOperand && Result != Match_MissingFeature 4446 && Result != Match_PreferE32) || 4447 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4448 && Result != Match_MissingFeature 4449 && Result != Match_PreferE32)) { 4450 Result = R; 4451 ErrorInfo = EI; 4452 } 4453 if (R == Match_Success) 4454 break; 4455 } 4456 4457 if (Result == Match_Success) { 4458 if (!validateInstruction(Inst, IDLoc, Operands)) { 4459 return true; 4460 } 4461 Inst.setLoc(IDLoc); 4462 Out.emitInstruction(Inst, getSTI()); 4463 return false; 4464 } 4465 4466 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4467 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4468 return true; 4469 } 4470 4471 switch (Result) { 4472 default: break; 4473 case Match_MissingFeature: 4474 // It has been verified that the specified instruction 4475 // mnemonic is valid. A match was found but it requires 4476 // features which are not supported on this GPU. 
4477 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4478 4479 case Match_InvalidOperand: { 4480 SMLoc ErrorLoc = IDLoc; 4481 if (ErrorInfo != ~0ULL) { 4482 if (ErrorInfo >= Operands.size()) { 4483 return Error(IDLoc, "too few operands for instruction"); 4484 } 4485 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4486 if (ErrorLoc == SMLoc()) 4487 ErrorLoc = IDLoc; 4488 } 4489 return Error(ErrorLoc, "invalid operand for instruction"); 4490 } 4491 4492 case Match_PreferE32: 4493 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4494 "should be encoded as e32"); 4495 case Match_MnemonicFail: 4496 llvm_unreachable("Invalid instructions should have been handled already"); 4497 } 4498 llvm_unreachable("Implement any new match types added!"); 4499 } 4500 4501 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4502 int64_t Tmp = -1; 4503 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4504 return true; 4505 } 4506 if (getParser().parseAbsoluteExpression(Tmp)) { 4507 return true; 4508 } 4509 Ret = static_cast<uint32_t>(Tmp); 4510 return false; 4511 } 4512 4513 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4514 uint32_t &Minor) { 4515 if (ParseAsAbsoluteExpression(Major)) 4516 return TokError("invalid major version"); 4517 4518 if (!trySkipToken(AsmToken::Comma)) 4519 return TokError("minor version number required, comma expected"); 4520 4521 if (ParseAsAbsoluteExpression(Minor)) 4522 return TokError("invalid minor version"); 4523 4524 return false; 4525 } 4526 4527 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4528 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4529 return TokError("directive only supported for amdgcn architecture"); 4530 4531 std::string TargetIDDirective; 4532 SMLoc TargetStart = getTok().getLoc(); 4533 if (getParser().parseEscapedString(TargetIDDirective)) 4534 return true; 4535 4536 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4537 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4538 return getParser().Error(TargetRange.Start, 4539 (Twine(".amdgcn_target directive's target id ") + 4540 Twine(TargetIDDirective) + 4541 Twine(" does not match the specified target id ") + 4542 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4543 4544 return false; 4545 } 4546 4547 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4548 return Error(Range.Start, "value out of range", Range); 4549 } 4550 4551 bool AMDGPUAsmParser::calculateGPRBlocks( 4552 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4553 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4554 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4555 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4556 // TODO(scott.linder): These calculations are duplicated from 4557 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
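// Rough shape of the computation (a summary of the code below): adjust the
// raw SGPR count for VCC/flat_scratch/XNACK usage and subtarget quirks, then
// convert both counts to the block granules encoded in COMPUTE_PGM_RSRC1.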
4558 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4559 4560 unsigned NumVGPRs = NextFreeVGPR; 4561 unsigned NumSGPRs = NextFreeSGPR; 4562 4563 if (Version.Major >= 10) 4564 NumSGPRs = 0; 4565 else { 4566 unsigned MaxAddressableNumSGPRs = 4567 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4568 4569 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4570 NumSGPRs > MaxAddressableNumSGPRs) 4571 return OutOfRangeError(SGPRRange); 4572 4573 NumSGPRs += 4574 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4575 4576 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4577 NumSGPRs > MaxAddressableNumSGPRs) 4578 return OutOfRangeError(SGPRRange); 4579 4580 if (Features.test(FeatureSGPRInitBug)) 4581 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4582 } 4583 4584 VGPRBlocks = 4585 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4586 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4587 4588 return false; 4589 } 4590 4591 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4592 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4593 return TokError("directive only supported for amdgcn architecture"); 4594 4595 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4596 return TokError("directive only supported for amdhsa OS"); 4597 4598 StringRef KernelName; 4599 if (getParser().parseIdentifier(KernelName)) 4600 return true; 4601 4602 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4603 4604 StringSet<> Seen; 4605 4606 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4607 4608 SMRange VGPRRange; 4609 uint64_t NextFreeVGPR = 0; 4610 uint64_t AccumOffset = 0; 4611 SMRange SGPRRange; 4612 uint64_t NextFreeSGPR = 0; 4613 4614 // Count the number of user SGPRs implied from the enabled feature bits. 4615 unsigned ImpliedUserSGPRCount = 0; 4616 4617 // Track if the asm explicitly contains the directive for the user SGPR 4618 // count. 
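// (i.e. an explicit .amdhsa_user_sgpr_count; when absent, the implied count
// accumulated from the user SGPR directives below is used instead.)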
4619 Optional<unsigned> ExplicitUserSGPRCount; 4620 bool ReserveVCC = true; 4621 bool ReserveFlatScr = true; 4622 Optional<bool> EnableWavefrontSize32; 4623 4624 while (true) { 4625 while (trySkipToken(AsmToken::EndOfStatement)); 4626 4627 StringRef ID; 4628 SMRange IDRange = getTok().getLocRange(); 4629 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4630 return true; 4631 4632 if (ID == ".end_amdhsa_kernel") 4633 break; 4634 4635 if (Seen.find(ID) != Seen.end()) 4636 return TokError(".amdhsa_ directives cannot be repeated"); 4637 Seen.insert(ID); 4638 4639 SMLoc ValStart = getLoc(); 4640 int64_t IVal; 4641 if (getParser().parseAbsoluteExpression(IVal)) 4642 return true; 4643 SMLoc ValEnd = getLoc(); 4644 SMRange ValRange = SMRange(ValStart, ValEnd); 4645 4646 if (IVal < 0) 4647 return OutOfRangeError(ValRange); 4648 4649 uint64_t Val = IVal; 4650 4651 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4652 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4653 return OutOfRangeError(RANGE); \ 4654 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4655 4656 if (ID == ".amdhsa_group_segment_fixed_size") { 4657 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4658 return OutOfRangeError(ValRange); 4659 KD.group_segment_fixed_size = Val; 4660 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4661 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4662 return OutOfRangeError(ValRange); 4663 KD.private_segment_fixed_size = Val; 4664 } else if (ID == ".amdhsa_kernarg_size") { 4665 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4666 return OutOfRangeError(ValRange); 4667 KD.kernarg_size = Val; 4668 } else if (ID == ".amdhsa_user_sgpr_count") { 4669 ExplicitUserSGPRCount = Val; 4670 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4671 if (hasArchitectedFlatScratch()) 4672 return Error(IDRange.Start, 4673 "directive is not supported with architected flat scratch", 4674 IDRange); 4675 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4676 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4677 Val, ValRange); 4678 if (Val) 4679 ImpliedUserSGPRCount += 4; 4680 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4681 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4682 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4683 ValRange); 4684 if (Val) 4685 ImpliedUserSGPRCount += 2; 4686 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4687 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4688 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4689 ValRange); 4690 if (Val) 4691 ImpliedUserSGPRCount += 2; 4692 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4693 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4694 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4695 Val, ValRange); 4696 if (Val) 4697 ImpliedUserSGPRCount += 2; 4698 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4699 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4700 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4701 ValRange); 4702 if (Val) 4703 ImpliedUserSGPRCount += 2; 4704 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4705 if (hasArchitectedFlatScratch()) 4706 return Error(IDRange.Start, 4707 "directive is not supported with architected flat scratch", 4708 IDRange); 4709 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4710 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4711 ValRange); 4712 if (Val) 4713 ImpliedUserSGPRCount += 2; 4714 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4715 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4716 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4717 Val, ValRange); 4718 if (Val) 4719 ImpliedUserSGPRCount += 1; 4720 } else if (ID == ".amdhsa_wavefront_size32") { 4721 if (IVersion.Major < 10) 4722 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4723 EnableWavefrontSize32 = Val; 4724 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4725 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4726 Val, ValRange); 4727 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4728 if (hasArchitectedFlatScratch()) 4729 return Error(IDRange.Start, 4730 "directive is not supported with architected flat scratch", 4731 IDRange); 4732 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4733 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4734 } else if (ID == ".amdhsa_enable_private_segment") { 4735 if (!hasArchitectedFlatScratch()) 4736 return Error( 4737 IDRange.Start, 4738 "directive is not supported without architected flat scratch", 4739 IDRange); 4740 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4741 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4742 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4744 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4745 ValRange); 4746 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4748 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4749 ValRange); 4750 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4751 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4752 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4753 ValRange); 4754 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4755 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4756 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4757 ValRange); 4758 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4759 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4760 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4761 ValRange); 4762 } else if (ID == ".amdhsa_next_free_vgpr") { 4763 VGPRRange = ValRange; 4764 NextFreeVGPR = Val; 4765 } else if (ID == ".amdhsa_next_free_sgpr") { 4766 SGPRRange = ValRange; 4767 NextFreeSGPR = Val; 4768 } else if (ID == ".amdhsa_accum_offset") { 4769 if (!isGFX90A()) 4770 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4771 AccumOffset = Val; 4772 } else if (ID == ".amdhsa_reserve_vcc") { 4773 if (!isUInt<1>(Val)) 4774 return OutOfRangeError(ValRange); 4775 ReserveVCC = Val; 4776 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4777 if (IVersion.Major < 7) 4778 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4779 if (hasArchitectedFlatScratch()) 4780 return Error(IDRange.Start, 4781 "directive is not supported with architected flat scratch", 4782 IDRange); 4783 if (!isUInt<1>(Val)) 4784 return OutOfRangeError(ValRange); 4785 ReserveFlatScr = Val; 4786 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4787 if (IVersion.Major < 8) 4788 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4789 if (!isUInt<1>(Val)) 4790 return OutOfRangeError(ValRange); 4791 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4792 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4793 IDRange); 4794 } else if (ID == ".amdhsa_float_round_mode_32") { 4795 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4796 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4797 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4798 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4799 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4800 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4801 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4802 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4803 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4804 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4805 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4806 ValRange); 4807 } else if (ID == ".amdhsa_dx10_clamp") { 4808 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4809 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4810 } else if (ID == ".amdhsa_ieee_mode") { 4811 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4812 Val, ValRange); 4813 } else if (ID == ".amdhsa_fp16_overflow") { 4814 if (IVersion.Major < 9) 4815 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4816 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4817 ValRange); 4818 } else if (ID == ".amdhsa_tg_split") { 4819 if (!isGFX90A()) 4820 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4821 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4822 ValRange); 4823 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4824 if (IVersion.Major < 10) 4825 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4826 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4827 ValRange); 4828 } else if (ID == ".amdhsa_memory_ordered") { 4829 if (IVersion.Major < 10) 4830 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4831 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4832 ValRange); 4833 } else if (ID == ".amdhsa_forward_progress") { 4834 if (IVersion.Major < 10) 4835 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4836 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4837 ValRange); 4838 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4839 PARSE_BITS_ENTRY( 4840 KD.compute_pgm_rsrc2, 4841 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4842 ValRange); 4843 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4844 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4845 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4846 Val, ValRange); 4847 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4848 PARSE_BITS_ENTRY( 4849 KD.compute_pgm_rsrc2, 4850 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4851 ValRange); 4852 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4853 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4854 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4855 Val, ValRange); 4856 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4857 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4858 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4859 Val, ValRange); 4860 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4862 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4863 Val, ValRange); 4864 } else if (ID == ".amdhsa_exception_int_div_zero") { 4865 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4866 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4867 Val, ValRange); 4868 } else { 4869 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4870 } 4871 4872 #undef PARSE_BITS_ENTRY 4873 } 4874 4875 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4876 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4877 4878 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4879 return TokError(".amdhsa_next_free_sgpr directive is required"); 4880 4881 unsigned VGPRBlocks; 4882 unsigned SGPRBlocks; 4883 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4884 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4885 EnableWavefrontSize32, NextFreeVGPR, 4886 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4887 SGPRBlocks)) 4888 return true; 4889 4890 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4891 VGPRBlocks)) 4892 return OutOfRangeError(VGPRRange); 4893 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4894 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4895 4896 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4897 SGPRBlocks)) 4898 return OutOfRangeError(SGPRRange); 4899 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4900 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4901 SGPRBlocks); 4902 4903 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4904 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4905 "enabled user SGPRs"); 4906 4907 unsigned UserSGPRCount = 4908 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4909 4910 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4911 return TokError("too many user SGPRs enabled"); 4912 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4913 UserSGPRCount); 4914 4915 if (isGFX90A()) { 4916 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4917 return TokError(".amdhsa_accum_offset directive is required"); 4918 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4919 return TokError("accum_offset should be in range [4..256] in " 4920 "increments of 4"); 4921 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4922 return TokError("accum_offset exceeds total VGPR allocation"); 4923 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4924 (AccumOffset / 4 - 1)); 4925 } 4926 4927 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4928 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4929 ReserveFlatScr); 4930 return false; 4931 } 4932 4933 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4934 uint32_t Major; 4935 uint32_t Minor; 4936 4937 if (ParseDirectiveMajorMinor(Major, Minor)) 4938 return true; 4939 4940 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4941 return false; 4942 } 4943 4944 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4945 uint32_t Major; 4946 uint32_t Minor; 4947 uint32_t Stepping; 4948 StringRef VendorName; 4949 StringRef ArchName; 4950 4951 // If this directive has no arguments, then use the ISA version for the 4952 // targeted GPU. 
4953 if (isToken(AsmToken::EndOfStatement)) { 4954 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4955 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4956 ISA.Stepping, 4957 "AMD", "AMDGPU"); 4958 return false; 4959 } 4960 4961 if (ParseDirectiveMajorMinor(Major, Minor)) 4962 return true; 4963 4964 if (!trySkipToken(AsmToken::Comma)) 4965 return TokError("stepping version number required, comma expected"); 4966 4967 if (ParseAsAbsoluteExpression(Stepping)) 4968 return TokError("invalid stepping version"); 4969 4970 if (!trySkipToken(AsmToken::Comma)) 4971 return TokError("vendor name required, comma expected"); 4972 4973 if (!parseString(VendorName, "invalid vendor name")) 4974 return true; 4975 4976 if (!trySkipToken(AsmToken::Comma)) 4977 return TokError("arch name required, comma expected"); 4978 4979 if (!parseString(ArchName, "invalid arch name")) 4980 return true; 4981 4982 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4983 VendorName, ArchName); 4984 return false; 4985 } 4986 4987 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4988 amd_kernel_code_t &Header) { 4989 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4990 // assembly for backwards compatibility. 4991 if (ID == "max_scratch_backing_memory_byte_size") { 4992 Parser.eatToEndOfStatement(); 4993 return false; 4994 } 4995 4996 SmallString<40> ErrStr; 4997 raw_svector_ostream Err(ErrStr); 4998 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4999 return TokError(Err.str()); 5000 } 5001 Lex(); 5002 5003 if (ID == "enable_wavefront_size32") { 5004 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5005 if (!isGFX10Plus()) 5006 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5007 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5008 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5009 } else { 5010 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5011 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5012 } 5013 } 5014 5015 if (ID == "wavefront_size") { 5016 if (Header.wavefront_size == 5) { 5017 if (!isGFX10Plus()) 5018 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5019 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5020 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5021 } else if (Header.wavefront_size == 6) { 5022 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5023 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5024 } 5025 } 5026 5027 if (ID == "enable_wgp_mode") { 5028 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5029 !isGFX10Plus()) 5030 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5031 } 5032 5033 if (ID == "enable_mem_ordered") { 5034 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5035 !isGFX10Plus()) 5036 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5037 } 5038 5039 if (ID == "enable_fwd_progress") { 5040 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5041 !isGFX10Plus()) 5042 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5043 } 5044 5045 return false; 5046 } 5047 5048 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5049 amd_kernel_code_t Header; 5050 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5051 5052 while (true) { 5053 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5054 // will set the current token to EndOfStatement. 5055 while(trySkipToken(AsmToken::EndOfStatement)); 5056 5057 StringRef ID; 5058 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5059 return true; 5060 5061 if (ID == ".end_amd_kernel_code_t") 5062 break; 5063 5064 if (ParseAMDKernelCodeTValue(ID, Header)) 5065 return true; 5066 } 5067 5068 getTargetStreamer().EmitAMDKernelCodeT(Header); 5069 5070 return false; 5071 } 5072 5073 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5074 StringRef KernelName; 5075 if (!parseId(KernelName, "expected symbol name")) 5076 return true; 5077 5078 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5079 ELF::STT_AMDGPU_HSA_KERNEL); 5080 5081 KernelScope.initialize(getContext()); 5082 return false; 5083 } 5084 5085 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5086 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5087 return Error(getLoc(), 5088 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5089 "architectures"); 5090 } 5091 5092 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5093 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5094 return Error(getParser().getTok().getLoc(), "target id must match options"); 5095 5096 getTargetStreamer().EmitISAVersion(); 5097 Lex(); 5098 5099 return false; 5100 } 5101 5102 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5103 const char *AssemblerDirectiveBegin; 5104 const char *AssemblerDirectiveEnd; 5105 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5106 isHsaAbiVersion3AndAbove(&getSTI()) 5107 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5108 HSAMD::V3::AssemblerDirectiveEnd) 5109 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5110 HSAMD::AssemblerDirectiveEnd); 5111 5112 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5113 return Error(getLoc(), 5114 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5115 "not available on non-amdhsa OSes")).str()); 5116 } 5117 5118 std::string HSAMetadataString; 5119 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5120 HSAMetadataString)) 5121 return true; 5122 5123 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5124 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5125 return Error(getLoc(), "invalid HSA metadata"); 5126 } else { 5127 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5128 return Error(getLoc(), "invalid HSA metadata"); 5129 } 5130 5131 return false; 5132 } 5133 5134 /// Common code to parse out a block of text (typically YAML) between start and 5135 /// end directives. 
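/// The collected statements are joined with the assembler's separator string
/// and leading whitespace is preserved; this is a summary of the loop below,
/// not additional behavior.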
5136 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5137 const char *AssemblerDirectiveEnd, 5138 std::string &CollectString) { 5139 5140 raw_string_ostream CollectStream(CollectString); 5141 5142 getLexer().setSkipSpace(false); 5143 5144 bool FoundEnd = false; 5145 while (!isToken(AsmToken::Eof)) { 5146 while (isToken(AsmToken::Space)) { 5147 CollectStream << getTokenStr(); 5148 Lex(); 5149 } 5150 5151 if (trySkipId(AssemblerDirectiveEnd)) { 5152 FoundEnd = true; 5153 break; 5154 } 5155 5156 CollectStream << Parser.parseStringToEndOfStatement() 5157 << getContext().getAsmInfo()->getSeparatorString(); 5158 5159 Parser.eatToEndOfStatement(); 5160 } 5161 5162 getLexer().setSkipSpace(true); 5163 5164 if (isToken(AsmToken::Eof) && !FoundEnd) { 5165 return TokError(Twine("expected directive ") + 5166 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5167 } 5168 5169 CollectStream.flush(); 5170 return false; 5171 } 5172 5173 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5174 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5175 std::string String; 5176 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5177 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5178 return true; 5179 5180 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5181 if (!PALMetadata->setFromString(String)) 5182 return Error(getLoc(), "invalid PAL metadata"); 5183 return false; 5184 } 5185 5186 /// Parse the assembler directive for old linear-format PAL metadata. 5187 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5188 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5189 return Error(getLoc(), 5190 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5191 "not available on non-amdpal OSes")).str()); 5192 } 5193 5194 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5195 PALMetadata->setLegacy(); 5196 for (;;) { 5197 uint32_t Key, Value; 5198 if (ParseAsAbsoluteExpression(Key)) { 5199 return TokError(Twine("invalid value in ") + 5200 Twine(PALMD::AssemblerDirective)); 5201 } 5202 if (!trySkipToken(AsmToken::Comma)) { 5203 return TokError(Twine("expected an even number of values in ") + 5204 Twine(PALMD::AssemblerDirective)); 5205 } 5206 if (ParseAsAbsoluteExpression(Value)) { 5207 return TokError(Twine("invalid value in ") + 5208 Twine(PALMD::AssemblerDirective)); 5209 } 5210 PALMetadata->setRegister(Key, Value); 5211 if (!trySkipToken(AsmToken::Comma)) 5212 break; 5213 } 5214 return false; 5215 } 5216 5217 /// ParseDirectiveAMDGPULDS 5218 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5219 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5220 if (getParser().checkForValidSection()) 5221 return true; 5222 5223 StringRef Name; 5224 SMLoc NameLoc = getLoc(); 5225 if (getParser().parseIdentifier(Name)) 5226 return TokError("expected identifier in directive"); 5227 5228 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5229 if (parseToken(AsmToken::Comma, "expected ','")) 5230 return true; 5231 5232 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5233 5234 int64_t Size; 5235 SMLoc SizeLoc = getLoc(); 5236 if (getParser().parseAbsoluteExpression(Size)) 5237 return true; 5238 if (Size < 0) 5239 return Error(SizeLoc, "size must be non-negative"); 5240 if (Size > LocalMemorySize) 5241 return Error(SizeLoc, "size is too large"); 5242 5243 int64_t Alignment = 4; 5244 if (trySkipToken(AsmToken::Comma)) { 5245 SMLoc AlignLoc = getLoc(); 5246 if 
(getParser().parseAbsoluteExpression(Alignment)) 5247 return true; 5248 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5249 return Error(AlignLoc, "alignment must be a power of two"); 5250 5251 // Alignment larger than the size of LDS is possible in theory, as long 5252 // as the linker manages to place to symbol at address 0, but we do want 5253 // to make sure the alignment fits nicely into a 32-bit integer. 5254 if (Alignment >= 1u << 31) 5255 return Error(AlignLoc, "alignment is too large"); 5256 } 5257 5258 if (parseToken(AsmToken::EndOfStatement, 5259 "unexpected token in '.amdgpu_lds' directive")) 5260 return true; 5261 5262 Symbol->redefineIfPossible(); 5263 if (!Symbol->isUndefined()) 5264 return Error(NameLoc, "invalid symbol redefinition"); 5265 5266 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5267 return false; 5268 } 5269 5270 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5271 StringRef IDVal = DirectiveID.getString(); 5272 5273 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5274 if (IDVal == ".amdhsa_kernel") 5275 return ParseDirectiveAMDHSAKernel(); 5276 5277 // TODO: Restructure/combine with PAL metadata directive. 5278 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5279 return ParseDirectiveHSAMetadata(); 5280 } else { 5281 if (IDVal == ".hsa_code_object_version") 5282 return ParseDirectiveHSACodeObjectVersion(); 5283 5284 if (IDVal == ".hsa_code_object_isa") 5285 return ParseDirectiveHSACodeObjectISA(); 5286 5287 if (IDVal == ".amd_kernel_code_t") 5288 return ParseDirectiveAMDKernelCodeT(); 5289 5290 if (IDVal == ".amdgpu_hsa_kernel") 5291 return ParseDirectiveAMDGPUHsaKernel(); 5292 5293 if (IDVal == ".amd_amdgpu_isa") 5294 return ParseDirectiveISAVersion(); 5295 5296 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5297 return ParseDirectiveHSAMetadata(); 5298 } 5299 5300 if (IDVal == ".amdgcn_target") 5301 return ParseDirectiveAMDGCNTarget(); 5302 5303 if (IDVal == ".amdgpu_lds") 5304 return ParseDirectiveAMDGPULDS(); 5305 5306 if (IDVal == PALMD::AssemblerDirectiveBegin) 5307 return ParseDirectivePALMetadataBegin(); 5308 5309 if (IDVal == PALMD::AssemblerDirective) 5310 return ParseDirectivePALMetadata(); 5311 5312 return true; 5313 } 5314 5315 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5316 unsigned RegNo) { 5317 5318 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5319 R.isValid(); ++R) { 5320 if (*R == RegNo) 5321 return isGFX9Plus(); 5322 } 5323 5324 // GFX10 has 2 more SGPRs 104 and 105. 5325 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5326 R.isValid(); ++R) { 5327 if (*R == RegNo) 5328 return hasSGPR104_SGPR105(); 5329 } 5330 5331 switch (RegNo) { 5332 case AMDGPU::SRC_SHARED_BASE: 5333 case AMDGPU::SRC_SHARED_LIMIT: 5334 case AMDGPU::SRC_PRIVATE_BASE: 5335 case AMDGPU::SRC_PRIVATE_LIMIT: 5336 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5337 return isGFX9Plus(); 5338 case AMDGPU::TBA: 5339 case AMDGPU::TBA_LO: 5340 case AMDGPU::TBA_HI: 5341 case AMDGPU::TMA: 5342 case AMDGPU::TMA_LO: 5343 case AMDGPU::TMA_HI: 5344 return !isGFX9Plus(); 5345 case AMDGPU::XNACK_MASK: 5346 case AMDGPU::XNACK_MASK_LO: 5347 case AMDGPU::XNACK_MASK_HI: 5348 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5349 case AMDGPU::SGPR_NULL: 5350 return isGFX10Plus(); 5351 default: 5352 break; 5353 } 5354 5355 if (isCI()) 5356 return true; 5357 5358 if (isSI() || isGFX10Plus()) { 5359 // No flat_scr on SI. 
5360 // On GFX10 flat scratch is not a valid register operand and can only be 5361 // accessed with s_setreg/s_getreg. 5362 switch (RegNo) { 5363 case AMDGPU::FLAT_SCR: 5364 case AMDGPU::FLAT_SCR_LO: 5365 case AMDGPU::FLAT_SCR_HI: 5366 return false; 5367 default: 5368 return true; 5369 } 5370 } 5371 5372 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5373 // SI/CI have. 5374 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5375 R.isValid(); ++R) { 5376 if (*R == RegNo) 5377 return hasSGPR102_SGPR103(); 5378 } 5379 5380 return true; 5381 } 5382 5383 OperandMatchResultTy 5384 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5385 OperandMode Mode) { 5386 // Try to parse with a custom parser 5387 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5388 5389 // If we successfully parsed the operand or if there was an error parsing, 5390 // we are done. 5391 // 5392 // If we are parsing after we reach EndOfStatement then this means we 5393 // are appending default values to the Operands list. This is only done 5394 // by a custom parser, so we shouldn't continue on to the generic parsing. 5395 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5396 isToken(AsmToken::EndOfStatement)) 5397 return ResTy; 5398 5399 SMLoc RBraceLoc; 5400 SMLoc LBraceLoc = getLoc(); 5401 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5402 unsigned Prefix = Operands.size(); 5403 5404 for (;;) { 5405 auto Loc = getLoc(); 5406 ResTy = parseReg(Operands); 5407 if (ResTy == MatchOperand_NoMatch) 5408 Error(Loc, "expected a register"); 5409 if (ResTy != MatchOperand_Success) 5410 return MatchOperand_ParseFail; 5411 5412 RBraceLoc = getLoc(); 5413 if (trySkipToken(AsmToken::RBrac)) 5414 break; 5415 5416 if (!skipToken(AsmToken::Comma, 5417 "expected a comma or a closing square bracket")) { 5418 return MatchOperand_ParseFail; 5419 } 5420 } 5421 5422 if (Operands.size() - Prefix > 1) { 5423 Operands.insert(Operands.begin() + Prefix, 5424 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5425 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5426 } 5427 5428 return MatchOperand_Success; 5429 } 5430 5431 return parseRegOrImm(Operands); 5432 } 5433 5434 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5435 // Clear any forced encodings from the previous instruction.
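  // A few illustrative cases for the suffix handling below (not exhaustive):
  // "v_add_f32_e64" is reduced to "v_add_f32" with a forced 64-bit (VOP3)
  // encoding, and "v_mov_b32_sdwa" is reduced to "v_mov_b32" with the SDWA
  // form forced; the stripped name is what the generated matcher then sees.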
5436 setForcedEncodingSize(0); 5437 setForcedDPP(false); 5438 setForcedSDWA(false); 5439 5440 if (Name.endswith("_e64")) { 5441 setForcedEncodingSize(64); 5442 return Name.substr(0, Name.size() - 4); 5443 } else if (Name.endswith("_e32")) { 5444 setForcedEncodingSize(32); 5445 return Name.substr(0, Name.size() - 4); 5446 } else if (Name.endswith("_dpp")) { 5447 setForcedDPP(true); 5448 return Name.substr(0, Name.size() - 4); 5449 } else if (Name.endswith("_sdwa")) { 5450 setForcedSDWA(true); 5451 return Name.substr(0, Name.size() - 5); 5452 } 5453 return Name; 5454 } 5455 5456 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5457 StringRef Name, 5458 SMLoc NameLoc, OperandVector &Operands) { 5459 // Add the instruction mnemonic 5460 Name = parseMnemonicSuffix(Name); 5461 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5462 5463 bool IsMIMG = Name.startswith("image_"); 5464 5465 while (!trySkipToken(AsmToken::EndOfStatement)) { 5466 OperandMode Mode = OperandMode_Default; 5467 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5468 Mode = OperandMode_NSA; 5469 CPolSeen = 0; 5470 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5471 5472 if (Res != MatchOperand_Success) { 5473 checkUnsupportedInstruction(Name, NameLoc); 5474 if (!Parser.hasPendingError()) { 5475 // FIXME: use real operand location rather than the current location. 5476 StringRef Msg = 5477 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5478 "not a valid operand."; 5479 Error(getLoc(), Msg); 5480 } 5481 while (!trySkipToken(AsmToken::EndOfStatement)) { 5482 lex(); 5483 } 5484 return true; 5485 } 5486 5487 // Eat the comma or space if there is one. 5488 trySkipToken(AsmToken::Comma); 5489 } 5490 5491 return false; 5492 } 5493 5494 //===----------------------------------------------------------------------===// 5495 // Utility functions 5496 //===----------------------------------------------------------------------===// 5497 5498 OperandMatchResultTy 5499 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5500 5501 if (!trySkipId(Prefix, AsmToken::Colon)) 5502 return MatchOperand_NoMatch; 5503 5504 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5505 } 5506 5507 OperandMatchResultTy 5508 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5509 AMDGPUOperand::ImmTy ImmTy, 5510 bool (*ConvertResult)(int64_t&)) { 5511 SMLoc S = getLoc(); 5512 int64_t Value = 0; 5513 5514 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5515 if (Res != MatchOperand_Success) 5516 return Res; 5517 5518 if (ConvertResult && !ConvertResult(Value)) { 5519 Error(S, "invalid " + StringRef(Prefix) + " value."); 5520 } 5521 5522 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5523 return MatchOperand_Success; 5524 } 5525 5526 OperandMatchResultTy 5527 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5528 OperandVector &Operands, 5529 AMDGPUOperand::ImmTy ImmTy, 5530 bool (*ConvertResult)(int64_t&)) { 5531 SMLoc S = getLoc(); 5532 if (!trySkipId(Prefix, AsmToken::Colon)) 5533 return MatchOperand_NoMatch; 5534 5535 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5536 return MatchOperand_ParseFail; 5537 5538 unsigned Val = 0; 5539 const unsigned MaxSize = 4; 5540 5541 // FIXME: How to verify the number of elements matches the number of src 5542 // operands? 
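  // Illustrative example, assuming a prefix such as "op_sel": an input of
  //   op_sel:[0,1,1,0]
  // is accepted by the loop below and packed LSB-first into Val, i.e.
  // Val = (0 << 0) | (1 << 1) | (1 << 2) | (0 << 3) = 6. Each element must
  // be 0 or 1, and at most MaxSize (4) elements are accepted.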
5543 for (int I = 0; ; ++I) { 5544 int64_t Op; 5545 SMLoc Loc = getLoc(); 5546 if (!parseExpr(Op)) 5547 return MatchOperand_ParseFail; 5548 5549 if (Op != 0 && Op != 1) { 5550 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5551 return MatchOperand_ParseFail; 5552 } 5553 5554 Val |= (Op << I); 5555 5556 if (trySkipToken(AsmToken::RBrac)) 5557 break; 5558 5559 if (I + 1 == MaxSize) { 5560 Error(getLoc(), "expected a closing square bracket"); 5561 return MatchOperand_ParseFail; 5562 } 5563 5564 if (!skipToken(AsmToken::Comma, "expected a comma")) 5565 return MatchOperand_ParseFail; 5566 } 5567 5568 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5569 return MatchOperand_Success; 5570 } 5571 5572 OperandMatchResultTy 5573 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5574 AMDGPUOperand::ImmTy ImmTy) { 5575 int64_t Bit; 5576 SMLoc S = getLoc(); 5577 5578 if (trySkipId(Name)) { 5579 Bit = 1; 5580 } else if (trySkipId("no", Name)) { 5581 Bit = 0; 5582 } else { 5583 return MatchOperand_NoMatch; 5584 } 5585 5586 if (Name == "r128" && !hasMIMG_R128()) { 5587 Error(S, "r128 modifier is not supported on this GPU"); 5588 return MatchOperand_ParseFail; 5589 } 5590 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5591 Error(S, "a16 modifier is not supported on this GPU"); 5592 return MatchOperand_ParseFail; 5593 } 5594 5595 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5596 ImmTy = AMDGPUOperand::ImmTyR128A16; 5597 5598 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5599 return MatchOperand_Success; 5600 } 5601 5602 OperandMatchResultTy 5603 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5604 unsigned CPolOn = 0; 5605 unsigned CPolOff = 0; 5606 SMLoc S = getLoc(); 5607 5608 if (trySkipId("glc")) 5609 CPolOn = AMDGPU::CPol::GLC; 5610 else if (trySkipId("noglc")) 5611 CPolOff = AMDGPU::CPol::GLC; 5612 else if (trySkipId("slc")) 5613 CPolOn = AMDGPU::CPol::SLC; 5614 else if (trySkipId("noslc")) 5615 CPolOff = AMDGPU::CPol::SLC; 5616 else if (trySkipId("dlc")) 5617 CPolOn = AMDGPU::CPol::DLC; 5618 else if (trySkipId("nodlc")) 5619 CPolOff = AMDGPU::CPol::DLC; 5620 else if (trySkipId("scc")) 5621 CPolOn = AMDGPU::CPol::SCC; 5622 else if (trySkipId("noscc")) 5623 CPolOff = AMDGPU::CPol::SCC; 5624 else 5625 return MatchOperand_NoMatch; 5626 5627 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5628 Error(S, "dlc modifier is not supported on this GPU"); 5629 return MatchOperand_ParseFail; 5630 } 5631 5632 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5633 Error(S, "scc modifier is not supported on this GPU"); 5634 return MatchOperand_ParseFail; 5635 } 5636 5637 if (CPolSeen & (CPolOn | CPolOff)) { 5638 Error(S, "duplicate cache policy modifier"); 5639 return MatchOperand_ParseFail; 5640 } 5641 5642 CPolSeen |= (CPolOn | CPolOff); 5643 5644 for (unsigned I = 1; I != Operands.size(); ++I) { 5645 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5646 if (Op.isCPol()) { 5647 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5648 return MatchOperand_Success; 5649 } 5650 } 5651 5652 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5653 AMDGPUOperand::ImmTyCPol)); 5654 5655 return MatchOperand_Success; 5656 } 5657 5658 static void addOptionalImmOperand( 5659 MCInst& Inst, const OperandVector& Operands, 5660 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5661 AMDGPUOperand::ImmTy ImmT, 5662 int64_t Default = 0) { 5663 auto i = OptionalIdx.find(ImmT); 5664 if (i != OptionalIdx.end()) { 
5665 unsigned Idx = i->second; 5666 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5667 } else { 5668 Inst.addOperand(MCOperand::createImm(Default)); 5669 } 5670 } 5671 5672 OperandMatchResultTy 5673 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5674 StringRef &Value, 5675 SMLoc &StringLoc) { 5676 if (!trySkipId(Prefix, AsmToken::Colon)) 5677 return MatchOperand_NoMatch; 5678 5679 StringLoc = getLoc(); 5680 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5681 : MatchOperand_ParseFail; 5682 } 5683 5684 //===----------------------------------------------------------------------===// 5685 // MTBUF format 5686 //===----------------------------------------------------------------------===// 5687 5688 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5689 int64_t MaxVal, 5690 int64_t &Fmt) { 5691 int64_t Val; 5692 SMLoc Loc = getLoc(); 5693 5694 auto Res = parseIntWithPrefix(Pref, Val); 5695 if (Res == MatchOperand_ParseFail) 5696 return false; 5697 if (Res == MatchOperand_NoMatch) 5698 return true; 5699 5700 if (Val < 0 || Val > MaxVal) { 5701 Error(Loc, Twine("out of range ", StringRef(Pref))); 5702 return false; 5703 } 5704 5705 Fmt = Val; 5706 return true; 5707 } 5708 5709 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5710 // values to live in a joint format operand in the MCInst encoding. 5711 OperandMatchResultTy 5712 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5713 using namespace llvm::AMDGPU::MTBUFFormat; 5714 5715 int64_t Dfmt = DFMT_UNDEF; 5716 int64_t Nfmt = NFMT_UNDEF; 5717 5718 // dfmt and nfmt can appear in either order, and each is optional. 5719 for (int I = 0; I < 2; ++I) { 5720 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5721 return MatchOperand_ParseFail; 5722 5723 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5724 return MatchOperand_ParseFail; 5725 } 5726 // Skip optional comma between dfmt/nfmt 5727 // but guard against 2 commas following each other. 5728 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5729 !peekToken().is(AsmToken::Comma)) { 5730 trySkipToken(AsmToken::Comma); 5731 } 5732 } 5733 5734 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5735 return MatchOperand_NoMatch; 5736 5737 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5738 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5739 5740 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5741 return MatchOperand_Success; 5742 } 5743 5744 OperandMatchResultTy 5745 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5746 using namespace llvm::AMDGPU::MTBUFFormat; 5747 5748 int64_t Fmt = UFMT_UNDEF; 5749 5750 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5751 return MatchOperand_ParseFail; 5752 5753 if (Fmt == UFMT_UNDEF) 5754 return MatchOperand_NoMatch; 5755 5756 Format = Fmt; 5757 return MatchOperand_Success; 5758 } 5759 5760 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5761 int64_t &Nfmt, 5762 StringRef FormatStr, 5763 SMLoc Loc) { 5764 using namespace llvm::AMDGPU::MTBUFFormat; 5765 int64_t Format; 5766 5767 Format = getDfmt(FormatStr); 5768 if (Format != DFMT_UNDEF) { 5769 Dfmt = Format; 5770 return true; 5771 } 5772 5773 Format = getNfmt(FormatStr, getSTI()); 5774 if (Format != NFMT_UNDEF) { 5775 Nfmt = Format; 5776 return true; 5777 } 5778 5779 Error(Loc, "unsupported format"); 5780 return false; 5781 } 5782 5783 OperandMatchResultTy 5784 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5785 SMLoc FormatLoc, 5786 int64_t &Format) { 5787 using namespace llvm::AMDGPU::MTBUFFormat; 5788 5789 int64_t Dfmt = DFMT_UNDEF; 5790 int64_t Nfmt = NFMT_UNDEF; 5791 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5792 return MatchOperand_ParseFail; 5793 5794 if (trySkipToken(AsmToken::Comma)) { 5795 StringRef Str; 5796 SMLoc Loc = getLoc(); 5797 if (!parseId(Str, "expected a format string") || 5798 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5799 return MatchOperand_ParseFail; 5800 } 5801 if (Dfmt == DFMT_UNDEF) { 5802 Error(Loc, "duplicate numeric format"); 5803 return MatchOperand_ParseFail; 5804 } else if (Nfmt == NFMT_UNDEF) { 5805 Error(Loc, "duplicate data format"); 5806 return MatchOperand_ParseFail; 5807 } 5808 } 5809 5810 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5811 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5812 5813 if (isGFX10Plus()) { 5814 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5815 if (Ufmt == UFMT_UNDEF) { 5816 Error(FormatLoc, "unsupported format"); 5817 return MatchOperand_ParseFail; 5818 } 5819 Format = Ufmt; 5820 } else { 5821 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5822 } 5823 5824 return MatchOperand_Success; 5825 } 5826 5827 OperandMatchResultTy 5828 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5829 SMLoc Loc, 5830 int64_t &Format) { 5831 using namespace llvm::AMDGPU::MTBUFFormat; 5832 5833 auto Id = getUnifiedFormat(FormatStr); 5834 if (Id == UFMT_UNDEF) 5835 return MatchOperand_NoMatch; 5836 5837 if (!isGFX10Plus()) { 5838 Error(Loc, "unified format is not supported on this GPU"); 5839 return MatchOperand_ParseFail; 5840 } 5841 5842 Format = Id; 5843 return MatchOperand_Success; 5844 } 5845 5846 OperandMatchResultTy 5847 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5848 using namespace llvm::AMDGPU::MTBUFFormat; 5849 SMLoc Loc = getLoc(); 5850 5851 if (!parseExpr(Format)) 5852 return MatchOperand_ParseFail; 5853 if (!isValidFormatEncoding(Format, getSTI())) { 5854 Error(Loc, "out of range format"); 5855 return MatchOperand_ParseFail; 5856 } 5857 5858 return MatchOperand_Success; 5859 } 5860 5861 OperandMatchResultTy 5862 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5863 using namespace llvm::AMDGPU::MTBUFFormat; 5864 5865 if (!trySkipId("format", AsmToken::Colon)) 5866 return MatchOperand_NoMatch; 5867 5868 if (trySkipToken(AsmToken::LBrac)) { 5869 StringRef FormatStr; 5870 SMLoc Loc = getLoc(); 5871 if (!parseId(FormatStr, "expected a format string")) 5872 return MatchOperand_ParseFail; 5873 5874 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5875 if (Res == MatchOperand_NoMatch) 5876 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5877 if (Res != MatchOperand_Success) 5878 return Res; 5879 5880 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5881 return MatchOperand_ParseFail; 5882 5883 return MatchOperand_Success; 5884 } 5885 5886 return parseNumericFormat(Format); 5887 } 5888 5889 OperandMatchResultTy 5890 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5891 using namespace llvm::AMDGPU::MTBUFFormat; 5892 5893 int64_t Format = getDefaultFormatEncoding(getSTI()); 5894 OperandMatchResultTy Res; 5895 SMLoc Loc = getLoc(); 5896 5897 // Parse legacy format syntax. 5898 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5899 if (Res == MatchOperand_ParseFail) 5900 return Res; 5901 5902 bool FormatFound = (Res == MatchOperand_Success); 5903 5904 Operands.push_back( 5905 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5906 5907 if (FormatFound) 5908 trySkipToken(AsmToken::Comma); 5909 5910 if (isToken(AsmToken::EndOfStatement)) { 5911 // We are expecting an soffset operand, 5912 // but let matcher handle the error. 5913 return MatchOperand_Success; 5914 } 5915 5916 // Parse soffset. 
5917 Res = parseRegOrImm(Operands); 5918 if (Res != MatchOperand_Success) 5919 return Res; 5920 5921 trySkipToken(AsmToken::Comma); 5922 5923 if (!FormatFound) { 5924 Res = parseSymbolicOrNumericFormat(Format); 5925 if (Res == MatchOperand_ParseFail) 5926 return Res; 5927 if (Res == MatchOperand_Success) { 5928 auto Size = Operands.size(); 5929 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5930 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5931 Op.setImm(Format); 5932 } 5933 return MatchOperand_Success; 5934 } 5935 5936 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5937 Error(getLoc(), "duplicate format"); 5938 return MatchOperand_ParseFail; 5939 } 5940 return MatchOperand_Success; 5941 } 5942 5943 //===----------------------------------------------------------------------===// 5944 // ds 5945 //===----------------------------------------------------------------------===// 5946 5947 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5948 const OperandVector &Operands) { 5949 OptionalImmIndexMap OptionalIdx; 5950 5951 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5952 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5953 5954 // Add the register arguments 5955 if (Op.isReg()) { 5956 Op.addRegOperands(Inst, 1); 5957 continue; 5958 } 5959 5960 // Handle optional arguments 5961 OptionalIdx[Op.getImmTy()] = i; 5962 } 5963 5964 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5967 5968 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5969 } 5970 5971 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5972 bool IsGdsHardcoded) { 5973 OptionalImmIndexMap OptionalIdx; 5974 5975 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5976 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5977 5978 // Add the register arguments 5979 if (Op.isReg()) { 5980 Op.addRegOperands(Inst, 1); 5981 continue; 5982 } 5983 5984 if (Op.isToken() && Op.getToken() == "gds") { 5985 IsGdsHardcoded = true; 5986 continue; 5987 } 5988 5989 // Handle optional arguments 5990 OptionalIdx[Op.getImmTy()] = i; 5991 } 5992 5993 AMDGPUOperand::ImmTy OffsetType = 5994 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5995 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5996 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5997 AMDGPUOperand::ImmTyOffset; 5998 5999 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6000 6001 if (!IsGdsHardcoded) { 6002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6003 } 6004 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6005 } 6006 6007 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6008 OptionalImmIndexMap OptionalIdx; 6009 6010 unsigned OperandIdx[4]; 6011 unsigned EnMask = 0; 6012 int SrcIdx = 0; 6013 6014 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6015 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6016 6017 // Add the register arguments 6018 if (Op.isReg()) { 6019 assert(SrcIdx < 4); 6020 OperandIdx[SrcIdx] = Inst.size(); 6021 Op.addRegOperands(Inst, 1); 6022 ++SrcIdx; 6023 continue; 6024 } 6025 6026 if (Op.isOff()) { 6027 assert(SrcIdx < 4); 6028 OperandIdx[SrcIdx] = Inst.size(); 6029 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6030 ++SrcIdx; 6031 continue; 6032 } 6033 6034 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6035 Op.addImmOperands(Inst, 1); 6036 continue; 6037 } 6038 6039 if (Op.isToken() && Op.getToken() == "done") 6040 continue; 6041 6042 // Handle optional arguments 6043 OptionalIdx[Op.getImmTy()] = i; 6044 } 6045 6046 assert(SrcIdx == 4); 6047 6048 bool Compr = false; 6049 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6050 Compr = true; 6051 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6052 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6053 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6054 } 6055 6056 for (auto i = 0; i < SrcIdx; ++i) { 6057 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6058 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6059 } 6060 } 6061 6062 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6063 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6064 6065 Inst.addOperand(MCOperand::createImm(EnMask)); 6066 } 6067 6068 //===----------------------------------------------------------------------===// 6069 // s_waitcnt 6070 //===----------------------------------------------------------------------===// 6071 6072 static bool 6073 encodeCnt( 6074 const AMDGPU::IsaVersion ISA, 6075 int64_t &IntVal, 6076 int64_t CntVal, 6077 bool Saturate, 6078 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6079 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6080 { 6081 bool Failed = false; 6082 6083 IntVal = encode(ISA, IntVal, CntVal); 6084 if (CntVal != decode(ISA, IntVal)) { 6085 if (Saturate) { 6086 IntVal = encode(ISA, IntVal, -1); 6087 } else { 6088 Failed = true; 6089 } 6090 } 6091 return Failed; 6092 } 6093 6094 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6095 6096 SMLoc CntLoc = getLoc(); 6097 StringRef CntName = getTokenStr(); 6098 6099 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6100 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6101 return false; 6102 6103 int64_t CntVal; 6104 SMLoc ValLoc = getLoc(); 6105 if (!parseExpr(CntVal)) 6106 return false; 6107 6108 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6109 6110 bool Failed = true; 6111 bool Sat = CntName.endswith("_sat"); 6112 6113 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6114 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6115 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6116 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6117 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6118 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6119 } else { 6120 Error(CntLoc, "invalid counter name " + CntName); 6121 return false; 6122 } 6123 6124 if (Failed) { 6125 Error(ValLoc, "too large value for " + CntName); 6126 return false; 6127 } 6128 6129 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6130 return false; 6131 6132 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6133 if (isToken(AsmToken::EndOfStatement)) { 6134 Error(getLoc(), "expected a counter name"); 6135 return false; 6136 } 6137 } 6138 6139 return true; 6140 } 6141 6142 OperandMatchResultTy 6143 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6144 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6145 int64_t Waitcnt = getWaitcntBitMask(ISA); 6146 SMLoc S = getLoc(); 6147 6148 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6149 while (!isToken(AsmToken::EndOfStatement)) { 6150 if (!parseCnt(Waitcnt)) 6151 return MatchOperand_ParseFail; 6152 } 6153 } else { 6154 if (!parseExpr(Waitcnt)) 6155 return MatchOperand_ParseFail; 6156 } 6157 6158 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6159 return MatchOperand_Success; 6160 } 6161 6162 bool 6163 AMDGPUOperand::isSWaitCnt() const { 6164 return isImm(); 6165 } 6166 6167 //===----------------------------------------------------------------------===// 6168 // hwreg 6169 //===----------------------------------------------------------------------===// 6170 6171 bool 6172 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6173 OperandInfoTy &Offset, 6174 
OperandInfoTy &Width) { 6175 using namespace llvm::AMDGPU::Hwreg; 6176 6177 // The register may be specified by name or using a numeric code 6178 HwReg.Loc = getLoc(); 6179 if (isToken(AsmToken::Identifier) && 6180 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6181 HwReg.IsSymbolic = true; 6182 lex(); // skip register name 6183 } else if (!parseExpr(HwReg.Id, "a register name")) { 6184 return false; 6185 } 6186 6187 if (trySkipToken(AsmToken::RParen)) 6188 return true; 6189 6190 // parse optional params 6191 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6192 return false; 6193 6194 Offset.Loc = getLoc(); 6195 if (!parseExpr(Offset.Id)) 6196 return false; 6197 6198 if (!skipToken(AsmToken::Comma, "expected a comma")) 6199 return false; 6200 6201 Width.Loc = getLoc(); 6202 return parseExpr(Width.Id) && 6203 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6204 } 6205 6206 bool 6207 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6208 const OperandInfoTy &Offset, 6209 const OperandInfoTy &Width) { 6210 6211 using namespace llvm::AMDGPU::Hwreg; 6212 6213 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6214 Error(HwReg.Loc, 6215 "specified hardware register is not supported on this GPU"); 6216 return false; 6217 } 6218 if (!isValidHwreg(HwReg.Id)) { 6219 Error(HwReg.Loc, 6220 "invalid code of hardware register: only 6-bit values are legal"); 6221 return false; 6222 } 6223 if (!isValidHwregOffset(Offset.Id)) { 6224 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6225 return false; 6226 } 6227 if (!isValidHwregWidth(Width.Id)) { 6228 Error(Width.Loc, 6229 "invalid bitfield width: only values from 1 to 32 are legal"); 6230 return false; 6231 } 6232 return true; 6233 } 6234 6235 OperandMatchResultTy 6236 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6237 using namespace llvm::AMDGPU::Hwreg; 6238 6239 int64_t ImmVal = 0; 6240 SMLoc Loc = getLoc(); 6241 6242 if (trySkipId("hwreg", AsmToken::LParen)) { 6243 OperandInfoTy HwReg(ID_UNKNOWN_); 6244 OperandInfoTy Offset(OFFSET_DEFAULT_); 6245 OperandInfoTy Width(WIDTH_DEFAULT_); 6246 if (parseHwregBody(HwReg, Offset, Width) && 6247 validateHwreg(HwReg, Offset, Width)) { 6248 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6249 } else { 6250 return MatchOperand_ParseFail; 6251 } 6252 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6253 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6254 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6255 return MatchOperand_ParseFail; 6256 } 6257 } else { 6258 return MatchOperand_ParseFail; 6259 } 6260 6261 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6262 return MatchOperand_Success; 6263 } 6264 6265 bool AMDGPUOperand::isHwreg() const { 6266 return isImmTy(ImmTyHwreg); 6267 } 6268 6269 //===----------------------------------------------------------------------===// 6270 // sendmsg 6271 //===----------------------------------------------------------------------===// 6272 6273 bool 6274 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6275 OperandInfoTy &Op, 6276 OperandInfoTy &Stream) { 6277 using namespace llvm::AMDGPU::SendMsg; 6278 6279 Msg.Loc = getLoc(); 6280 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6281 Msg.IsSymbolic = true; 6282 lex(); // skip message name 6283 } else if (!parseExpr(Msg.Id, "a message name")) { 6284 return false; 6285 } 6286 6287 if (trySkipToken(AsmToken::Comma)) { 6288 Op.IsDefined = true; 
6289 Op.Loc = getLoc(); 6290 if (isToken(AsmToken::Identifier) && 6291 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6292 lex(); // skip operation name 6293 } else if (!parseExpr(Op.Id, "an operation name")) { 6294 return false; 6295 } 6296 6297 if (trySkipToken(AsmToken::Comma)) { 6298 Stream.IsDefined = true; 6299 Stream.Loc = getLoc(); 6300 if (!parseExpr(Stream.Id)) 6301 return false; 6302 } 6303 } 6304 6305 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6306 } 6307 6308 bool 6309 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6310 const OperandInfoTy &Op, 6311 const OperandInfoTy &Stream) { 6312 using namespace llvm::AMDGPU::SendMsg; 6313 6314 // Validation strictness depends on whether message is specified 6315 // in a symbolc or in a numeric form. In the latter case 6316 // only encoding possibility is checked. 6317 bool Strict = Msg.IsSymbolic; 6318 6319 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6320 Error(Msg.Loc, "invalid message id"); 6321 return false; 6322 } 6323 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6324 if (Op.IsDefined) { 6325 Error(Op.Loc, "message does not support operations"); 6326 } else { 6327 Error(Msg.Loc, "missing message operation"); 6328 } 6329 return false; 6330 } 6331 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6332 Error(Op.Loc, "invalid operation id"); 6333 return false; 6334 } 6335 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6336 Error(Stream.Loc, "message operation does not support streams"); 6337 return false; 6338 } 6339 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6340 Error(Stream.Loc, "invalid message stream id"); 6341 return false; 6342 } 6343 return true; 6344 } 6345 6346 OperandMatchResultTy 6347 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6348 using namespace llvm::AMDGPU::SendMsg; 6349 6350 int64_t ImmVal = 0; 6351 SMLoc Loc = getLoc(); 6352 6353 if (trySkipId("sendmsg", AsmToken::LParen)) { 6354 OperandInfoTy Msg(ID_UNKNOWN_); 6355 OperandInfoTy Op(OP_NONE_); 6356 OperandInfoTy Stream(STREAM_ID_NONE_); 6357 if (parseSendMsgBody(Msg, Op, Stream) && 6358 validateSendMsg(Msg, Op, Stream)) { 6359 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6360 } else { 6361 return MatchOperand_ParseFail; 6362 } 6363 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6364 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6365 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6366 return MatchOperand_ParseFail; 6367 } 6368 } else { 6369 return MatchOperand_ParseFail; 6370 } 6371 6372 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6373 return MatchOperand_Success; 6374 } 6375 6376 bool AMDGPUOperand::isSendMsg() const { 6377 return isImmTy(ImmTySendMsg); 6378 } 6379 6380 //===----------------------------------------------------------------------===// 6381 // v_interp 6382 //===----------------------------------------------------------------------===// 6383 6384 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6385 StringRef Str; 6386 SMLoc S = getLoc(); 6387 6388 if (!parseId(Str)) 6389 return MatchOperand_NoMatch; 6390 6391 int Slot = StringSwitch<int>(Str) 6392 .Case("p10", 0) 6393 .Case("p20", 1) 6394 .Case("p0", 2) 6395 .Default(-1); 6396 6397 if (Slot == -1) { 6398 Error(S, "invalid interpolation slot"); 6399 return MatchOperand_ParseFail; 6400 } 6401 6402 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6403 
AMDGPUOperand::ImmTyInterpSlot)); 6404 return MatchOperand_Success; 6405 } 6406 6407 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6408 StringRef Str; 6409 SMLoc S = getLoc(); 6410 6411 if (!parseId(Str)) 6412 return MatchOperand_NoMatch; 6413 6414 if (!Str.startswith("attr")) { 6415 Error(S, "invalid interpolation attribute"); 6416 return MatchOperand_ParseFail; 6417 } 6418 6419 StringRef Chan = Str.take_back(2); 6420 int AttrChan = StringSwitch<int>(Chan) 6421 .Case(".x", 0) 6422 .Case(".y", 1) 6423 .Case(".z", 2) 6424 .Case(".w", 3) 6425 .Default(-1); 6426 if (AttrChan == -1) { 6427 Error(S, "invalid or missing interpolation attribute channel"); 6428 return MatchOperand_ParseFail; 6429 } 6430 6431 Str = Str.drop_back(2).drop_front(4); 6432 6433 uint8_t Attr; 6434 if (Str.getAsInteger(10, Attr)) { 6435 Error(S, "invalid or missing interpolation attribute number"); 6436 return MatchOperand_ParseFail; 6437 } 6438 6439 if (Attr > 63) { 6440 Error(S, "out of bounds interpolation attribute number"); 6441 return MatchOperand_ParseFail; 6442 } 6443 6444 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6445 6446 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6447 AMDGPUOperand::ImmTyInterpAttr)); 6448 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6449 AMDGPUOperand::ImmTyAttrChan)); 6450 return MatchOperand_Success; 6451 } 6452 6453 //===----------------------------------------------------------------------===// 6454 // exp 6455 //===----------------------------------------------------------------------===// 6456 6457 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6458 using namespace llvm::AMDGPU::Exp; 6459 6460 StringRef Str; 6461 SMLoc S = getLoc(); 6462 6463 if (!parseId(Str)) 6464 return MatchOperand_NoMatch; 6465 6466 unsigned Id = getTgtId(Str); 6467 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6468 Error(S, (Id == ET_INVALID) ? 
6469 "invalid exp target" : 6470 "exp target is not supported on this GPU"); 6471 return MatchOperand_ParseFail; 6472 } 6473 6474 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6475 AMDGPUOperand::ImmTyExpTgt)); 6476 return MatchOperand_Success; 6477 } 6478 6479 //===----------------------------------------------------------------------===// 6480 // parser helpers 6481 //===----------------------------------------------------------------------===// 6482 6483 bool 6484 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6485 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6486 } 6487 6488 bool 6489 AMDGPUAsmParser::isId(const StringRef Id) const { 6490 return isId(getToken(), Id); 6491 } 6492 6493 bool 6494 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6495 return getTokenKind() == Kind; 6496 } 6497 6498 bool 6499 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6500 if (isId(Id)) { 6501 lex(); 6502 return true; 6503 } 6504 return false; 6505 } 6506 6507 bool 6508 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6509 if (isToken(AsmToken::Identifier)) { 6510 StringRef Tok = getTokenStr(); 6511 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6512 lex(); 6513 return true; 6514 } 6515 } 6516 return false; 6517 } 6518 6519 bool 6520 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6521 if (isId(Id) && peekToken().is(Kind)) { 6522 lex(); 6523 lex(); 6524 return true; 6525 } 6526 return false; 6527 } 6528 6529 bool 6530 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6531 if (isToken(Kind)) { 6532 lex(); 6533 return true; 6534 } 6535 return false; 6536 } 6537 6538 bool 6539 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6540 const StringRef ErrMsg) { 6541 if (!trySkipToken(Kind)) { 6542 Error(getLoc(), ErrMsg); 6543 return false; 6544 } 6545 return true; 6546 } 6547 6548 bool 6549 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6550 SMLoc S = getLoc(); 6551 6552 const MCExpr *Expr; 6553 if (Parser.parseExpression(Expr)) 6554 return false; 6555 6556 if (Expr->evaluateAsAbsolute(Imm)) 6557 return true; 6558 6559 if (Expected.empty()) { 6560 Error(S, "expected absolute expression"); 6561 } else { 6562 Error(S, Twine("expected ", Expected) + 6563 Twine(" or an absolute expression")); 6564 } 6565 return false; 6566 } 6567 6568 bool 6569 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6570 SMLoc S = getLoc(); 6571 6572 const MCExpr *Expr; 6573 if (Parser.parseExpression(Expr)) 6574 return false; 6575 6576 int64_t IntVal; 6577 if (Expr->evaluateAsAbsolute(IntVal)) { 6578 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6579 } else { 6580 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6581 } 6582 return true; 6583 } 6584 6585 bool 6586 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6587 if (isToken(AsmToken::String)) { 6588 Val = getToken().getStringContents(); 6589 lex(); 6590 return true; 6591 } else { 6592 Error(getLoc(), ErrMsg); 6593 return false; 6594 } 6595 } 6596 6597 bool 6598 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6599 if (isToken(AsmToken::Identifier)) { 6600 Val = getTokenStr(); 6601 lex(); 6602 return true; 6603 } else { 6604 if (!ErrMsg.empty()) 6605 Error(getLoc(), ErrMsg); 6606 return false; 6607 } 6608 } 6609 6610 AsmToken 6611 AMDGPUAsmParser::getToken() const { 6612 return Parser.getTok(); 6613 } 6614 6615 AsmToken 6616 
AMDGPUAsmParser::peekToken() { 6617 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6618 } 6619 6620 void 6621 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6622 auto TokCount = getLexer().peekTokens(Tokens); 6623 6624 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6625 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6626 } 6627 6628 AsmToken::TokenKind 6629 AMDGPUAsmParser::getTokenKind() const { 6630 return getLexer().getKind(); 6631 } 6632 6633 SMLoc 6634 AMDGPUAsmParser::getLoc() const { 6635 return getToken().getLoc(); 6636 } 6637 6638 StringRef 6639 AMDGPUAsmParser::getTokenStr() const { 6640 return getToken().getString(); 6641 } 6642 6643 void 6644 AMDGPUAsmParser::lex() { 6645 Parser.Lex(); 6646 } 6647 6648 SMLoc 6649 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6650 const OperandVector &Operands) const { 6651 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6652 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6653 if (Test(Op)) 6654 return Op.getStartLoc(); 6655 } 6656 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6657 } 6658 6659 SMLoc 6660 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6661 const OperandVector &Operands) const { 6662 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6663 return getOperandLoc(Test, Operands); 6664 } 6665 6666 SMLoc 6667 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6668 const OperandVector &Operands) const { 6669 auto Test = [=](const AMDGPUOperand& Op) { 6670 return Op.isRegKind() && Op.getReg() == Reg; 6671 }; 6672 return getOperandLoc(Test, Operands); 6673 } 6674 6675 SMLoc 6676 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6677 auto Test = [](const AMDGPUOperand& Op) { 6678 return Op.IsImmKindLiteral() || Op.isExpr(); 6679 }; 6680 return getOperandLoc(Test, Operands); 6681 } 6682 6683 SMLoc 6684 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6685 auto Test = [](const AMDGPUOperand& Op) { 6686 return Op.isImmKindConst(); 6687 }; 6688 return getOperandLoc(Test, Operands); 6689 } 6690 6691 //===----------------------------------------------------------------------===// 6692 // swizzle 6693 //===----------------------------------------------------------------------===// 6694 6695 LLVM_READNONE 6696 static unsigned 6697 encodeBitmaskPerm(const unsigned AndMask, 6698 const unsigned OrMask, 6699 const unsigned XorMask) { 6700 using namespace llvm::AMDGPU::Swizzle; 6701 6702 return BITMASK_PERM_ENC | 6703 (AndMask << BITMASK_AND_SHIFT) | 6704 (OrMask << BITMASK_OR_SHIFT) | 6705 (XorMask << BITMASK_XOR_SHIFT); 6706 } 6707 6708 bool 6709 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6710 const unsigned MinVal, 6711 const unsigned MaxVal, 6712 const StringRef ErrMsg, 6713 SMLoc &Loc) { 6714 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6715 return false; 6716 } 6717 Loc = getLoc(); 6718 if (!parseExpr(Op)) { 6719 return false; 6720 } 6721 if (Op < MinVal || Op > MaxVal) { 6722 Error(Loc, ErrMsg); 6723 return false; 6724 } 6725 6726 return true; 6727 } 6728 6729 bool 6730 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6731 const unsigned MinVal, 6732 const unsigned MaxVal, 6733 const StringRef ErrMsg) { 6734 SMLoc Loc; 6735 for (unsigned i = 0; i < OpNum; ++i) { 6736 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6737 return false; 6738 } 6739 6740 return true; 6741 } 6742 6743 bool 6744 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6745 using namespace llvm::AMDGPU::Swizzle; 6746 6747 int64_t Lane[LANE_NUM]; 6748 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6749 "expected a 2-bit lane id")) { 6750 Imm = QUAD_PERM_ENC; 6751 for (unsigned I = 0; I < LANE_NUM; ++I) { 6752 Imm |= Lane[I] << (LANE_SHIFT * I); 6753 } 6754 return true; 6755 } 6756 return false; 6757 } 6758 6759 bool 6760 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6761 using namespace llvm::AMDGPU::Swizzle; 6762 6763 SMLoc Loc; 6764 int64_t GroupSize; 6765 int64_t LaneIdx; 6766 6767 if (!parseSwizzleOperand(GroupSize, 6768 2, 32, 6769 "group size must be in the interval [2,32]", 6770 Loc)) { 6771 return false; 6772 } 6773 if (!isPowerOf2_64(GroupSize)) { 6774 Error(Loc, "group size must be a power of two"); 6775 return false; 6776 } 6777 if (parseSwizzleOperand(LaneIdx, 6778 0, GroupSize - 1, 6779 "lane id must be in the interval [0,group size - 1]", 6780 Loc)) { 6781 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6782 return true; 6783 } 6784 return false; 6785 } 6786 6787 bool 6788 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6789 using namespace llvm::AMDGPU::Swizzle; 6790 6791 SMLoc Loc; 6792 int64_t GroupSize; 6793 6794 if (!parseSwizzleOperand(GroupSize, 6795 2, 32, 6796 "group size must be in the interval [2,32]", 6797 Loc)) { 6798 return false; 6799 } 6800 if (!isPowerOf2_64(GroupSize)) { 6801 Error(Loc, "group size must be a power of two"); 6802 return false; 6803 } 6804 6805 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6806 return true; 6807 } 6808 6809 bool 6810 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6811 using namespace llvm::AMDGPU::Swizzle; 6812 6813 SMLoc Loc; 6814 int64_t GroupSize; 6815 6816 if (!parseSwizzleOperand(GroupSize, 6817 1, 16, 6818 "group size must be in the interval [1,16]", 6819 Loc)) { 6820 return false; 6821 } 6822 if (!isPowerOf2_64(GroupSize)) { 6823 Error(Loc, "group size must be a power of two"); 6824 return false; 6825 } 6826 6827 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6828 return true; 6829 } 6830 6831 bool 6832 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6833 using namespace llvm::AMDGPU::Swizzle; 6834 6835 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6836 return false; 6837 } 6838 6839 StringRef Ctl; 6840 SMLoc StrLoc = getLoc(); 6841 if (!parseString(Ctl)) { 6842 return false; 6843 } 6844 if (Ctl.size() != BITMASK_WIDTH) { 6845 Error(StrLoc, "expected a 5-character mask"); 6846 return false; 6847 } 6848 6849 unsigned AndMask = 0; 6850 unsigned OrMask = 0; 6851 unsigned XorMask = 0; 6852 6853 for (size_t i = 0; i < Ctl.size(); ++i) { 6854 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6855 switch(Ctl[i]) { 6856 default: 6857 Error(StrLoc, "invalid mask"); 6858 return false; 6859 case '0': 6860 break; 6861 case '1': 6862 OrMask |= Mask; 6863 break; 6864 case 'p': 6865 AndMask |= Mask; 6866 break; 6867 case 'i': 6868 AndMask |= Mask; 6869 XorMask |= Mask; 6870 break; 6871 } 6872 } 6873 6874 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6875 return true; 6876 } 6877 6878 bool 6879 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6880 6881 SMLoc OffsetLoc = getLoc(); 6882 6883 if (!parseExpr(Imm, "a swizzle macro")) { 6884 return false; 6885 } 6886 if (!isUInt<16>(Imm)) { 6887 Error(OffsetLoc, "expected a 16-bit offset"); 6888 return false; 6889 } 6890 return true; 6891 } 6892 6893 bool 6894 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6895 using namespace llvm::AMDGPU::Swizzle; 6896 6897 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6898 6899 SMLoc ModeLoc = getLoc(); 6900 bool Ok = false; 6901 6902 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6903 Ok = parseSwizzleQuadPerm(Imm); 6904 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6905 Ok = parseSwizzleBitmaskPerm(Imm); 6906 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6907 Ok = parseSwizzleBroadcast(Imm); 6908 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6909 Ok = parseSwizzleSwap(Imm); 6910 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6911 Ok = parseSwizzleReverse(Imm); 6912 } else { 6913 Error(ModeLoc, "expected a swizzle mode"); 6914 } 6915 6916 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6917 } 6918 6919 return false; 6920 } 6921 6922 OperandMatchResultTy 6923 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6924 SMLoc S = getLoc(); 6925 int64_t Imm = 0; 6926 6927 if (trySkipId("offset")) { 6928 6929 bool Ok = false; 6930 if (skipToken(AsmToken::Colon, "expected a colon")) { 6931 if (trySkipId("swizzle")) { 6932 Ok = parseSwizzleMacro(Imm); 6933 } else { 6934 Ok = parseSwizzleOffset(Imm); 6935 } 6936 } 6937 6938 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6939 6940 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6941 } else { 6942 // Swizzle "offset" operand is optional. 6943 // If it is omitted, try parsing other optional operands. 6944 return parseOptionalOpr(Operands); 6945 } 6946 } 6947 6948 bool 6949 AMDGPUOperand::isSwizzle() const { 6950 return isImmTy(ImmTySwizzle); 6951 } 6952 6953 //===----------------------------------------------------------------------===// 6954 // VGPR Index Mode 6955 //===----------------------------------------------------------------------===// 6956 6957 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6958 6959 using namespace llvm::AMDGPU::VGPRIndexMode; 6960 6961 if (trySkipToken(AsmToken::RParen)) { 6962 return OFF; 6963 } 6964 6965 int64_t Imm = 0; 6966 6967 while (true) { 6968 unsigned Mode = 0; 6969 SMLoc S = getLoc(); 6970 6971 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6972 if (trySkipId(IdSymbolic[ModeId])) { 6973 Mode = 1 << ModeId; 6974 break; 6975 } 6976 } 6977 6978 if (Mode == 0) { 6979 Error(S, (Imm == 0)? 
6980 "expected a VGPR index mode or a closing parenthesis" : 6981 "expected a VGPR index mode"); 6982 return UNDEF; 6983 } 6984 6985 if (Imm & Mode) { 6986 Error(S, "duplicate VGPR index mode"); 6987 return UNDEF; 6988 } 6989 Imm |= Mode; 6990 6991 if (trySkipToken(AsmToken::RParen)) 6992 break; 6993 if (!skipToken(AsmToken::Comma, 6994 "expected a comma or a closing parenthesis")) 6995 return UNDEF; 6996 } 6997 6998 return Imm; 6999 } 7000 7001 OperandMatchResultTy 7002 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7003 7004 using namespace llvm::AMDGPU::VGPRIndexMode; 7005 7006 int64_t Imm = 0; 7007 SMLoc S = getLoc(); 7008 7009 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7010 Imm = parseGPRIdxMacro(); 7011 if (Imm == UNDEF) 7012 return MatchOperand_ParseFail; 7013 } else { 7014 if (getParser().parseAbsoluteExpression(Imm)) 7015 return MatchOperand_ParseFail; 7016 if (Imm < 0 || !isUInt<4>(Imm)) { 7017 Error(S, "invalid immediate: only 4-bit values are legal"); 7018 return MatchOperand_ParseFail; 7019 } 7020 } 7021 7022 Operands.push_back( 7023 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7024 return MatchOperand_Success; 7025 } 7026 7027 bool AMDGPUOperand::isGPRIdxMode() const { 7028 return isImmTy(ImmTyGprIdxMode); 7029 } 7030 7031 //===----------------------------------------------------------------------===// 7032 // sopp branch targets 7033 //===----------------------------------------------------------------------===// 7034 7035 OperandMatchResultTy 7036 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7037 7038 // Make sure we are not parsing something 7039 // that looks like a label or an expression but is not. 7040 // This will improve error messages. 7041 if (isRegister() || isModifier()) 7042 return MatchOperand_NoMatch; 7043 7044 if (!parseExpr(Operands)) 7045 return MatchOperand_ParseFail; 7046 7047 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7048 assert(Opr.isImm() || Opr.isExpr()); 7049 SMLoc Loc = Opr.getStartLoc(); 7050 7051 // Currently we do not support arbitrary expressions as branch targets. 7052 // Only labels and absolute expressions are accepted. 
7053 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7054 Error(Loc, "expected an absolute expression or a label"); 7055 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7056 Error(Loc, "expected a 16-bit signed jump offset"); 7057 } 7058 7059 return MatchOperand_Success; 7060 } 7061 7062 //===----------------------------------------------------------------------===// 7063 // Boolean holding registers 7064 //===----------------------------------------------------------------------===// 7065 7066 OperandMatchResultTy 7067 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7068 return parseReg(Operands); 7069 } 7070 7071 //===----------------------------------------------------------------------===// 7072 // mubuf 7073 //===----------------------------------------------------------------------===// 7074 7075 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7076 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7077 } 7078 7079 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7080 const OperandVector &Operands, 7081 bool IsAtomic, 7082 bool IsLds) { 7083 bool IsLdsOpcode = IsLds; 7084 bool HasLdsModifier = false; 7085 OptionalImmIndexMap OptionalIdx; 7086 unsigned FirstOperandIdx = 1; 7087 bool IsAtomicReturn = false; 7088 7089 if (IsAtomic) { 7090 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7091 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7092 if (!Op.isCPol()) 7093 continue; 7094 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7095 break; 7096 } 7097 7098 if (!IsAtomicReturn) { 7099 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7100 if (NewOpc != -1) 7101 Inst.setOpcode(NewOpc); 7102 } 7103 7104 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7105 SIInstrFlags::IsAtomicRet; 7106 } 7107 7108 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7109 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7110 7111 // Add the register arguments 7112 if (Op.isReg()) { 7113 Op.addRegOperands(Inst, 1); 7114 // Insert a tied src for atomic return dst. 7115 // This cannot be postponed as subsequent calls to 7116 // addImmOperands rely on correct number of MC operands. 7117 if (IsAtomicReturn && i == FirstOperandIdx) 7118 Op.addRegOperands(Inst, 1); 7119 continue; 7120 } 7121 7122 // Handle the case where soffset is an immediate 7123 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7124 Op.addImmOperands(Inst, 1); 7125 continue; 7126 } 7127 7128 HasLdsModifier |= Op.isLDS(); 7129 7130 // Handle tokens like 'offen' which are sometimes hard-coded into the 7131 // asm string. There are no MCInst operands for these. 7132 if (Op.isToken()) { 7133 continue; 7134 } 7135 assert(Op.isImm()); 7136 7137 // Handle optional arguments 7138 OptionalIdx[Op.getImmTy()] = i; 7139 } 7140 7141 // This is a workaround for an llvm quirk which may result in an 7142 // incorrect instruction selection. Lds and non-lds versions of 7143 // MUBUF instructions are identical except that lds versions 7144 // have mandatory 'lds' modifier. However this modifier follows 7145 // optional modifiers and llvm asm matcher regards this 'lds' 7146 // modifier as an optional one. As a result, an lds version 7147 // of opcode may be selected even if it has no 'lds' modifier. 7148 if (IsLdsOpcode && !HasLdsModifier) { 7149 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7150 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7151 Inst.setOpcode(NoLdsOpcode); 7152 IsLdsOpcode = false; 7153 } 7154 } 7155 7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7158 7159 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7161 } 7162 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7163 } 7164 7165 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7166 OptionalImmIndexMap OptionalIdx; 7167 7168 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7169 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7170 7171 // Add the register arguments 7172 if (Op.isReg()) { 7173 Op.addRegOperands(Inst, 1); 7174 continue; 7175 } 7176 7177 // Handle the case where soffset is an immediate 7178 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7179 Op.addImmOperands(Inst, 1); 7180 continue; 7181 } 7182 7183 // Handle tokens like 'offen' which are sometimes hard-coded into the 7184 // asm string. There are no MCInst operands for these. 7185 if (Op.isToken()) { 7186 continue; 7187 } 7188 assert(Op.isImm()); 7189 7190 // Handle optional arguments 7191 OptionalIdx[Op.getImmTy()] = i; 7192 } 7193 7194 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7195 AMDGPUOperand::ImmTyOffset); 7196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7199 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7200 } 7201 7202 //===----------------------------------------------------------------------===// 7203 // mimg 7204 //===----------------------------------------------------------------------===// 7205 7206 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7207 bool IsAtomic) { 7208 unsigned I = 1; 7209 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7210 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7211 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7212 } 7213 7214 if (IsAtomic) { 7215 // Add src, same as dst 7216 assert(Desc.getNumDefs() == 1); 7217 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7218 } 7219 7220 OptionalImmIndexMap OptionalIdx; 7221 7222 for (unsigned E = Operands.size(); I != E; ++I) { 7223 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7224 7225 // Add the register arguments 7226 if (Op.isReg()) { 7227 Op.addRegOperands(Inst, 1); 7228 } else if (Op.isImmModifier()) { 7229 OptionalIdx[Op.getImmTy()] = I; 7230 } else if (!Op.isToken()) { 7231 llvm_unreachable("unexpected operand type"); 7232 } 7233 } 7234 7235 bool IsGFX10Plus = isGFX10Plus(); 7236 7237 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7238 if (IsGFX10Plus) 7239 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7241 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7242 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7243 if (IsGFX10Plus) 7244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7245 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7247 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7248 if (!IsGFX10Plus) 7249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7250 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7251 } 7252 7253 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7254 cvtMIMG(Inst, Operands, true); 7255 } 7256 7257 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7258 OptionalImmIndexMap OptionalIdx; 7259 bool IsAtomicReturn = false; 7260 7261 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7262 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7263 if (!Op.isCPol()) 7264 continue; 7265 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7266 break; 7267 } 7268 7269 if (!IsAtomicReturn) { 7270 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7271 if (NewOpc != -1) 7272 Inst.setOpcode(NewOpc); 7273 } 7274 7275 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7276 SIInstrFlags::IsAtomicRet; 7277 7278 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7279 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7280 7281 // Add the register arguments 7282 if (Op.isReg()) { 7283 Op.addRegOperands(Inst, 1); 7284 if (IsAtomicReturn && i == 1) 7285 Op.addRegOperands(Inst, 1); 7286 continue; 7287 } 7288 7289 // Handle the case where soffset is an immediate 7290 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7291 Op.addImmOperands(Inst, 1); 7292 continue; 7293 } 7294 7295 // Handle tokens like 'offen' which are sometimes hard-coded into the 7296 // asm string. There are no MCInst operands for these. 7297 if (Op.isToken()) { 7298 continue; 7299 } 7300 assert(Op.isImm()); 7301 7302 // Handle optional arguments 7303 OptionalIdx[Op.getImmTy()] = i; 7304 } 7305 7306 if ((int)Inst.getNumOperands() <= 7307 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7309 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7310 } 7311 7312 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7313 const OperandVector &Operands) { 7314 for (unsigned I = 1; I < Operands.size(); ++I) { 7315 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7316 if (Operand.isReg()) 7317 Operand.addRegOperands(Inst, 1); 7318 } 7319 7320 Inst.addOperand(MCOperand::createImm(1)); // a16 7321 } 7322 7323 //===----------------------------------------------------------------------===// 7324 // smrd 7325 //===----------------------------------------------------------------------===// 7326 7327 bool AMDGPUOperand::isSMRDOffset8() const { 7328 return isImm() && isUInt<8>(getImm()); 7329 } 7330 7331 bool AMDGPUOperand::isSMEMOffset() const { 7332 return isImm(); // Offset range is checked later by validator. 7333 } 7334 7335 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7336 // 32-bit literals are only supported on CI and we only want to use them 7337 // when the offset is > 8-bits. 
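  // For example, an offset of 0x40 still fits in 8 bits and is matched by
  // isSMRDOffset8() above, whereas an offset such as 0x12345 fails the
  // isUInt<8> test and is matched only here, steering selection towards the
  // CI 32-bit literal-offset encoding. (Illustrative values.)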
7338 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7339 } 7340 7341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7342 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7343 } 7344 7345 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7346 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7347 } 7348 7349 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7350 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7351 } 7352 7353 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7354 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7355 } 7356 7357 //===----------------------------------------------------------------------===// 7358 // vop3 7359 //===----------------------------------------------------------------------===// 7360 7361 static bool ConvertOmodMul(int64_t &Mul) { 7362 if (Mul != 1 && Mul != 2 && Mul != 4) 7363 return false; 7364 7365 Mul >>= 1; 7366 return true; 7367 } 7368 7369 static bool ConvertOmodDiv(int64_t &Div) { 7370 if (Div == 1) { 7371 Div = 0; 7372 return true; 7373 } 7374 7375 if (Div == 2) { 7376 Div = 3; 7377 return true; 7378 } 7379 7380 return false; 7381 } 7382 7383 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7384 // This is intentional and ensures compatibility with sp3. 7385 // See bug 35397 for details. 7386 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7387 if (BoundCtrl == 0 || BoundCtrl == 1) { 7388 BoundCtrl = 1; 7389 return true; 7390 } 7391 return false; 7392 } 7393 7394 // Note: the order in this table matches the order of operands in AsmString. 7395 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7396 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7397 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7398 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7399 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7400 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7401 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7402 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7403 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7404 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7405 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7406 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7407 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7408 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7409 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7410 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7411 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7412 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7413 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7414 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7415 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7416 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7417 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7418 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7419 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7420 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7421 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7422 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7423 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7424 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7425 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7426 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7427 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7428 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7429 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7430 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7431 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7432 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7433 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7434 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7435 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7436 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7437 }; 7438 7439 void AMDGPUAsmParser::onBeginOfFile() { 7440 if (!getParser().getStreamer().getTargetStreamer() || 7441 getSTI().getTargetTriple().getArch() == Triple::r600) 7442 return; 7443 7444 if (!getTargetStreamer().getTargetID()) 7445 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7446 7447 if (isHsaAbiVersion3AndAbove(&getSTI())) 7448 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7449 } 7450 7451 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7452 7453 OperandMatchResultTy res = parseOptionalOpr(Operands); 7454 7455 // This is a hack to enable hardcoded mandatory operands which follow 7456 // optional operands. 7457 // 7458 // The current design assumes that all operands after the first optional operand 7459 // are also optional. However, the implementation of some instructions violates 7460 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7461 // 7462 // To alleviate this problem, we have to (implicitly) parse extra operands 7463 // to make sure the autogenerated parser of custom operands never hits hardcoded 7464 // mandatory operands.
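// Schematic illustration (not a specific opcode): for an instruction whose
// AsmString has a hardcoded token such as 'glc' after its optional operands,
// parsing one optional operand at a time could leave a later custom-operand
// parser staring at that hardcoded token. The bounded lookahead below greedily
// parses the remaining optional operands in one go so that never happens.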
7465 7466 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7467 if (res != MatchOperand_Success || 7468 isToken(AsmToken::EndOfStatement)) 7469 break; 7470 7471 trySkipToken(AsmToken::Comma); 7472 res = parseOptionalOpr(Operands); 7473 } 7474 7475 return res; 7476 } 7477 7478 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7479 OperandMatchResultTy res; 7480 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7481 // try to parse any optional operand here 7482 if (Op.IsBit) { 7483 res = parseNamedBit(Op.Name, Operands, Op.Type); 7484 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7485 res = parseOModOperand(Operands); 7486 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7487 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7488 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7489 res = parseSDWASel(Operands, Op.Name, Op.Type); 7490 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7491 res = parseSDWADstUnused(Operands); 7492 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7493 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7494 Op.Type == AMDGPUOperand::ImmTyNegLo || 7495 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7496 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7497 Op.ConvertResult); 7498 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7499 res = parseDim(Operands); 7500 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7501 res = parseCPol(Operands); 7502 } else { 7503 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7504 } 7505 if (res != MatchOperand_NoMatch) { 7506 return res; 7507 } 7508 } 7509 return MatchOperand_NoMatch; 7510 } 7511 7512 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7513 StringRef Name = getTokenStr(); 7514 if (Name == "mul") { 7515 return parseIntWithPrefix("mul", Operands, 7516 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7517 } 7518 7519 if (Name == "div") { 7520 return parseIntWithPrefix("div", Operands, 7521 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7522 } 7523 7524 return MatchOperand_NoMatch; 7525 } 7526 7527 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7528 cvtVOP3P(Inst, Operands); 7529 7530 int Opc = Inst.getOpcode(); 7531 7532 int SrcNum; 7533 const int Ops[] = { AMDGPU::OpName::src0, 7534 AMDGPU::OpName::src1, 7535 AMDGPU::OpName::src2 }; 7536 for (SrcNum = 0; 7537 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7538 ++SrcNum); 7539 assert(SrcNum > 0); 7540 7541 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7542 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7543 7544 if ((OpSel & (1 << SrcNum)) != 0) { 7545 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7546 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7547 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7548 } 7549 } 7550 7551 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7552 // 1. This operand is input modifiers 7553 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7554 // 2. This is not last operand 7555 && Desc.NumOperands > (OpNum + 1) 7556 // 3. Next operand is register class 7557 && Desc.OpInfo[OpNum + 1].RegClass != -1 7558 // 4. 
Next register is not tied to any other operand 7559 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7560 } 7561 7562 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7563 { 7564 OptionalImmIndexMap OptionalIdx; 7565 unsigned Opc = Inst.getOpcode(); 7566 7567 unsigned I = 1; 7568 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7569 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7570 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7571 } 7572 7573 for (unsigned E = Operands.size(); I != E; ++I) { 7574 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7575 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7576 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7577 } else if (Op.isInterpSlot() || 7578 Op.isInterpAttr() || 7579 Op.isAttrChan()) { 7580 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7581 } else if (Op.isImmModifier()) { 7582 OptionalIdx[Op.getImmTy()] = I; 7583 } else { 7584 llvm_unreachable("unhandled operand type"); 7585 } 7586 } 7587 7588 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7589 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7590 } 7591 7592 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7593 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7594 } 7595 7596 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7597 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7598 } 7599 } 7600 7601 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7602 OptionalImmIndexMap &OptionalIdx) { 7603 unsigned Opc = Inst.getOpcode(); 7604 7605 unsigned I = 1; 7606 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7607 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7608 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7609 } 7610 7611 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7612 // This instruction has src modifiers 7613 for (unsigned E = Operands.size(); I != E; ++I) { 7614 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7615 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7616 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7617 } else if (Op.isImmModifier()) { 7618 OptionalIdx[Op.getImmTy()] = I; 7619 } else if (Op.isRegOrImm()) { 7620 Op.addRegOrImmOperands(Inst, 1); 7621 } else { 7622 llvm_unreachable("unhandled operand type"); 7623 } 7624 } 7625 } else { 7626 // No src modifiers 7627 for (unsigned E = Operands.size(); I != E; ++I) { 7628 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7629 if (Op.isMod()) { 7630 OptionalIdx[Op.getImmTy()] = I; 7631 } else { 7632 Op.addRegOrImmOperands(Inst, 1); 7633 } 7634 } 7635 } 7636 7637 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7639 } 7640 7641 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7642 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7643 } 7644 7645 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7646 // it has src2 register operand that is tied to dst operand 7647 // we don't allow modifiers for this operand in assembler so src2_modifiers 7648 // should be 0. 
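// Schematic example: 'v_mac_f32_e64 v0, v1, v2' ends up as
// <vdst=v0, src0_mods, src0=v1, src1_mods, src1=v2, src2_mods=0, src2=v0>:
// a zero src2_modifiers immediate is inserted and the dst register is
// duplicated as src2.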
7649 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7650 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7651 Opc == AMDGPU::V_MAC_F32_e64_vi || 7652 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7653 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7654 Opc == AMDGPU::V_MAC_F16_e64_vi || 7655 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7656 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7657 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7658 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7659 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7660 auto it = Inst.begin(); 7661 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7662 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7663 ++it; 7664 // Copy the operand to ensure it's not invalidated when Inst grows. 7665 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7666 } 7667 } 7668 7669 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7670 OptionalImmIndexMap OptionalIdx; 7671 cvtVOP3(Inst, Operands, OptionalIdx); 7672 } 7673 7674 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7675 OptionalImmIndexMap &OptIdx) { 7676 const int Opc = Inst.getOpcode(); 7677 const MCInstrDesc &Desc = MII.get(Opc); 7678 7679 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7680 7681 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7682 assert(!IsPacked); 7683 Inst.addOperand(Inst.getOperand(0)); 7684 } 7685 7686 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7687 // instruction, and then figure out where to actually put the modifiers 7688 7689 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7690 if (OpSelIdx != -1) { 7691 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7692 } 7693 7694 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7695 if (OpSelHiIdx != -1) { 7696 int DefaultVal = IsPacked ? 
-1 : 0; 7697 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7698 DefaultVal); 7699 } 7700 7701 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7702 if (NegLoIdx != -1) { 7703 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7704 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7705 } 7706 7707 const int Ops[] = { AMDGPU::OpName::src0, 7708 AMDGPU::OpName::src1, 7709 AMDGPU::OpName::src2 }; 7710 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7711 AMDGPU::OpName::src1_modifiers, 7712 AMDGPU::OpName::src2_modifiers }; 7713 7714 unsigned OpSel = 0; 7715 unsigned OpSelHi = 0; 7716 unsigned NegLo = 0; 7717 unsigned NegHi = 0; 7718 7719 if (OpSelIdx != -1) 7720 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7721 7722 if (OpSelHiIdx != -1) 7723 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7724 7725 if (NegLoIdx != -1) { 7726 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7727 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7728 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7729 } 7730 7731 for (int J = 0; J < 3; ++J) { 7732 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7733 if (OpIdx == -1) 7734 break; 7735 7736 uint32_t ModVal = 0; 7737 7738 if ((OpSel & (1 << J)) != 0) 7739 ModVal |= SISrcMods::OP_SEL_0; 7740 7741 if ((OpSelHi & (1 << J)) != 0) 7742 ModVal |= SISrcMods::OP_SEL_1; 7743 7744 if ((NegLo & (1 << J)) != 0) 7745 ModVal |= SISrcMods::NEG; 7746 7747 if ((NegHi & (1 << J)) != 0) 7748 ModVal |= SISrcMods::NEG_HI; 7749 7750 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7751 7752 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7753 } 7754 } 7755 7756 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7757 OptionalImmIndexMap OptIdx; 7758 cvtVOP3(Inst, Operands, OptIdx); 7759 cvtVOP3P(Inst, Operands, OptIdx); 7760 } 7761 7762 //===----------------------------------------------------------------------===// 7763 // dpp 7764 //===----------------------------------------------------------------------===// 7765 7766 bool AMDGPUOperand::isDPP8() const { 7767 return isImmTy(ImmTyDPP8); 7768 } 7769 7770 bool AMDGPUOperand::isDPPCtrl() const { 7771 using namespace AMDGPU::DPP; 7772 7773 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7774 if (result) { 7775 int64_t Imm = getImm(); 7776 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7777 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7778 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7779 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7780 (Imm == DppCtrl::WAVE_SHL1) || 7781 (Imm == DppCtrl::WAVE_ROL1) || 7782 (Imm == DppCtrl::WAVE_SHR1) || 7783 (Imm == DppCtrl::WAVE_ROR1) || 7784 (Imm == DppCtrl::ROW_MIRROR) || 7785 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7786 (Imm == DppCtrl::BCAST15) || 7787 (Imm == DppCtrl::BCAST31) || 7788 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7789 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7790 } 7791 return false; 7792 } 7793 7794 //===----------------------------------------------------------------------===// 7795 // mAI 7796 //===----------------------------------------------------------------------===// 7797 7798 bool AMDGPUOperand::isBLGP() const { 7799 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7800 } 7801 7802 bool 
AMDGPUOperand::isCBSZ() const { 7803 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7804 } 7805 7806 bool AMDGPUOperand::isABID() const { 7807 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7808 } 7809 7810 bool AMDGPUOperand::isS16Imm() const { 7811 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7812 } 7813 7814 bool AMDGPUOperand::isU16Imm() const { 7815 return isImm() && isUInt<16>(getImm()); 7816 } 7817 7818 //===----------------------------------------------------------------------===// 7819 // dim 7820 //===----------------------------------------------------------------------===// 7821 7822 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7823 // We want to allow "dim:1D" etc., 7824 // but the initial 1 is tokenized as an integer. 7825 std::string Token; 7826 if (isToken(AsmToken::Integer)) { 7827 SMLoc Loc = getToken().getEndLoc(); 7828 Token = std::string(getTokenStr()); 7829 lex(); 7830 if (getLoc() != Loc) 7831 return false; 7832 } 7833 7834 StringRef Suffix; 7835 if (!parseId(Suffix)) 7836 return false; 7837 Token += Suffix; 7838 7839 StringRef DimId = Token; 7840 if (DimId.startswith("SQ_RSRC_IMG_")) 7841 DimId = DimId.drop_front(12); 7842 7843 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7844 if (!DimInfo) 7845 return false; 7846 7847 Encoding = DimInfo->Encoding; 7848 return true; 7849 } 7850 7851 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7852 if (!isGFX10Plus()) 7853 return MatchOperand_NoMatch; 7854 7855 SMLoc S = getLoc(); 7856 7857 if (!trySkipId("dim", AsmToken::Colon)) 7858 return MatchOperand_NoMatch; 7859 7860 unsigned Encoding; 7861 SMLoc Loc = getLoc(); 7862 if (!parseDimId(Encoding)) { 7863 Error(Loc, "invalid dim value"); 7864 return MatchOperand_ParseFail; 7865 } 7866 7867 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7868 AMDGPUOperand::ImmTyDim)); 7869 return MatchOperand_Success; 7870 } 7871 7872 //===----------------------------------------------------------------------===// 7873 // dpp 7874 //===----------------------------------------------------------------------===// 7875 7876 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7877 SMLoc S = getLoc(); 7878 7879 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7880 return MatchOperand_NoMatch; 7881 7882 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7883 7884 int64_t Sels[8]; 7885 7886 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7887 return MatchOperand_ParseFail; 7888 7889 for (size_t i = 0; i < 8; ++i) { 7890 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7891 return MatchOperand_ParseFail; 7892 7893 SMLoc Loc = getLoc(); 7894 if (getParser().parseAbsoluteExpression(Sels[i])) 7895 return MatchOperand_ParseFail; 7896 if (0 > Sels[i] || 7 < Sels[i]) { 7897 Error(Loc, "expected a 3-bit value"); 7898 return MatchOperand_ParseFail; 7899 } 7900 } 7901 7902 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7903 return MatchOperand_ParseFail; 7904 7905 unsigned DPP8 = 0; 7906 for (size_t i = 0; i < 8; ++i) 7907 DPP8 |= (Sels[i] << (i * 3)); 7908 7909 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7910 return MatchOperand_Success; 7911 } 7912 7913 bool 7914 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7915 const OperandVector &Operands) { 7916 if (Ctrl == "row_newbcast") 7917 return isGFX90A(); 7918 7919 if (Ctrl == "row_share" || 7920 Ctrl 
== "row_xmask") 7921 return isGFX10Plus(); 7922 7923 if (Ctrl == "wave_shl" || 7924 Ctrl == "wave_shr" || 7925 Ctrl == "wave_rol" || 7926 Ctrl == "wave_ror" || 7927 Ctrl == "row_bcast") 7928 return isVI() || isGFX9(); 7929 7930 return Ctrl == "row_mirror" || 7931 Ctrl == "row_half_mirror" || 7932 Ctrl == "quad_perm" || 7933 Ctrl == "row_shl" || 7934 Ctrl == "row_shr" || 7935 Ctrl == "row_ror"; 7936 } 7937 7938 int64_t 7939 AMDGPUAsmParser::parseDPPCtrlPerm() { 7940 // quad_perm:[%d,%d,%d,%d] 7941 7942 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7943 return -1; 7944 7945 int64_t Val = 0; 7946 for (int i = 0; i < 4; ++i) { 7947 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7948 return -1; 7949 7950 int64_t Temp; 7951 SMLoc Loc = getLoc(); 7952 if (getParser().parseAbsoluteExpression(Temp)) 7953 return -1; 7954 if (Temp < 0 || Temp > 3) { 7955 Error(Loc, "expected a 2-bit value"); 7956 return -1; 7957 } 7958 7959 Val += (Temp << i * 2); 7960 } 7961 7962 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7963 return -1; 7964 7965 return Val; 7966 } 7967 7968 int64_t 7969 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7970 using namespace AMDGPU::DPP; 7971 7972 // sel:%d 7973 7974 int64_t Val; 7975 SMLoc Loc = getLoc(); 7976 7977 if (getParser().parseAbsoluteExpression(Val)) 7978 return -1; 7979 7980 struct DppCtrlCheck { 7981 int64_t Ctrl; 7982 int Lo; 7983 int Hi; 7984 }; 7985 7986 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7987 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7988 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7989 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7990 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7991 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7992 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7993 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7994 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7995 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7996 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7997 .Default({-1, 0, 0}); 7998 7999 bool Valid; 8000 if (Check.Ctrl == -1) { 8001 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8002 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8003 } else { 8004 Valid = Check.Lo <= Val && Val <= Check.Hi; 8005 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8006 } 8007 8008 if (!Valid) { 8009 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8010 return -1; 8011 } 8012 8013 return Val; 8014 } 8015 8016 OperandMatchResultTy 8017 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8018 using namespace AMDGPU::DPP; 8019 8020 if (!isToken(AsmToken::Identifier) || 8021 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8022 return MatchOperand_NoMatch; 8023 8024 SMLoc S = getLoc(); 8025 int64_t Val = -1; 8026 StringRef Ctrl; 8027 8028 parseId(Ctrl); 8029 8030 if (Ctrl == "row_mirror") { 8031 Val = DppCtrl::ROW_MIRROR; 8032 } else if (Ctrl == "row_half_mirror") { 8033 Val = DppCtrl::ROW_HALF_MIRROR; 8034 } else { 8035 if (skipToken(AsmToken::Colon, "expected a colon")) { 8036 if (Ctrl == "quad_perm") { 8037 Val = parseDPPCtrlPerm(); 8038 } else { 8039 Val = parseDPPCtrlSel(Ctrl); 8040 } 8041 } 8042 } 8043 8044 if (Val == -1) 8045 return MatchOperand_ParseFail; 8046 8047 Operands.push_back( 8048 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8049 return MatchOperand_Success; 8050 } 8051 8052 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8053 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8054 } 8055 8056 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8057 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8058 } 8059 8060 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8061 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8062 } 8063 8064 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8065 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8066 } 8067 8068 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8069 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8070 } 8071 8072 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8073 OptionalImmIndexMap OptionalIdx; 8074 8075 unsigned Opc = Inst.getOpcode(); 8076 bool HasModifiers = 8077 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8078 unsigned I = 1; 8079 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8080 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8081 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8082 } 8083 8084 int Fi = 0; 8085 for (unsigned E = Operands.size(); I != E; ++I) { 8086 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8087 MCOI::TIED_TO); 8088 if (TiedTo != -1) { 8089 assert((unsigned)TiedTo < Inst.getNumOperands()); 8090 // handle tied old or src2 for MAC instructions 8091 Inst.addOperand(Inst.getOperand(TiedTo)); 8092 } 8093 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8094 // Add the register arguments 8095 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8096 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8097 // Skip it. 
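// No MCInst operand is added for it here; the 'vcc' in the source is treated
// as a purely syntactic token for the VOP2b carry operand.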
8098 continue; 8099 } 8100 8101 if (IsDPP8) { 8102 if (Op.isDPP8()) { 8103 Op.addImmOperands(Inst, 1); 8104 } else if (HasModifiers && 8105 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8106 Op.addRegWithFPInputModsOperands(Inst, 2); 8107 } else if (Op.isFI()) { 8108 Fi = Op.getImm(); 8109 } else if (Op.isReg()) { 8110 Op.addRegOperands(Inst, 1); 8111 } else { 8112 llvm_unreachable("Invalid operand type"); 8113 } 8114 } else { 8115 if (HasModifiers && 8116 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8117 Op.addRegWithFPInputModsOperands(Inst, 2); 8118 } else if (Op.isReg()) { 8119 Op.addRegOperands(Inst, 1); 8120 } else if (Op.isDPPCtrl()) { 8121 Op.addImmOperands(Inst, 1); 8122 } else if (Op.isImm()) { 8123 // Handle optional arguments 8124 OptionalIdx[Op.getImmTy()] = I; 8125 } else { 8126 llvm_unreachable("Invalid operand type"); 8127 } 8128 } 8129 } 8130 8131 if (IsDPP8) { 8132 using namespace llvm::AMDGPU::DPP; 8133 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8134 } else { 8135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8136 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8137 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8138 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8139 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8140 } 8141 } 8142 } 8143 8144 //===----------------------------------------------------------------------===// 8145 // sdwa 8146 //===----------------------------------------------------------------------===// 8147 8148 OperandMatchResultTy 8149 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8150 AMDGPUOperand::ImmTy Type) { 8151 using namespace llvm::AMDGPU::SDWA; 8152 8153 SMLoc S = getLoc(); 8154 StringRef Value; 8155 OperandMatchResultTy res; 8156 8157 SMLoc StringLoc; 8158 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8159 if (res != MatchOperand_Success) { 8160 return res; 8161 } 8162 8163 int64_t Int; 8164 Int = StringSwitch<int64_t>(Value) 8165 .Case("BYTE_0", SdwaSel::BYTE_0) 8166 .Case("BYTE_1", SdwaSel::BYTE_1) 8167 .Case("BYTE_2", SdwaSel::BYTE_2) 8168 .Case("BYTE_3", SdwaSel::BYTE_3) 8169 .Case("WORD_0", SdwaSel::WORD_0) 8170 .Case("WORD_1", SdwaSel::WORD_1) 8171 .Case("DWORD", SdwaSel::DWORD) 8172 .Default(0xffffffff); 8173 8174 if (Int == 0xffffffff) { 8175 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8176 return MatchOperand_ParseFail; 8177 } 8178 8179 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8180 return MatchOperand_Success; 8181 } 8182 8183 OperandMatchResultTy 8184 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8185 using namespace llvm::AMDGPU::SDWA; 8186 8187 SMLoc S = getLoc(); 8188 StringRef Value; 8189 OperandMatchResultTy res; 8190 8191 SMLoc StringLoc; 8192 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8193 if (res != MatchOperand_Success) { 8194 return res; 8195 } 8196 8197 int64_t Int; 8198 Int = StringSwitch<int64_t>(Value) 8199 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8200 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8201 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8202 .Default(0xffffffff); 8203 8204 if (Int == 0xffffffff) { 8205 Error(StringLoc, "invalid dst_unused value"); 8206 return MatchOperand_ParseFail; 8207 } 8208 8209 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8210 return MatchOperand_Success; 8211 } 8212 8213 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8214 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8215 } 8216 8217 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8218 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8219 } 8220 8221 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8222 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8223 } 8224 8225 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8226 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8227 } 8228 8229 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8230 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8231 } 8232 8233 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8234 uint64_t BasicInstType, 8235 bool SkipDstVcc, 8236 bool SkipSrcVcc) { 8237 using namespace llvm::AMDGPU::SDWA; 8238 8239 OptionalImmIndexMap OptionalIdx; 8240 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8241 bool SkippedVcc = false; 8242 8243 unsigned I = 1; 8244 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8245 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8246 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8247 } 8248 8249 for (unsigned E = Operands.size(); I != E; ++I) { 8250 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8251 if (SkipVcc && !SkippedVcc && Op.isReg() && 8252 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8253 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8254 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8255 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8256 // Skip VCC only if we didn't skip it on previous iteration. 8257 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
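// Concretely (derived from the checks below): for VOP2 the dst-position vcc is
// seen when only vdst has been added (1 operand so far), and the src-position
// vcc once vdst + src0(+mods) + src1(+mods) have been added (5 operands);
// for VOPC the vcc dst is seen before any operand has been added (0 operands).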
8258 if (BasicInstType == SIInstrFlags::VOP2 && 8259 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8260 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8261 SkippedVcc = true; 8262 continue; 8263 } else if (BasicInstType == SIInstrFlags::VOPC && 8264 Inst.getNumOperands() == 0) { 8265 SkippedVcc = true; 8266 continue; 8267 } 8268 } 8269 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8270 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8271 } else if (Op.isImm()) { 8272 // Handle optional arguments 8273 OptionalIdx[Op.getImmTy()] = I; 8274 } else { 8275 llvm_unreachable("Invalid operand type"); 8276 } 8277 SkippedVcc = false; 8278 } 8279 8280 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8281 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8282 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8283 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments 8284 switch (BasicInstType) { 8285 case SIInstrFlags::VOP1: 8286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8287 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8289 } 8290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8291 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8292 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8293 break; 8294 8295 case SIInstrFlags::VOP2: 8296 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8297 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8298 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8299 } 8300 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8301 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8302 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8304 break; 8305 8306 case SIInstrFlags::VOPC: 8307 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8309 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8310 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8311 break; 8312 8313 default: 8314 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 8315 } 8316 } 8317 8318 // Special case v_mac_{f16, f32}: 8319 // it has a src2 register operand that is tied to the dst operand 8320 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8321 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8322 auto it = Inst.begin(); 8323 std::advance( 8324 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8325 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8326 } 8327 } 8328 8329 //===----------------------------------------------------------------------===// 8330 // mAI 8331 //===----------------------------------------------------------------------===// 8332 8333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8334 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8335 } 8336 8337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8338 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8339 } 8340 8341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8342 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8343 } 8344 8345 /// Force static initialization. 8346 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8347 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8348 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8349 } 8350 8351 #define GET_REGISTER_MATCHER 8352 #define GET_MATCHER_IMPLEMENTATION 8353 #define GET_MNEMONIC_SPELL_CHECKER 8354 #define GET_MNEMONIC_CHECKER 8355 #include "AMDGPUGenAsmMatcher.inc" 8356 8357 // This function should be defined after the auto-generated include so that we 8358 // have the MatchClassKind enum defined 8359 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8360 unsigned Kind) { 8361 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8362 // But MatchInstructionImpl() expects to meet a token and fails to validate 8363 // the operand. This method checks if we are given an immediate operand but 8364 // expect to get the corresponding token. 8365 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8366 switch (Kind) { 8367 case MCK_addr64: 8368 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8369 case MCK_gds: 8370 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8371 case MCK_lds: 8372 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8373 case MCK_idxen: 8374 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8375 case MCK_offen: 8376 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8377 case MCK_SSrcB32: 8378 // When operands have expression values, they will return true for isToken, 8379 // because it is not possible to distinguish between a token and an 8380 // expression at parse time. MatchInstructionImpl() will always try to 8381 // match an operand as a token, when isToken returns true, and when the 8382 // name of the expression is not a valid token, the match will fail, 8383 // so we need to handle it here. 8384 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8385 case MCK_SSrcF32: 8386 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8387 case MCK_SoppBrTarget: 8388 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8389 case MCK_VReg32OrOff: 8390 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8391 case MCK_InterpSlot: 8392 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8393 case MCK_Attr: 8394 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8395 case MCK_AttrChan: 8396 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8397 case MCK_ImmSMEMOffset: 8398 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8399 case MCK_SReg_64: 8400 case MCK_SReg_64_XEXEC: 8401 // Null is defined as a 32-bit register but 8402 // it should also be enabled with 64-bit operands. 8403 // The following code enables it for SReg_64 operands 8404 // used as source and destination. Remaining source 8405 // operands are handled in isInlinableImm. 8406 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8407 default: 8408 return Match_InvalidOperand; 8409 } 8410 } 8411 8412 //===----------------------------------------------------------------------===// 8413 // endpgm 8414 //===----------------------------------------------------------------------===// 8415 8416 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8417 SMLoc S = getLoc(); 8418 int64_t Imm = 0; 8419 8420 if (!parseExpr(Imm)) { 8421 // The operand is optional, if not present default to 0 8422 Imm = 0; 8423 } 8424 8425 if (!isUInt<16>(Imm)) { 8426 Error(S, "expected a 16-bit value"); 8427 return MatchOperand_ParseFail; 8428 } 8429 8430 Operands.push_back( 8431 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8432 return MatchOperand_Success; 8433 } 8434 8435 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8436
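// Note: plain 's_endpgm' leaves the optional immediate at its default of 0;
// a form such as 's_endpgm 1' (where the target encodes the operand) is parsed
// by parseEndpgmOp above and must fit in 16 bits.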