1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/TargetParser.h" 39 40 using namespace llvm; 41 using namespace llvm::AMDGPU; 42 using namespace llvm::amdhsa; 43 44 namespace { 45 46 class AMDGPUAsmParser; 47 48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 49 50 //===----------------------------------------------------------------------===// 51 // Operand 52 //===----------------------------------------------------------------------===// 53 54 class AMDGPUOperand : public MCParsedAsmOperand { 55 enum KindTy { 56 Token, 57 Immediate, 58 Register, 59 Expression 60 } Kind; 61 62 SMLoc StartLoc, EndLoc; 63 const AMDGPUAsmParser *AsmParser; 64 65 public: 66 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 67 : Kind(Kind_), AsmParser(AsmParser_) {} 68 69 using Ptr = std::unique_ptr<AMDGPUOperand>; 70 71 struct Modifiers { 72 bool Abs = false; 73 bool Neg = false; 74 bool Sext = false; 75 76 bool hasFPModifiers() const { return Abs || Neg; } 77 bool hasIntModifiers() const { return Sext; } 78 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 79 80 int64_t getFPModifiersOperand() const { 81 int64_t Operand = 0; 82 Operand |= Abs ? SISrcMods::ABS : 0u; 83 Operand |= Neg ? SISrcMods::NEG : 0u; 84 return Operand; 85 } 86 87 int64_t getIntModifiersOperand() const { 88 int64_t Operand = 0; 89 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 90 return Operand; 91 } 92 93 int64_t getModifiersOperand() const { 94 assert(!(hasFPModifiers() && hasIntModifiers()) 95 && "fp and int modifiers should not be used simultaneously"); 96 if (hasFPModifiers()) { 97 return getFPModifiersOperand(); 98 } else if (hasIntModifiers()) { 99 return getIntModifiersOperand(); 100 } else { 101 return 0; 102 } 103 } 104 105 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 106 }; 107 108 enum ImmTy { 109 ImmTyNone, 110 ImmTyGDS, 111 ImmTyLDS, 112 ImmTyOffen, 113 ImmTyIdxen, 114 ImmTyAddr64, 115 ImmTyOffset, 116 ImmTyInstOffset, 117 ImmTyOffset0, 118 ImmTyOffset1, 119 ImmTyCPol, 120 ImmTySWZ, 121 ImmTyTFE, 122 ImmTyD16, 123 ImmTyClampSI, 124 ImmTyOModSI, 125 ImmTyDPP8, 126 ImmTyDppCtrl, 127 ImmTyDppRowMask, 128 ImmTyDppBankMask, 129 ImmTyDppBoundCtrl, 130 ImmTyDppFi, 131 ImmTySdwaDstSel, 132 ImmTySdwaSrc0Sel, 133 ImmTySdwaSrc1Sel, 134 ImmTySdwaDstUnused, 135 ImmTyDMask, 136 ImmTyDim, 137 ImmTyUNorm, 138 ImmTyDA, 139 ImmTyR128A16, 140 ImmTyA16, 141 ImmTyLWE, 142 ImmTyExpTgt, 143 ImmTyExpCompr, 144 ImmTyExpVM, 145 ImmTyFORMAT, 146 ImmTyHwreg, 147 ImmTyOff, 148 ImmTySendMsg, 149 ImmTyInterpSlot, 150 ImmTyInterpAttr, 151 ImmTyAttrChan, 152 ImmTyOpSel, 153 ImmTyOpSelHi, 154 ImmTyNegLo, 155 ImmTyNegHi, 156 ImmTySwizzle, 157 ImmTyGprIdxMode, 158 ImmTyHigh, 159 ImmTyBLGP, 160 ImmTyCBSZ, 161 ImmTyABID, 162 ImmTyEndpgm, 163 }; 164 165 enum ImmKindTy { 166 ImmKindTyNone, 167 ImmKindTyLiteral, 168 ImmKindTyConst, 169 }; 170 171 private: 172 struct TokOp { 173 const char *Data; 174 unsigned Length; 175 }; 176 177 struct ImmOp { 178 int64_t Val; 179 ImmTy Type; 180 bool IsFPImm; 181 mutable ImmKindTy Kind; 182 Modifiers Mods; 183 }; 184 185 struct RegOp { 186 unsigned RegNo; 187 Modifiers Mods; 188 }; 189 190 union { 191 TokOp Tok; 192 ImmOp Imm; 193 RegOp Reg; 194 const MCExpr *Expr; 195 }; 196 197 public: 198 bool isToken() const override { 199 if (Kind == Token) 200 return true; 201 202 // When parsing operands, we can't always tell if something was meant to be 203 // a token, like 'gds', or an expression that references a global variable. 204 // In this case, we assume the string is an expression, and if we need to 205 // interpret is a token, then we treat the symbol name as the token. 
206 return isSymbolRefExpr(); 207 } 208 209 bool isSymbolRefExpr() const { 210 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 211 } 212 213 bool isImm() const override { 214 return Kind == Immediate; 215 } 216 217 void setImmKindNone() const { 218 assert(isImm()); 219 Imm.Kind = ImmKindTyNone; 220 } 221 222 void setImmKindLiteral() const { 223 assert(isImm()); 224 Imm.Kind = ImmKindTyLiteral; 225 } 226 227 void setImmKindConst() const { 228 assert(isImm()); 229 Imm.Kind = ImmKindTyConst; 230 } 231 232 bool IsImmKindLiteral() const { 233 return isImm() && Imm.Kind == ImmKindTyLiteral; 234 } 235 236 bool isImmKindConst() const { 237 return isImm() && Imm.Kind == ImmKindTyConst; 238 } 239 240 bool isInlinableImm(MVT type) const; 241 bool isLiteralImm(MVT type) const; 242 243 bool isRegKind() const { 244 return Kind == Register; 245 } 246 247 bool isReg() const override { 248 return isRegKind() && !hasModifiers(); 249 } 250 251 bool isRegOrInline(unsigned RCID, MVT type) const { 252 return isRegClass(RCID) || isInlinableImm(type); 253 } 254 255 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 256 return isRegOrInline(RCID, type) || isLiteralImm(type); 257 } 258 259 bool isRegOrImmWithInt16InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 261 } 262 263 bool isRegOrImmWithInt32InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 265 } 266 267 bool isRegOrImmWithInt64InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 269 } 270 271 bool isRegOrImmWithFP16InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 273 } 274 275 bool isRegOrImmWithFP32InputMods() const { 276 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 277 } 278 279 bool isRegOrImmWithFP64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 281 } 282 283 bool isVReg() const { 284 return isRegClass(AMDGPU::VGPR_32RegClassID) || 285 isRegClass(AMDGPU::VReg_64RegClassID) || 286 isRegClass(AMDGPU::VReg_96RegClassID) || 287 isRegClass(AMDGPU::VReg_128RegClassID) || 288 isRegClass(AMDGPU::VReg_160RegClassID) || 289 isRegClass(AMDGPU::VReg_192RegClassID) || 290 isRegClass(AMDGPU::VReg_256RegClassID) || 291 isRegClass(AMDGPU::VReg_512RegClassID) || 292 isRegClass(AMDGPU::VReg_1024RegClassID); 293 } 294 295 bool isVReg32() const { 296 return isRegClass(AMDGPU::VGPR_32RegClassID); 297 } 298 299 bool isVReg32OrOff() const { 300 return isOff() || isVReg32(); 301 } 302 303 bool isNull() const { 304 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 305 } 306 307 bool isVRegWithInputMods() const; 308 309 bool isSDWAOperand(MVT type) const; 310 bool isSDWAFP16Operand() const; 311 bool isSDWAFP32Operand() const; 312 bool isSDWAInt16Operand() const; 313 bool isSDWAInt32Operand() const; 314 315 bool isImmTy(ImmTy ImmT) const { 316 return isImm() && Imm.Type == ImmT; 317 } 318 319 bool isImmModifier() const { 320 return isImm() && Imm.Type != ImmTyNone; 321 } 322 323 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 324 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 325 bool isDMask() const { return isImmTy(ImmTyDMask); } 326 bool isDim() const { return isImmTy(ImmTyDim); } 327 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 328 bool isDA() const { return isImmTy(ImmTyDA); } 329 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 330 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 331 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 332 bool isOff() const { return isImmTy(ImmTyOff); } 333 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 334 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 335 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 336 bool isOffen() const { return isImmTy(ImmTyOffen); } 337 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 338 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 339 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 340 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 341 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 342 343 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 344 bool isGDS() const { return isImmTy(ImmTyGDS); } 345 bool isLDS() const { return isImmTy(ImmTyLDS); } 346 bool isCPol() const { return isImmTy(ImmTyCPol); } 347 bool isSWZ() const { return isImmTy(ImmTySWZ); } 348 bool isTFE() const { return isImmTy(ImmTyTFE); } 349 bool isD16() const { return isImmTy(ImmTyD16); } 350 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 351 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 352 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 353 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 354 bool isFI() const { return isImmTy(ImmTyDppFi); } 355 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 356 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 357 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 358 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 359 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 360 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 361 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 362 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 363 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 364 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 365 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 366 bool isHigh() const { return isImmTy(ImmTyHigh); } 367 368 bool isMod() const { 369 return isClampSI() || isOModSI(); 370 } 371 372 bool isRegOrImm() const { 373 return isReg() || isImm(); 374 } 375 376 bool isRegClass(unsigned RCID) const; 377 378 bool isInlineValue() const; 379 380 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 381 return isRegOrInline(RCID, type) && !hasModifiers(); 382 } 383 384 bool isSCSrcB16() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 386 } 387 388 bool isSCSrcV2B16() const { 389 return isSCSrcB16(); 390 } 391 392 bool isSCSrcB32() const { 393 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 394 } 395 396 bool isSCSrcB64() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 398 } 399 400 bool isBoolReg() const; 401 402 bool isSCSrcF16() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 404 } 405 406 bool isSCSrcV2F16() const { 407 return isSCSrcF16(); 408 } 409 410 bool isSCSrcF32() const { 411 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 412 } 413 414 bool isSCSrcF64() const { 415 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 416 } 417 418 bool isSSrcB32() const { 419 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 420 } 421 422 bool isSSrcB16() const { 423 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 424 } 425 426 bool isSSrcV2B16() const { 427 llvm_unreachable("cannot happen"); 428 return isSSrcB16(); 429 } 430 431 bool isSSrcB64() const { 432 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 433 // See isVSrc64(). 434 return isSCSrcB64() || isLiteralImm(MVT::i64); 435 } 436 437 bool isSSrcF32() const { 438 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 439 } 440 441 bool isSSrcF64() const { 442 return isSCSrcB64() || isLiteralImm(MVT::f64); 443 } 444 445 bool isSSrcF16() const { 446 return isSCSrcB16() || isLiteralImm(MVT::f16); 447 } 448 449 bool isSSrcV2F16() const { 450 llvm_unreachable("cannot happen"); 451 return isSSrcF16(); 452 } 453 454 bool isSSrcV2FP32() const { 455 llvm_unreachable("cannot happen"); 456 return isSSrcF32(); 457 } 458 459 bool isSCSrcV2FP32() const { 460 llvm_unreachable("cannot happen"); 461 return isSCSrcF32(); 462 } 463 464 bool isSSrcV2INT32() const { 465 llvm_unreachable("cannot happen"); 466 return isSSrcB32(); 467 } 468 469 bool isSCSrcV2INT32() const { 470 llvm_unreachable("cannot happen"); 471 return isSCSrcB32(); 472 } 473 474 bool isSSrcOrLdsB32() const { 475 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 476 isLiteralImm(MVT::i32) || isExpr(); 477 } 478 479 bool isVCSrcB32() const { 480 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 481 } 482 483 bool isVCSrcB64() const { 484 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 485 } 486 487 bool isVCSrcB16() const { 488 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 489 } 490 491 bool isVCSrcV2B16() const { 492 return isVCSrcB16(); 493 } 494 495 bool isVCSrcF32() const { 496 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 497 } 498 499 bool isVCSrcF64() const { 500 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 501 } 502 503 bool isVCSrcF16() const { 504 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 505 } 506 507 bool isVCSrcV2F16() const { 508 return isVCSrcF16(); 509 } 510 511 bool isVSrcB32() const { 512 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 513 } 514 515 bool isVSrcB64() const { 516 return isVCSrcF64() || isLiteralImm(MVT::i64); 517 } 518 519 bool isVSrcB16() const { 520 return isVCSrcB16() || isLiteralImm(MVT::i16); 521 } 522 523 bool isVSrcV2B16() const { 524 return isVSrcB16() || isLiteralImm(MVT::v2i16); 525 } 526 527 bool isVCSrcV2FP32() const { 528 return isVCSrcF64(); 529 } 530 531 bool isVSrcV2FP32() const { 532 return isVSrcF64() || isLiteralImm(MVT::v2f32); 533 } 534 535 bool isVCSrcV2INT32() const { 536 return isVCSrcB64(); 537 } 538 539 bool isVSrcV2INT32() const { 540 return isVSrcB64() || isLiteralImm(MVT::v2i32); 541 } 542 543 bool isVSrcF32() const { 544 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 545 } 546 547 bool isVSrcF64() const { 548 return isVCSrcF64() || isLiteralImm(MVT::f64); 549 } 550 551 bool isVSrcF16() const { 552 return isVCSrcF16() || isLiteralImm(MVT::f16); 553 } 554 555 bool isVSrcV2F16() const { 556 return isVSrcF16() || isLiteralImm(MVT::v2f16); 557 } 558 559 bool isVISrcB32() const { 560 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 561 } 562 563 bool isVISrcB16() const { 564 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 565 } 566 567 bool isVISrcV2B16() const { 568 return isVISrcB16(); 569 } 570 571 bool isVISrcF32() const { 572 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 573 } 574 575 
bool isVISrcF16() const { 576 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 577 } 578 579 bool isVISrcV2F16() const { 580 return isVISrcF16() || isVISrcB32(); 581 } 582 583 bool isVISrc_64B64() const { 584 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 585 } 586 587 bool isVISrc_64F64() const { 588 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 589 } 590 591 bool isVISrc_64V2FP32() const { 592 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 593 } 594 595 bool isVISrc_64V2INT32() const { 596 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 597 } 598 599 bool isVISrc_256B64() const { 600 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 601 } 602 603 bool isVISrc_256F64() const { 604 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 605 } 606 607 bool isVISrc_128B16() const { 608 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 609 } 610 611 bool isVISrc_128V2B16() const { 612 return isVISrc_128B16(); 613 } 614 615 bool isVISrc_128B32() const { 616 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 617 } 618 619 bool isVISrc_128F32() const { 620 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 621 } 622 623 bool isVISrc_256V2FP32() const { 624 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 625 } 626 627 bool isVISrc_256V2INT32() const { 628 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 629 } 630 631 bool isVISrc_512B32() const { 632 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 633 } 634 635 bool isVISrc_512B16() const { 636 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 637 } 638 639 bool isVISrc_512V2B16() const { 640 return isVISrc_512B16(); 641 } 642 643 bool isVISrc_512F32() const { 644 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 645 } 646 647 bool isVISrc_512F16() const { 648 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 649 } 650 651 bool isVISrc_512V2F16() const { 652 return isVISrc_512F16() || isVISrc_512B32(); 653 } 654 655 bool isVISrc_1024B32() const { 656 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 657 } 658 659 bool isVISrc_1024B16() const { 660 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 661 } 662 663 bool isVISrc_1024V2B16() const { 664 return isVISrc_1024B16(); 665 } 666 667 bool isVISrc_1024F32() const { 668 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 669 } 670 671 bool isVISrc_1024F16() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 673 } 674 675 bool isVISrc_1024V2F16() const { 676 return isVISrc_1024F16() || isVISrc_1024B32(); 677 } 678 679 bool isAISrcB32() const { 680 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 681 } 682 683 bool isAISrcB16() const { 684 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 685 } 686 687 bool isAISrcV2B16() const { 688 return isAISrcB16(); 689 } 690 691 bool isAISrcF32() const { 692 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 693 } 694 695 bool isAISrcF16() const { 696 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 697 } 698 699 bool isAISrcV2F16() const { 700 return isAISrcF16() || isAISrcB32(); 701 } 702 703 bool isAISrc_64B64() const { 704 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 705 } 706 707 bool isAISrc_64F64() const { 708 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 709 } 710 711 bool isAISrc_128B32() const { 712 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 713 } 714 715 bool isAISrc_128B16() const { 716 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 717 } 718 719 bool isAISrc_128V2B16() const { 720 return isAISrc_128B16(); 721 } 722 723 bool isAISrc_128F32() const { 724 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 725 } 726 727 bool isAISrc_128F16() const { 728 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 729 } 730 731 bool isAISrc_128V2F16() const { 732 return isAISrc_128F16() || isAISrc_128B32(); 733 } 734 735 bool isVISrc_128F16() const { 736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 737 } 738 739 bool isVISrc_128V2F16() const { 740 return isVISrc_128F16() || isVISrc_128B32(); 741 } 742 743 bool isAISrc_256B64() const { 744 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 745 } 746 747 bool isAISrc_256F64() const { 748 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 749 } 750 751 bool isAISrc_512B32() const { 752 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 753 } 754 755 bool isAISrc_512B16() const { 756 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 757 } 758 759 bool isAISrc_512V2B16() const { 760 return isAISrc_512B16(); 761 } 762 763 bool isAISrc_512F32() const { 764 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 765 } 766 767 bool isAISrc_512F16() const { 768 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 769 } 770 771 bool isAISrc_512V2F16() const { 772 return isAISrc_512F16() || isAISrc_512B32(); 773 } 774 775 bool isAISrc_1024B32() const { 776 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 777 } 778 779 bool isAISrc_1024B16() const { 780 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 781 } 782 783 bool isAISrc_1024V2B16() const { 784 return isAISrc_1024B16(); 785 } 786 787 bool isAISrc_1024F32() const { 788 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 789 } 790 791 bool isAISrc_1024F16() const { 792 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 793 } 794 795 bool isAISrc_1024V2F16() const { 796 return isAISrc_1024F16() || isAISrc_1024B32(); 797 } 798 799 bool isKImmFP32() const { 800 return isLiteralImm(MVT::f32); 801 } 802 803 bool isKImmFP16() const { 804 return isLiteralImm(MVT::f16); 805 } 806 807 bool isMem() const override { 808 return false; 809 } 810 811 bool isExpr() const { 812 return Kind == Expression; 813 } 814 815 bool isSoppBrTarget() const { 816 return isExpr() || isImm(); 817 } 818 819 bool isSWaitCnt() const; 820 bool isHwreg() const; 821 bool isSendMsg() const; 822 bool isSwizzle() const; 823 bool isSMRDOffset8() const; 824 bool isSMEMOffset() const; 825 bool isSMRDLiteralOffset() const; 826 bool isDPP8() const; 827 bool isDPPCtrl() const; 828 bool isBLGP() const; 829 bool isCBSZ() const; 830 bool isABID() const; 831 bool isGPRIdxMode() const; 832 bool isS16Imm() const; 833 bool isU16Imm() const; 834 bool isEndpgm() const; 835 836 StringRef getExpressionAsToken() const { 837 assert(isExpr()); 838 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 839 return S->getSymbol().getName(); 840 } 841 842 StringRef getToken() const { 843 assert(isToken()); 844 845 if (Kind == Expression) 846 return getExpressionAsToken(); 847 848 return StringRef(Tok.Data, Tok.Length); 
849 } 850 851 int64_t getImm() const { 852 assert(isImm()); 853 return Imm.Val; 854 } 855 856 void setImm(int64_t Val) { 857 assert(isImm()); 858 Imm.Val = Val; 859 } 860 861 ImmTy getImmTy() const { 862 assert(isImm()); 863 return Imm.Type; 864 } 865 866 unsigned getReg() const override { 867 assert(isRegKind()); 868 return Reg.RegNo; 869 } 870 871 SMLoc getStartLoc() const override { 872 return StartLoc; 873 } 874 875 SMLoc getEndLoc() const override { 876 return EndLoc; 877 } 878 879 SMRange getLocRange() const { 880 return SMRange(StartLoc, EndLoc); 881 } 882 883 Modifiers getModifiers() const { 884 assert(isRegKind() || isImmTy(ImmTyNone)); 885 return isRegKind() ? Reg.Mods : Imm.Mods; 886 } 887 888 void setModifiers(Modifiers Mods) { 889 assert(isRegKind() || isImmTy(ImmTyNone)); 890 if (isRegKind()) 891 Reg.Mods = Mods; 892 else 893 Imm.Mods = Mods; 894 } 895 896 bool hasModifiers() const { 897 return getModifiers().hasModifiers(); 898 } 899 900 bool hasFPModifiers() const { 901 return getModifiers().hasFPModifiers(); 902 } 903 904 bool hasIntModifiers() const { 905 return getModifiers().hasIntModifiers(); 906 } 907 908 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 909 910 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 911 912 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 913 914 template <unsigned Bitwidth> 915 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 916 917 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 918 addKImmFPOperands<16>(Inst, N); 919 } 920 921 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 922 addKImmFPOperands<32>(Inst, N); 923 } 924 925 void addRegOperands(MCInst &Inst, unsigned N) const; 926 927 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 928 addRegOperands(Inst, N); 929 } 930 931 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 932 if (isRegKind()) 933 addRegOperands(Inst, N); 934 else if (isExpr()) 935 Inst.addOperand(MCOperand::createExpr(Expr)); 936 else 937 addImmOperands(Inst, N); 938 } 939 940 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 941 Modifiers Mods = getModifiers(); 942 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 943 if (isRegKind()) { 944 addRegOperands(Inst, N); 945 } else { 946 addImmOperands(Inst, N, false); 947 } 948 } 949 950 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 951 assert(!hasIntModifiers()); 952 addRegOrImmWithInputModsOperands(Inst, N); 953 } 954 955 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 956 assert(!hasFPModifiers()); 957 addRegOrImmWithInputModsOperands(Inst, N); 958 } 959 960 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 961 Modifiers Mods = getModifiers(); 962 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 963 assert(isRegKind()); 964 addRegOperands(Inst, N); 965 } 966 967 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 968 assert(!hasIntModifiers()); 969 addRegWithInputModsOperands(Inst, N); 970 } 971 972 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 973 assert(!hasFPModifiers()); 974 addRegWithInputModsOperands(Inst, N); 975 } 976 977 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 978 if (isImm()) 979 addImmOperands(Inst, N); 980 else { 981 assert(isExpr()); 982 Inst.addOperand(MCOperand::createExpr(Expr)); 983 } 984 } 985 986 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 987 switch (Type) { 988 case ImmTyNone: OS << "None"; break; 989 case ImmTyGDS: OS << "GDS"; break; 990 case ImmTyLDS: OS << "LDS"; break; 991 case ImmTyOffen: OS << "Offen"; break; 992 case ImmTyIdxen: OS << "Idxen"; break; 993 case ImmTyAddr64: OS << "Addr64"; break; 994 case ImmTyOffset: OS << "Offset"; break; 995 case ImmTyInstOffset: OS << "InstOffset"; break; 996 case ImmTyOffset0: OS << "Offset0"; break; 997 case ImmTyOffset1: OS << "Offset1"; break; 998 case ImmTyCPol: OS << "CPol"; break; 999 case ImmTySWZ: OS << "SWZ"; break; 1000 case ImmTyTFE: OS << "TFE"; break; 1001 case ImmTyD16: OS << "D16"; break; 1002 case ImmTyFORMAT: OS << "FORMAT"; break; 1003 case ImmTyClampSI: OS << "ClampSI"; break; 1004 case ImmTyOModSI: OS << "OModSI"; break; 1005 case ImmTyDPP8: OS << "DPP8"; break; 1006 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1007 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1008 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1009 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1010 case ImmTyDppFi: OS << "FI"; break; 1011 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1012 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1013 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1014 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1015 case ImmTyDMask: OS << "DMask"; break; 1016 case ImmTyDim: OS << "Dim"; break; 1017 case ImmTyUNorm: OS << "UNorm"; break; 1018 case ImmTyDA: OS << "DA"; break; 1019 case ImmTyR128A16: OS << "R128A16"; break; 1020 case ImmTyA16: OS << "A16"; break; 1021 case ImmTyLWE: OS << "LWE"; break; 1022 case ImmTyOff: OS << "Off"; break; 1023 case ImmTyExpTgt: OS << "ExpTgt"; break; 1024 case ImmTyExpCompr: OS << "ExpCompr"; break; 1025 case ImmTyExpVM: OS << "ExpVM"; break; 1026 case ImmTyHwreg: OS << "Hwreg"; break; 1027 case ImmTySendMsg: OS << "SendMsg"; break; 1028 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1029 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1030 case ImmTyAttrChan: OS << "AttrChan"; break; 1031 case ImmTyOpSel: OS << "OpSel"; break; 1032 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1033 case ImmTyNegLo: OS << "NegLo"; break; 1034 case ImmTyNegHi: OS << "NegHi"; break; 1035 case ImmTySwizzle: OS << "Swizzle"; break; 1036 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1037 case ImmTyHigh: OS << "High"; break; 1038 case ImmTyBLGP: OS << "BLGP"; break; 1039 case ImmTyCBSZ: OS << "CBSZ"; break; 1040 case ImmTyABID: OS << "ABID"; break; 1041 case ImmTyEndpgm: OS << "Endpgm"; break; 1042 } 1043 } 1044 1045 void print(raw_ostream &OS) const override { 1046 switch (Kind) { 1047 case Register: 1048 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1049 break; 1050 case Immediate: 1051 OS << '<' << getImm(); 1052 if (getImmTy() != ImmTyNone) { 1053 OS << " type: "; printImmTy(OS, getImmTy()); 1054 } 1055 OS << " mods: " << Imm.Mods << '>'; 1056 break; 1057 case Token: 1058 OS << '\'' << getToken() << '\''; 1059 break; 1060 case Expression: 1061 OS << "<expr " << *Expr << '>'; 1062 break; 1063 } 1064 } 1065 1066 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1067 int64_t Val, SMLoc Loc, 1068 ImmTy Type = ImmTyNone, 1069 bool IsFPImm = false) { 1070 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1071 Op->Imm.Val = Val; 1072 Op->Imm.IsFPImm = IsFPImm; 1073 Op->Imm.Kind = ImmKindTyNone; 1074 Op->Imm.Type = Type; 1075 Op->Imm.Mods = Modifiers(); 1076 Op->StartLoc = Loc; 1077 Op->EndLoc = Loc; 1078 return Op; 1079 } 1080 1081 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1082 StringRef Str, SMLoc Loc, 1083 bool HasExplicitEncodingSize = true) { 1084 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1085 Res->Tok.Data = Str.data(); 1086 Res->Tok.Length = Str.size(); 1087 Res->StartLoc = Loc; 1088 Res->EndLoc = Loc; 1089 return Res; 1090 } 1091 1092 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1093 unsigned RegNo, SMLoc S, 1094 SMLoc E) { 1095 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1096 Op->Reg.RegNo = RegNo; 1097 Op->Reg.Mods = Modifiers(); 1098 Op->StartLoc = S; 1099 Op->EndLoc = E; 1100 return Op; 1101 } 1102 1103 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1104 const class MCExpr *Expr, SMLoc S) { 1105 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1106 Op->Expr = Expr; 1107 Op->StartLoc = S; 1108 Op->EndLoc = S; 1109 return Op; 1110 } 1111 }; 1112 1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1114 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1115 return OS; 1116 } 1117 1118 //===----------------------------------------------------------------------===// 1119 // AsmParser 1120 //===----------------------------------------------------------------------===// 1121 1122 // Holds info related to the current kernel, e.g. count of SGPRs used. 1123 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1124 // .amdgpu_hsa_kernel or at EOF. 1125 class KernelScopeInfo { 1126 int SgprIndexUnusedMin = -1; 1127 int VgprIndexUnusedMin = -1; 1128 int AgprIndexUnusedMin = -1; 1129 MCContext *Ctx = nullptr; 1130 MCSubtargetInfo const *MSTI = nullptr; 1131 1132 void usesSgprAt(int i) { 1133 if (i >= SgprIndexUnusedMin) { 1134 SgprIndexUnusedMin = ++i; 1135 if (Ctx) { 1136 MCSymbol* const Sym = 1137 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1138 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1139 } 1140 } 1141 } 1142 1143 void usesVgprAt(int i) { 1144 if (i >= VgprIndexUnusedMin) { 1145 VgprIndexUnusedMin = ++i; 1146 if (Ctx) { 1147 MCSymbol* const Sym = 1148 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1149 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1150 VgprIndexUnusedMin); 1151 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1152 } 1153 } 1154 } 1155 1156 void usesAgprAt(int i) { 1157 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1158 if (!hasMAIInsts(*MSTI)) 1159 return; 1160 1161 if (i >= AgprIndexUnusedMin) { 1162 AgprIndexUnusedMin = ++i; 1163 if (Ctx) { 1164 MCSymbol* const Sym = 1165 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1166 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1167 1168 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1169 MCSymbol* const vSym = 1170 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1171 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1172 VgprIndexUnusedMin); 1173 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1174 } 1175 } 1176 } 1177 1178 public: 1179 KernelScopeInfo() = default; 1180 1181 void initialize(MCContext &Context) { 1182 Ctx = &Context; 1183 MSTI = Ctx->getSubtargetInfo(); 1184 1185 usesSgprAt(SgprIndexUnusedMin = -1); 1186 usesVgprAt(VgprIndexUnusedMin = -1); 1187 if (hasMAIInsts(*MSTI)) { 1188 usesAgprAt(AgprIndexUnusedMin = -1); 1189 } 1190 } 1191 1192 void 
usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1193 switch (RegKind) { 1194 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1195 case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break; 1196 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1197 default: break; 1198 } 1199 } 1200 }; 1201 1202 class AMDGPUAsmParser : public MCTargetAsmParser { 1203 MCAsmParser &Parser; 1204 1205 // Number of extra operands parsed after the first optional operand. 1206 // This may be necessary to skip hardcoded mandatory operands. 1207 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1208 1209 unsigned ForcedEncodingSize = 0; 1210 bool ForcedDPP = false; 1211 bool ForcedSDWA = false; 1212 KernelScopeInfo KernelScope; 1213 unsigned CPolSeen; 1214 1215 /// @name Auto-generated Match Functions 1216 /// { 1217 1218 #define GET_ASSEMBLER_HEADER 1219 #include "AMDGPUGenAsmMatcher.inc" 1220 1221 /// } 1222 1223 private: 1224 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1225 bool OutOfRangeError(SMRange Range); 1226 /// Calculate VGPR/SGPR blocks required for given target, reserved 1227 /// registers, and user-specified NextFreeXGPR values. 1228 /// 1229 /// \param Features [in] Target features, used for bug corrections. 1230 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1231 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1232 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1233 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1234 /// descriptor field, if valid. 1235 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1236 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1237 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1238 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1239 /// \param VGPRBlocks [out] Result VGPR block count. 1240 /// \param SGPRBlocks [out] Result SGPR block count. 1241 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1242 bool FlatScrUsed, bool XNACKUsed, 1243 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1244 SMRange VGPRRange, unsigned NextFreeSGPR, 1245 SMRange SGPRRange, unsigned &VGPRBlocks, 1246 unsigned &SGPRBlocks); 1247 bool ParseDirectiveAMDGCNTarget(); 1248 bool ParseDirectiveAMDHSAKernel(); 1249 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1250 bool ParseDirectiveHSACodeObjectVersion(); 1251 bool ParseDirectiveHSACodeObjectISA(); 1252 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1253 bool ParseDirectiveAMDKernelCodeT(); 1254 // TODO: Possibly make subtargetHasRegister const. 1255 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1256 bool ParseDirectiveAMDGPUHsaKernel(); 1257 1258 bool ParseDirectiveISAVersion(); 1259 bool ParseDirectiveHSAMetadata(); 1260 bool ParseDirectivePALMetadataBegin(); 1261 bool ParseDirectivePALMetadata(); 1262 bool ParseDirectiveAMDGPULDS(); 1263 1264 /// Common code to parse out a block of text (typically YAML) between start and 1265 /// end directives. 
1266 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1267 const char *AssemblerDirectiveEnd, 1268 std::string &CollectString); 1269 1270 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1271 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1272 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1273 unsigned &RegNum, unsigned &RegWidth, 1274 bool RestoreOnFailure = false); 1275 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1276 unsigned &RegNum, unsigned &RegWidth, 1277 SmallVectorImpl<AsmToken> &Tokens); 1278 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1279 unsigned &RegWidth, 1280 SmallVectorImpl<AsmToken> &Tokens); 1281 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1282 unsigned &RegWidth, 1283 SmallVectorImpl<AsmToken> &Tokens); 1284 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1285 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1286 bool ParseRegRange(unsigned& Num, unsigned& Width); 1287 unsigned getRegularReg(RegisterKind RegKind, 1288 unsigned RegNum, 1289 unsigned RegWidth, 1290 SMLoc Loc); 1291 1292 bool isRegister(); 1293 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1294 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1295 void initializeGprCountSymbol(RegisterKind RegKind); 1296 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1297 unsigned RegWidth); 1298 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1299 bool IsAtomic, bool IsLds = false); 1300 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1301 bool IsGdsHardcoded); 1302 1303 public: 1304 enum AMDGPUMatchResultTy { 1305 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1306 }; 1307 enum OperandMode { 1308 OperandMode_Default, 1309 OperandMode_NSA, 1310 }; 1311 1312 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1313 1314 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1315 const MCInstrInfo &MII, 1316 const MCTargetOptions &Options) 1317 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1318 MCAsmParserExtension::Initialize(Parser); 1319 1320 if (getFeatureBits().none()) { 1321 // Set default features. 1322 copySTI().ToggleFeature("southern-islands"); 1323 } 1324 1325 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1326 1327 { 1328 // TODO: make those pre-defined variables read-only. 1329 // Currently there is none suitable machinery in the core llvm-mc for this. 1330 // MCSymbol::isRedefinable is intended for another purpose, and 1331 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
1332 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1333 MCContext &Ctx = getContext(); 1334 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1335 MCSymbol *Sym = 1336 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1337 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1338 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1339 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1340 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1341 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1342 } else { 1343 MCSymbol *Sym = 1344 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1345 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1346 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1347 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1348 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1349 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1350 } 1351 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1352 initializeGprCountSymbol(IS_VGPR); 1353 initializeGprCountSymbol(IS_SGPR); 1354 } else 1355 KernelScope.initialize(getContext()); 1356 } 1357 } 1358 1359 bool hasMIMG_R128() const { 1360 return AMDGPU::hasMIMG_R128(getSTI()); 1361 } 1362 1363 bool hasPackedD16() const { 1364 return AMDGPU::hasPackedD16(getSTI()); 1365 } 1366 1367 bool hasGFX10A16() const { 1368 return AMDGPU::hasGFX10A16(getSTI()); 1369 } 1370 1371 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1372 1373 bool isSI() const { 1374 return AMDGPU::isSI(getSTI()); 1375 } 1376 1377 bool isCI() const { 1378 return AMDGPU::isCI(getSTI()); 1379 } 1380 1381 bool isVI() const { 1382 return AMDGPU::isVI(getSTI()); 1383 } 1384 1385 bool isGFX9() const { 1386 return AMDGPU::isGFX9(getSTI()); 1387 } 1388 1389 bool isGFX90A() const { 1390 return AMDGPU::isGFX90A(getSTI()); 1391 } 1392 1393 bool isGFX9Plus() const { 1394 return AMDGPU::isGFX9Plus(getSTI()); 1395 } 1396 1397 bool isGFX10() const { 1398 return AMDGPU::isGFX10(getSTI()); 1399 } 1400 1401 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1402 1403 bool isGFX10_BEncoding() const { 1404 return AMDGPU::isGFX10_BEncoding(getSTI()); 1405 } 1406 1407 bool hasInv2PiInlineImm() const { 1408 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1409 } 1410 1411 bool hasFlatOffsets() const { 1412 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1413 } 1414 1415 bool hasArchitectedFlatScratch() const { 1416 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1417 } 1418 1419 bool hasSGPR102_SGPR103() const { 1420 return !isVI() && !isGFX9(); 1421 } 1422 1423 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1424 1425 bool hasIntClamp() const { 1426 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1427 } 1428 1429 AMDGPUTargetStreamer &getTargetStreamer() { 1430 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1431 return static_cast<AMDGPUTargetStreamer &>(TS); 1432 } 1433 1434 const MCRegisterInfo *getMRI() const { 1435 // We need this const_cast because for some reason getContext() is not const 1436 // in MCAsmParser. 
1437 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1438 } 1439 1440 const MCInstrInfo *getMII() const { 1441 return &MII; 1442 } 1443 1444 const FeatureBitset &getFeatureBits() const { 1445 return getSTI().getFeatureBits(); 1446 } 1447 1448 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1449 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1450 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1451 1452 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1453 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1454 bool isForcedDPP() const { return ForcedDPP; } 1455 bool isForcedSDWA() const { return ForcedSDWA; } 1456 ArrayRef<unsigned> getMatchedVariants() const; 1457 StringRef getMatchedVariantName() const; 1458 1459 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1460 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1461 bool RestoreOnFailure); 1462 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1463 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1464 SMLoc &EndLoc) override; 1465 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1466 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1467 unsigned Kind) override; 1468 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1469 OperandVector &Operands, MCStreamer &Out, 1470 uint64_t &ErrorInfo, 1471 bool MatchingInlineAsm) override; 1472 bool ParseDirective(AsmToken DirectiveID) override; 1473 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1474 OperandMode Mode = OperandMode_Default); 1475 StringRef parseMnemonicSuffix(StringRef Name); 1476 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1477 SMLoc NameLoc, OperandVector &Operands) override; 1478 //bool ProcessInstruction(MCInst &Inst); 1479 1480 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1481 1482 OperandMatchResultTy 1483 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1484 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1485 bool (*ConvertResult)(int64_t &) = nullptr); 1486 1487 OperandMatchResultTy 1488 parseOperandArrayWithPrefix(const char *Prefix, 1489 OperandVector &Operands, 1490 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1491 bool (*ConvertResult)(int64_t&) = nullptr); 1492 1493 OperandMatchResultTy 1494 parseNamedBit(StringRef Name, OperandVector &Operands, 1495 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1496 OperandMatchResultTy parseCPol(OperandVector &Operands); 1497 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1498 StringRef &Value, 1499 SMLoc &StringLoc); 1500 1501 bool isModifier(); 1502 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1503 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1504 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1505 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1506 bool parseSP3NegModifier(); 1507 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1508 OperandMatchResultTy parseReg(OperandVector &Operands); 1509 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1510 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1511 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1512 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1513 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1514 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1515 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1516 OperandMatchResultTy parseUfmt(int64_t &Format); 1517 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1518 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1519 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1520 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1521 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1522 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1523 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1524 1525 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1526 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1527 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1528 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1529 1530 bool parseCnt(int64_t &IntVal); 1531 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1532 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1533 1534 private: 1535 struct OperandInfoTy { 1536 SMLoc Loc; 1537 int64_t Id; 1538 bool IsSymbolic = false; 1539 bool IsDefined = false; 1540 1541 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1542 }; 1543 1544 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1545 bool validateSendMsg(const OperandInfoTy &Msg, 1546 const OperandInfoTy &Op, 1547 const OperandInfoTy &Stream); 1548 1549 bool parseHwregBody(OperandInfoTy &HwReg, 1550 OperandInfoTy &Offset, 1551 OperandInfoTy &Width); 1552 bool validateHwreg(const OperandInfoTy &HwReg, 1553 const OperandInfoTy &Offset, 1554 const OperandInfoTy &Width); 1555 1556 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1557 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1558 1559 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1560 const OperandVector &Operands) const; 1561 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1562 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1563 SMLoc getLitLoc(const OperandVector &Operands) const; 1564 SMLoc getConstLoc(const OperandVector &Operands) const; 1565 1566 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1567 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1568 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1569 bool validateSOPLiteral(const MCInst &Inst) const; 1570 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1571 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1572 bool validateIntClampSupported(const MCInst &Inst); 1573 bool validateMIMGAtomicDMask(const MCInst &Inst); 1574 bool validateMIMGGatherDMask(const MCInst &Inst); 1575 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1576 bool validateMIMGDataSize(const MCInst &Inst); 1577 bool validateMIMGAddrSize(const 
MCInst &Inst); 1578 bool validateMIMGD16(const MCInst &Inst); 1579 bool validateMIMGDim(const MCInst &Inst); 1580 bool validateMIMGMSAA(const MCInst &Inst); 1581 bool validateOpSel(const MCInst &Inst); 1582 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1583 bool validateVccOperand(unsigned Reg) const; 1584 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1585 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1586 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1587 bool validateAGPRLdSt(const MCInst &Inst) const; 1588 bool validateVGPRAlign(const MCInst &Inst) const; 1589 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1590 bool validateDivScale(const MCInst &Inst); 1591 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1592 const SMLoc &IDLoc); 1593 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1594 unsigned getConstantBusLimit(unsigned Opcode) const; 1595 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1596 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1597 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1598 1599 bool isSupportedMnemo(StringRef Mnemo, 1600 const FeatureBitset &FBS); 1601 bool isSupportedMnemo(StringRef Mnemo, 1602 const FeatureBitset &FBS, 1603 ArrayRef<unsigned> Variants); 1604 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1605 1606 bool isId(const StringRef Id) const; 1607 bool isId(const AsmToken &Token, const StringRef Id) const; 1608 bool isToken(const AsmToken::TokenKind Kind) const; 1609 bool trySkipId(const StringRef Id); 1610 bool trySkipId(const StringRef Pref, const StringRef Id); 1611 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1612 bool trySkipToken(const AsmToken::TokenKind Kind); 1613 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1614 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1615 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1616 1617 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1618 AsmToken::TokenKind getTokenKind() const; 1619 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1620 bool parseExpr(OperandVector &Operands); 1621 StringRef getTokenStr() const; 1622 AsmToken peekToken(); 1623 AsmToken getToken() const; 1624 SMLoc getLoc() const; 1625 void lex(); 1626 1627 public: 1628 void onBeginOfFile() override; 1629 1630 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1631 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1632 1633 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1634 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1635 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1636 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1637 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1638 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1639 1640 bool parseSwizzleOperand(int64_t &Op, 1641 const unsigned MinVal, 1642 const unsigned MaxVal, 1643 const StringRef ErrMsg, 1644 SMLoc &Loc); 1645 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1646 const unsigned MinVal, 1647 const unsigned MaxVal, 1648 const StringRef ErrMsg); 1649 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1650 bool parseSwizzleOffset(int64_t &Imm); 1651 bool parseSwizzleMacro(int64_t &Imm); 1652 bool 
parseSwizzleQuadPerm(int64_t &Imm); 1653 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1654 bool parseSwizzleBroadcast(int64_t &Imm); 1655 bool parseSwizzleSwap(int64_t &Imm); 1656 bool parseSwizzleReverse(int64_t &Imm); 1657 1658 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1659 int64_t parseGPRIdxMacro(); 1660 1661 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1662 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1663 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1664 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1665 1666 AMDGPUOperand::Ptr defaultCPol() const; 1667 1668 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1669 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1670 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1671 AMDGPUOperand::Ptr defaultFlatOffset() const; 1672 1673 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1674 1675 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1676 OptionalImmIndexMap &OptionalIdx); 1677 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1678 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1679 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1680 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1681 OptionalImmIndexMap &OptionalIdx); 1682 1683 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1684 1685 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1686 bool IsAtomic = false); 1687 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1688 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1689 1690 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1691 1692 bool parseDimId(unsigned &Encoding); 1693 OperandMatchResultTy parseDim(OperandVector &Operands); 1694 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1695 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1696 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1697 int64_t parseDPPCtrlSel(StringRef Ctrl); 1698 int64_t parseDPPCtrlPerm(); 1699 AMDGPUOperand::Ptr defaultRowMask() const; 1700 AMDGPUOperand::Ptr defaultBankMask() const; 1701 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1702 AMDGPUOperand::Ptr defaultFI() const; 1703 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1704 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1705 1706 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1707 AMDGPUOperand::ImmTy Type); 1708 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1709 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1710 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1711 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1712 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1713 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1714 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1715 uint64_t BasicInstType, 1716 bool SkipDstVcc = false, 1717 bool SkipSrcVcc = false); 1718 1719 AMDGPUOperand::Ptr defaultBLGP() const; 1720 AMDGPUOperand::Ptr defaultCBSZ() const; 1721 AMDGPUOperand::Ptr defaultABID() const; 1722 1723 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1724 AMDGPUOperand::Ptr 
defaultEndpgmImmOperands() const; 1725 }; 1726 1727 struct OptionalOperand { 1728 const char *Name; 1729 AMDGPUOperand::ImmTy Type; 1730 bool IsBit; 1731 bool (*ConvertResult)(int64_t&); 1732 }; 1733 1734 } // end anonymous namespace 1735 1736 // May be called with integer type with equivalent bitwidth. 1737 static const fltSemantics *getFltSemantics(unsigned Size) { 1738 switch (Size) { 1739 case 4: 1740 return &APFloat::IEEEsingle(); 1741 case 8: 1742 return &APFloat::IEEEdouble(); 1743 case 2: 1744 return &APFloat::IEEEhalf(); 1745 default: 1746 llvm_unreachable("unsupported fp type"); 1747 } 1748 } 1749 1750 static const fltSemantics *getFltSemantics(MVT VT) { 1751 return getFltSemantics(VT.getSizeInBits() / 8); 1752 } 1753 1754 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1755 switch (OperandType) { 1756 case AMDGPU::OPERAND_REG_IMM_INT32: 1757 case AMDGPU::OPERAND_REG_IMM_FP32: 1758 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1759 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1760 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1761 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1762 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1763 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1764 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1765 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1766 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1767 case AMDGPU::OPERAND_KIMM32: 1768 return &APFloat::IEEEsingle(); 1769 case AMDGPU::OPERAND_REG_IMM_INT64: 1770 case AMDGPU::OPERAND_REG_IMM_FP64: 1771 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1772 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1773 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1774 return &APFloat::IEEEdouble(); 1775 case AMDGPU::OPERAND_REG_IMM_INT16: 1776 case AMDGPU::OPERAND_REG_IMM_FP16: 1777 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1778 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1779 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1780 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1781 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1782 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1783 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1784 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1785 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1786 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1787 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1788 case AMDGPU::OPERAND_KIMM16: 1789 return &APFloat::IEEEhalf(); 1790 default: 1791 llvm_unreachable("unsupported fp type"); 1792 } 1793 } 1794 1795 //===----------------------------------------------------------------------===// 1796 // Operand 1797 //===----------------------------------------------------------------------===// 1798 1799 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1800 bool Lost; 1801 1802 // Convert literal to single precision 1803 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1804 APFloat::rmNearestTiesToEven, 1805 &Lost); 1806 // We allow precision lost but not overflow or underflow 1807 if (Status != APFloat::opOK && 1808 Lost && 1809 ((Status & APFloat::opOverflow) != 0 || 1810 (Status & APFloat::opUnderflow) != 0)) { 1811 return false; 1812 } 1813 1814 return true; 1815 } 1816 1817 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1818 return isUIntN(Size, Val) || isIntN(Size, Val); 1819 } 1820 1821 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1822 if (VT.getScalarType() == MVT::i16) { 1823 // FP immediate values are broken. 1824 return isInlinableIntLiteral(Val); 1825 } 1826 1827 // f16/v2f16 operands work correctly for all values. 
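// Illustrative examples of what isInlinableLiteral16 accepts (assuming the
// usual AMDGPU inline-constant set): the integers -16..64 plus the fp16
// encodings of 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and, when
// HasInv2Pi is set, 1/(2*pi); other values are not inline constants.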
1828 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1829 } 1830 1831 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1832 1833 // This is a hack to enable named inline values like 1834 // shared_base with both 32-bit and 64-bit operands. 1835 // Note that these values are defined as 1836 // 32-bit operands only. 1837 if (isInlineValue()) { 1838 return true; 1839 } 1840 1841 if (!isImmTy(ImmTyNone)) { 1842 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1843 return false; 1844 } 1845 // TODO: We should avoid using host float here. It would be better to 1846 // check the float bit values which is what a few other places do. 1847 // We've had bot failures before due to weird NaN support on mips hosts. 1848 1849 APInt Literal(64, Imm.Val); 1850 1851 if (Imm.IsFPImm) { // We got fp literal token 1852 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1853 return AMDGPU::isInlinableLiteral64(Imm.Val, 1854 AsmParser->hasInv2PiInlineImm()); 1855 } 1856 1857 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1858 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1859 return false; 1860 1861 if (type.getScalarSizeInBits() == 16) { 1862 return isInlineableLiteralOp16( 1863 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1864 type, AsmParser->hasInv2PiInlineImm()); 1865 } 1866 1867 // Check if single precision literal is inlinable 1868 return AMDGPU::isInlinableLiteral32( 1869 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1870 AsmParser->hasInv2PiInlineImm()); 1871 } 1872 1873 // We got int literal token. 1874 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1875 return AMDGPU::isInlinableLiteral64(Imm.Val, 1876 AsmParser->hasInv2PiInlineImm()); 1877 } 1878 1879 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1880 return false; 1881 } 1882 1883 if (type.getScalarSizeInBits() == 16) { 1884 return isInlineableLiteralOp16( 1885 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1886 type, AsmParser->hasInv2PiInlineImm()); 1887 } 1888 1889 return AMDGPU::isInlinableLiteral32( 1890 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1891 AsmParser->hasInv2PiInlineImm()); 1892 } 1893 1894 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1895 // Check that this immediate can be added as a literal 1896 if (!isImmTy(ImmTyNone)) { 1897 return false; 1898 } 1899 1900 if (!Imm.IsFPImm) { 1901 // We got int literal token. 1902 1903 if (type == MVT::f64 && hasFPModifiers()) { 1904 // Cannot apply fp modifiers to int literals preserving the same semantics 1905 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1906 // disable these cases. 1907 return false; 1908 } 1909 1910 unsigned Size = type.getSizeInBits(); 1911 if (Size == 64) 1912 Size = 32; 1913 1914 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1915 // types. 1916 return isSafeTruncation(Imm.Val, Size); 1917 } 1918 1919 // We got fp literal token 1920 if (type == MVT::f64) { // Expected 64-bit fp operand 1921 // The low 32 bits of the literal would be set to zeroes, but such literals are accepted 1922 return true; 1923 } 1924 1925 if (type == MVT::i64) { // Expected 64-bit int operand 1926 // We don't allow fp literals in 64-bit integer instructions. It is 1927 // unclear how we should encode them.
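// For instance, it is not obvious whether "1.5" used with an i64 operand
// should be encoded as a truncated f64 bit pattern or converted to an
// integer, so such operands are rejected instead of guessing.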
1928 return false; 1929 } 1930 1931 // We allow fp literals with f16x2 operands assuming that the specified 1932 // literal goes into the lower half and the upper half is zero. We also 1933 // require that the literal may be losslessly converted to f16. 1934 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1935 (type == MVT::v2i16)? MVT::i16 : 1936 (type == MVT::v2f32)? MVT::f32 : type; 1937 1938 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1939 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1940 } 1941 1942 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1943 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1944 } 1945 1946 bool AMDGPUOperand::isVRegWithInputMods() const { 1947 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1948 // GFX90A allows DPP on 64-bit operands. 1949 (isRegClass(AMDGPU::VReg_64RegClassID) && 1950 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1951 } 1952 1953 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1954 if (AsmParser->isVI()) 1955 return isVReg32(); 1956 else if (AsmParser->isGFX9Plus()) 1957 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1958 else 1959 return false; 1960 } 1961 1962 bool AMDGPUOperand::isSDWAFP16Operand() const { 1963 return isSDWAOperand(MVT::f16); 1964 } 1965 1966 bool AMDGPUOperand::isSDWAFP32Operand() const { 1967 return isSDWAOperand(MVT::f32); 1968 } 1969 1970 bool AMDGPUOperand::isSDWAInt16Operand() const { 1971 return isSDWAOperand(MVT::i16); 1972 } 1973 1974 bool AMDGPUOperand::isSDWAInt32Operand() const { 1975 return isSDWAOperand(MVT::i32); 1976 } 1977 1978 bool AMDGPUOperand::isBoolReg() const { 1979 auto FB = AsmParser->getFeatureBits(); 1980 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1981 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1982 } 1983 1984 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1985 { 1986 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1987 assert(Size == 2 || Size == 4 || Size == 8); 1988 1989 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1990 1991 if (Imm.Mods.Abs) { 1992 Val &= ~FpSignMask; 1993 } 1994 if (Imm.Mods.Neg) { 1995 Val ^= FpSignMask; 1996 } 1997 1998 return Val; 1999 } 2000 2001 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2002 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2003 Inst.getNumOperands())) { 2004 addLiteralImmOperand(Inst, Imm.Val, 2005 ApplyModifiers & 2006 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2007 } else { 2008 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2009 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2010 setImmKindNone(); 2011 } 2012 } 2013 2014 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2015 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2016 auto OpNum = Inst.getNumOperands(); 2017 // Check that this operand accepts literals 2018 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2019 2020 if (ApplyModifiers) { 2021 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2022 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2023 Val = applyInputFPModifiers(Val, Size); 2024 } 2025 2026 APInt Literal(64, Val); 2027 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2028 2029 if (Imm.IsFPImm) { // We got fp literal token 2030 switch (OpTy) { 2031 case AMDGPU::OPERAND_REG_IMM_INT64: 2032 case AMDGPU::OPERAND_REG_IMM_FP64: 2033 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2034 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2035 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2036 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2037 AsmParser->hasInv2PiInlineImm())) { 2038 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2039 setImmKindConst(); 2040 return; 2041 } 2042 2043 // Non-inlineable 2044 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2045 // For fp operands we check if low 32 bits are zeros 2046 if (Literal.getLoBits(32) != 0) { 2047 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2048 "Can't encode literal as exact 64-bit floating-point operand. " 2049 "Low 32-bits will be set to zero"); 2050 } 2051 2052 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2053 setImmKindLiteral(); 2054 return; 2055 } 2056 2057 // We don't allow fp literals in 64-bit integer instructions. It is 2058 // unclear how we should encode them. This case should be checked earlier 2059 // in predicate methods (isLiteralImm()) 2060 llvm_unreachable("fp literal in 64-bit integer instruction."); 2061 2062 case AMDGPU::OPERAND_REG_IMM_INT32: 2063 case AMDGPU::OPERAND_REG_IMM_FP32: 2064 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2065 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2067 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2069 case AMDGPU::OPERAND_REG_IMM_INT16: 2070 case AMDGPU::OPERAND_REG_IMM_FP16: 2071 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2072 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2073 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2074 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2075 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2076 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2077 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2078 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2079 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2080 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2081 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2082 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2083 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2084 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2085 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2086 case AMDGPU::OPERAND_KIMM32: 2087 case AMDGPU::OPERAND_KIMM16: { 2088 bool lost; 2089 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2090 // Convert literal to single precision 2091 FPLiteral.convert(*getOpFltSemantics(OpTy), 2092 APFloat::rmNearestTiesToEven, &lost); 2093 // We allow precision lost but not overflow or underflow. This should be 2094 // checked earlier in isLiteralImm() 2095 2096 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2097 Inst.addOperand(MCOperand::createImm(ImmVal)); 2098 setImmKindLiteral(); 2099 return; 2100 } 2101 default: 2102 llvm_unreachable("invalid operand size"); 2103 } 2104 2105 return; 2106 } 2107 2108 // We got int literal token. 2109 // Only sign extend inline immediates. 
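// Worked example for the 32-bit cases below: an integer token of -1 is
// inlinable and is emitted as the inline constant -1, while 0x12345678 is
// not inlinable and is emitted as a literal truncated to its low 32 bits.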
2110 switch (OpTy) { 2111 case AMDGPU::OPERAND_REG_IMM_INT32: 2112 case AMDGPU::OPERAND_REG_IMM_FP32: 2113 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2114 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2115 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2118 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2119 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2120 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2122 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2123 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2124 if (isSafeTruncation(Val, 32) && 2125 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2126 AsmParser->hasInv2PiInlineImm())) { 2127 Inst.addOperand(MCOperand::createImm(Val)); 2128 setImmKindConst(); 2129 return; 2130 } 2131 2132 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2133 setImmKindLiteral(); 2134 return; 2135 2136 case AMDGPU::OPERAND_REG_IMM_INT64: 2137 case AMDGPU::OPERAND_REG_IMM_FP64: 2138 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2139 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2140 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2141 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2142 Inst.addOperand(MCOperand::createImm(Val)); 2143 setImmKindConst(); 2144 return; 2145 } 2146 2147 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2148 setImmKindLiteral(); 2149 return; 2150 2151 case AMDGPU::OPERAND_REG_IMM_INT16: 2152 case AMDGPU::OPERAND_REG_IMM_FP16: 2153 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2154 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2156 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2157 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2158 if (isSafeTruncation(Val, 16) && 2159 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2160 AsmParser->hasInv2PiInlineImm())) { 2161 Inst.addOperand(MCOperand::createImm(Val)); 2162 setImmKindConst(); 2163 return; 2164 } 2165 2166 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2167 setImmKindLiteral(); 2168 return; 2169 2170 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2171 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2172 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2173 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2174 assert(isSafeTruncation(Val, 16)); 2175 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2176 AsmParser->hasInv2PiInlineImm())); 2177 2178 Inst.addOperand(MCOperand::createImm(Val)); 2179 return; 2180 } 2181 case AMDGPU::OPERAND_KIMM32: 2182 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2183 setImmKindNone(); 2184 return; 2185 case AMDGPU::OPERAND_KIMM16: 2186 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2187 setImmKindNone(); 2188 return; 2189 default: 2190 llvm_unreachable("invalid operand size"); 2191 } 2192 } 2193 2194 template <unsigned Bitwidth> 2195 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2196 APInt Literal(64, Imm.Val); 2197 setImmKindNone(); 2198 2199 if (!Imm.IsFPImm) { 2200 // We got int literal token. 
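// The integer token is passed through unchanged: only its low Bitwidth bits
// are kept and no int-to-fp conversion is done. E.g. (illustrative) a token
// of 0x3F800000 used as a 32-bit k-imm ends up encoding 1.0f.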
2201 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2202 return; 2203 } 2204 2205 bool Lost; 2206 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2207 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2208 APFloat::rmNearestTiesToEven, &Lost); 2209 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2210 } 2211 2212 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2213 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2214 } 2215 2216 static bool isInlineValue(unsigned Reg) { 2217 switch (Reg) { 2218 case AMDGPU::SRC_SHARED_BASE: 2219 case AMDGPU::SRC_SHARED_LIMIT: 2220 case AMDGPU::SRC_PRIVATE_BASE: 2221 case AMDGPU::SRC_PRIVATE_LIMIT: 2222 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2223 return true; 2224 case AMDGPU::SRC_VCCZ: 2225 case AMDGPU::SRC_EXECZ: 2226 case AMDGPU::SRC_SCC: 2227 return true; 2228 case AMDGPU::SGPR_NULL: 2229 return true; 2230 default: 2231 return false; 2232 } 2233 } 2234 2235 bool AMDGPUOperand::isInlineValue() const { 2236 return isRegKind() && ::isInlineValue(getReg()); 2237 } 2238 2239 //===----------------------------------------------------------------------===// 2240 // AsmParser 2241 //===----------------------------------------------------------------------===// 2242 2243 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2244 if (Is == IS_VGPR) { 2245 switch (RegWidth) { 2246 default: return -1; 2247 case 1: return AMDGPU::VGPR_32RegClassID; 2248 case 2: return AMDGPU::VReg_64RegClassID; 2249 case 3: return AMDGPU::VReg_96RegClassID; 2250 case 4: return AMDGPU::VReg_128RegClassID; 2251 case 5: return AMDGPU::VReg_160RegClassID; 2252 case 6: return AMDGPU::VReg_192RegClassID; 2253 case 7: return AMDGPU::VReg_224RegClassID; 2254 case 8: return AMDGPU::VReg_256RegClassID; 2255 case 16: return AMDGPU::VReg_512RegClassID; 2256 case 32: return AMDGPU::VReg_1024RegClassID; 2257 } 2258 } else if (Is == IS_TTMP) { 2259 switch (RegWidth) { 2260 default: return -1; 2261 case 1: return AMDGPU::TTMP_32RegClassID; 2262 case 2: return AMDGPU::TTMP_64RegClassID; 2263 case 4: return AMDGPU::TTMP_128RegClassID; 2264 case 8: return AMDGPU::TTMP_256RegClassID; 2265 case 16: return AMDGPU::TTMP_512RegClassID; 2266 } 2267 } else if (Is == IS_SGPR) { 2268 switch (RegWidth) { 2269 default: return -1; 2270 case 1: return AMDGPU::SGPR_32RegClassID; 2271 case 2: return AMDGPU::SGPR_64RegClassID; 2272 case 3: return AMDGPU::SGPR_96RegClassID; 2273 case 4: return AMDGPU::SGPR_128RegClassID; 2274 case 5: return AMDGPU::SGPR_160RegClassID; 2275 case 6: return AMDGPU::SGPR_192RegClassID; 2276 case 7: return AMDGPU::SGPR_224RegClassID; 2277 case 8: return AMDGPU::SGPR_256RegClassID; 2278 case 16: return AMDGPU::SGPR_512RegClassID; 2279 } 2280 } else if (Is == IS_AGPR) { 2281 switch (RegWidth) { 2282 default: return -1; 2283 case 1: return AMDGPU::AGPR_32RegClassID; 2284 case 2: return AMDGPU::AReg_64RegClassID; 2285 case 3: return AMDGPU::AReg_96RegClassID; 2286 case 4: return AMDGPU::AReg_128RegClassID; 2287 case 5: return AMDGPU::AReg_160RegClassID; 2288 case 6: return AMDGPU::AReg_192RegClassID; 2289 case 7: return AMDGPU::AReg_224RegClassID; 2290 case 8: return AMDGPU::AReg_256RegClassID; 2291 case 16: return AMDGPU::AReg_512RegClassID; 2292 case 32: return AMDGPU::AReg_1024RegClassID; 2293 } 2294 } 2295 return -1; 2296 } 2297 2298 static unsigned getSpecialRegForName(StringRef RegName) { 2299 return StringSwitch<unsigned>(RegName) 2300 .Case("exec", 
AMDGPU::EXEC) 2301 .Case("vcc", AMDGPU::VCC) 2302 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2303 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2304 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2305 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2306 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2307 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2308 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2309 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2310 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2311 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2312 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2313 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2314 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2315 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2316 .Case("m0", AMDGPU::M0) 2317 .Case("vccz", AMDGPU::SRC_VCCZ) 2318 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2319 .Case("execz", AMDGPU::SRC_EXECZ) 2320 .Case("src_execz", AMDGPU::SRC_EXECZ) 2321 .Case("scc", AMDGPU::SRC_SCC) 2322 .Case("src_scc", AMDGPU::SRC_SCC) 2323 .Case("tba", AMDGPU::TBA) 2324 .Case("tma", AMDGPU::TMA) 2325 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2326 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2327 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2328 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2329 .Case("vcc_lo", AMDGPU::VCC_LO) 2330 .Case("vcc_hi", AMDGPU::VCC_HI) 2331 .Case("exec_lo", AMDGPU::EXEC_LO) 2332 .Case("exec_hi", AMDGPU::EXEC_HI) 2333 .Case("tma_lo", AMDGPU::TMA_LO) 2334 .Case("tma_hi", AMDGPU::TMA_HI) 2335 .Case("tba_lo", AMDGPU::TBA_LO) 2336 .Case("tba_hi", AMDGPU::TBA_HI) 2337 .Case("pc", AMDGPU::PC_REG) 2338 .Case("null", AMDGPU::SGPR_NULL) 2339 .Default(AMDGPU::NoRegister); 2340 } 2341 2342 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2343 SMLoc &EndLoc, bool RestoreOnFailure) { 2344 auto R = parseRegister(); 2345 if (!R) return true; 2346 assert(R->isReg()); 2347 RegNo = R->getReg(); 2348 StartLoc = R->getStartLoc(); 2349 EndLoc = R->getEndLoc(); 2350 return false; 2351 } 2352 2353 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2354 SMLoc &EndLoc) { 2355 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2356 } 2357 2358 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2359 SMLoc &StartLoc, 2360 SMLoc &EndLoc) { 2361 bool Result = 2362 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2363 bool PendingErrors = getParser().hasPendingError(); 2364 getParser().clearPendingErrors(); 2365 if (PendingErrors) 2366 return MatchOperand_ParseFail; 2367 if (Result) 2368 return MatchOperand_NoMatch; 2369 return MatchOperand_Success; 2370 } 2371 2372 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2373 RegisterKind RegKind, unsigned Reg1, 2374 SMLoc Loc) { 2375 switch (RegKind) { 2376 case IS_SPECIAL: 2377 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2378 Reg = AMDGPU::EXEC; 2379 RegWidth = 2; 2380 return true; 2381 } 2382 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2383 Reg = AMDGPU::FLAT_SCR; 2384 RegWidth = 2; 2385 return true; 2386 } 2387 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2388 Reg = AMDGPU::XNACK_MASK; 2389 RegWidth = 2; 2390 return true; 2391 } 2392 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2393 Reg = AMDGPU::VCC; 2394 RegWidth = 2; 2395 return true; 2396 } 2397 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2398 Reg = 
AMDGPU::TBA; 2399 RegWidth = 2; 2400 return true; 2401 } 2402 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2403 Reg = AMDGPU::TMA; 2404 RegWidth = 2; 2405 return true; 2406 } 2407 Error(Loc, "register does not fit in the list"); 2408 return false; 2409 case IS_VGPR: 2410 case IS_SGPR: 2411 case IS_AGPR: 2412 case IS_TTMP: 2413 if (Reg1 != Reg + RegWidth) { 2414 Error(Loc, "registers in a list must have consecutive indices"); 2415 return false; 2416 } 2417 RegWidth++; 2418 return true; 2419 default: 2420 llvm_unreachable("unexpected register kind"); 2421 } 2422 } 2423 2424 struct RegInfo { 2425 StringLiteral Name; 2426 RegisterKind Kind; 2427 }; 2428 2429 static constexpr RegInfo RegularRegisters[] = { 2430 {{"v"}, IS_VGPR}, 2431 {{"s"}, IS_SGPR}, 2432 {{"ttmp"}, IS_TTMP}, 2433 {{"acc"}, IS_AGPR}, 2434 {{"a"}, IS_AGPR}, 2435 }; 2436 2437 static bool isRegularReg(RegisterKind Kind) { 2438 return Kind == IS_VGPR || 2439 Kind == IS_SGPR || 2440 Kind == IS_TTMP || 2441 Kind == IS_AGPR; 2442 } 2443 2444 static const RegInfo* getRegularRegInfo(StringRef Str) { 2445 for (const RegInfo &Reg : RegularRegisters) 2446 if (Str.startswith(Reg.Name)) 2447 return &Reg; 2448 return nullptr; 2449 } 2450 2451 static bool getRegNum(StringRef Str, unsigned& Num) { 2452 return !Str.getAsInteger(10, Num); 2453 } 2454 2455 bool 2456 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2457 const AsmToken &NextToken) const { 2458 2459 // A list of consecutive registers: [s0,s1,s2,s3] 2460 if (Token.is(AsmToken::LBrac)) 2461 return true; 2462 2463 if (!Token.is(AsmToken::Identifier)) 2464 return false; 2465 2466 // A single register like s0 or a range of registers like s[0:1] 2467 2468 StringRef Str = Token.getString(); 2469 const RegInfo *Reg = getRegularRegInfo(Str); 2470 if (Reg) { 2471 StringRef RegName = Reg->Name; 2472 StringRef RegSuffix = Str.substr(RegName.size()); 2473 if (!RegSuffix.empty()) { 2474 unsigned Num; 2475 // A single register with an index: rXX 2476 if (getRegNum(RegSuffix, Num)) 2477 return true; 2478 } else { 2479 // A range of registers: r[XX:YY]. 2480 if (NextToken.is(AsmToken::LBrac)) 2481 return true; 2482 } 2483 } 2484 2485 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2486 } 2487 2488 bool 2489 AMDGPUAsmParser::isRegister() 2490 { 2491 return isRegister(getToken(), peekToken()); 2492 } 2493 2494 unsigned 2495 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2496 unsigned RegNum, 2497 unsigned RegWidth, 2498 SMLoc Loc) { 2499 2500 assert(isRegularReg(RegKind)); 2501 2502 unsigned AlignSize = 1; 2503 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2504 // SGPR and TTMP registers must be aligned. 2505 // Max required alignment is 4 dwords. 
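// Illustrative examples: s[2:3] (width 2) is accepted because its first
// index is even, while s[1:2] is rejected below with "invalid register
// alignment"; for widths of 4 or more the first index must be a multiple
// of 4, so s[4:7] is valid but s[2:5] is not.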
2506 AlignSize = std::min(RegWidth, 4u); 2507 } 2508 2509 if (RegNum % AlignSize != 0) { 2510 Error(Loc, "invalid register alignment"); 2511 return AMDGPU::NoRegister; 2512 } 2513 2514 unsigned RegIdx = RegNum / AlignSize; 2515 int RCID = getRegClass(RegKind, RegWidth); 2516 if (RCID == -1) { 2517 Error(Loc, "invalid or unsupported register size"); 2518 return AMDGPU::NoRegister; 2519 } 2520 2521 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2522 const MCRegisterClass RC = TRI->getRegClass(RCID); 2523 if (RegIdx >= RC.getNumRegs()) { 2524 Error(Loc, "register index is out of range"); 2525 return AMDGPU::NoRegister; 2526 } 2527 2528 return RC.getRegister(RegIdx); 2529 } 2530 2531 bool 2532 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2533 int64_t RegLo, RegHi; 2534 if (!skipToken(AsmToken::LBrac, "missing register index")) 2535 return false; 2536 2537 SMLoc FirstIdxLoc = getLoc(); 2538 SMLoc SecondIdxLoc; 2539 2540 if (!parseExpr(RegLo)) 2541 return false; 2542 2543 if (trySkipToken(AsmToken::Colon)) { 2544 SecondIdxLoc = getLoc(); 2545 if (!parseExpr(RegHi)) 2546 return false; 2547 } else { 2548 RegHi = RegLo; 2549 } 2550 2551 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2552 return false; 2553 2554 if (!isUInt<32>(RegLo)) { 2555 Error(FirstIdxLoc, "invalid register index"); 2556 return false; 2557 } 2558 2559 if (!isUInt<32>(RegHi)) { 2560 Error(SecondIdxLoc, "invalid register index"); 2561 return false; 2562 } 2563 2564 if (RegLo > RegHi) { 2565 Error(FirstIdxLoc, "first register index should not exceed second index"); 2566 return false; 2567 } 2568 2569 Num = static_cast<unsigned>(RegLo); 2570 Width = (RegHi - RegLo) + 1; 2571 return true; 2572 } 2573 2574 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2575 unsigned &RegNum, unsigned &RegWidth, 2576 SmallVectorImpl<AsmToken> &Tokens) { 2577 assert(isToken(AsmToken::Identifier)); 2578 unsigned Reg = getSpecialRegForName(getTokenStr()); 2579 if (Reg) { 2580 RegNum = 0; 2581 RegWidth = 1; 2582 RegKind = IS_SPECIAL; 2583 Tokens.push_back(getToken()); 2584 lex(); // skip register name 2585 } 2586 return Reg; 2587 } 2588 2589 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2590 unsigned &RegNum, unsigned &RegWidth, 2591 SmallVectorImpl<AsmToken> &Tokens) { 2592 assert(isToken(AsmToken::Identifier)); 2593 StringRef RegName = getTokenStr(); 2594 auto Loc = getLoc(); 2595 2596 const RegInfo *RI = getRegularRegInfo(RegName); 2597 if (!RI) { 2598 Error(Loc, "invalid register name"); 2599 return AMDGPU::NoRegister; 2600 } 2601 2602 Tokens.push_back(getToken()); 2603 lex(); // skip register name 2604 2605 RegKind = RI->Kind; 2606 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2607 if (!RegSuffix.empty()) { 2608 // Single 32-bit register: vXX. 2609 if (!getRegNum(RegSuffix, RegNum)) { 2610 Error(Loc, "invalid register index"); 2611 return AMDGPU::NoRegister; 2612 } 2613 RegWidth = 1; 2614 } else { 2615 // Range of registers: v[XX:YY]. ":YY" is optional. 
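// E.g. "v[8:11]" yields RegNum = 8 and RegWidth = 4, while "v[5]" (no ":YY"
// part) yields RegNum = 5 and RegWidth = 1.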
2616 if (!ParseRegRange(RegNum, RegWidth)) 2617 return AMDGPU::NoRegister; 2618 } 2619 2620 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2621 } 2622 2623 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2624 unsigned &RegWidth, 2625 SmallVectorImpl<AsmToken> &Tokens) { 2626 unsigned Reg = AMDGPU::NoRegister; 2627 auto ListLoc = getLoc(); 2628 2629 if (!skipToken(AsmToken::LBrac, 2630 "expected a register or a list of registers")) { 2631 return AMDGPU::NoRegister; 2632 } 2633 2634 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2635 2636 auto Loc = getLoc(); 2637 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2638 return AMDGPU::NoRegister; 2639 if (RegWidth != 1) { 2640 Error(Loc, "expected a single 32-bit register"); 2641 return AMDGPU::NoRegister; 2642 } 2643 2644 for (; trySkipToken(AsmToken::Comma); ) { 2645 RegisterKind NextRegKind; 2646 unsigned NextReg, NextRegNum, NextRegWidth; 2647 Loc = getLoc(); 2648 2649 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2650 NextRegNum, NextRegWidth, 2651 Tokens)) { 2652 return AMDGPU::NoRegister; 2653 } 2654 if (NextRegWidth != 1) { 2655 Error(Loc, "expected a single 32-bit register"); 2656 return AMDGPU::NoRegister; 2657 } 2658 if (NextRegKind != RegKind) { 2659 Error(Loc, "registers in a list must be of the same kind"); 2660 return AMDGPU::NoRegister; 2661 } 2662 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2663 return AMDGPU::NoRegister; 2664 } 2665 2666 if (!skipToken(AsmToken::RBrac, 2667 "expected a comma or a closing square bracket")) { 2668 return AMDGPU::NoRegister; 2669 } 2670 2671 if (isRegularReg(RegKind)) 2672 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2673 2674 return Reg; 2675 } 2676 2677 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2678 unsigned &RegNum, unsigned &RegWidth, 2679 SmallVectorImpl<AsmToken> &Tokens) { 2680 auto Loc = getLoc(); 2681 Reg = AMDGPU::NoRegister; 2682 2683 if (isToken(AsmToken::Identifier)) { 2684 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2685 if (Reg == AMDGPU::NoRegister) 2686 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2687 } else { 2688 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2689 } 2690 2691 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2692 if (Reg == AMDGPU::NoRegister) { 2693 assert(Parser.hasPendingError()); 2694 return false; 2695 } 2696 2697 if (!subtargetHasRegister(*TRI, Reg)) { 2698 if (Reg == AMDGPU::SGPR_NULL) { 2699 Error(Loc, "'null' operand is not supported on this GPU"); 2700 } else { 2701 Error(Loc, "register not available on this GPU"); 2702 } 2703 return false; 2704 } 2705 2706 return true; 2707 } 2708 2709 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2710 unsigned &RegNum, unsigned &RegWidth, 2711 bool RestoreOnFailure /*=false*/) { 2712 Reg = AMDGPU::NoRegister; 2713 2714 SmallVector<AsmToken, 1> Tokens; 2715 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2716 if (RestoreOnFailure) { 2717 while (!Tokens.empty()) { 2718 getLexer().UnLex(Tokens.pop_back_val()); 2719 } 2720 } 2721 return true; 2722 } 2723 return false; 2724 } 2725 2726 Optional<StringRef> 2727 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2728 switch (RegKind) { 2729 case IS_VGPR: 2730 return StringRef(".amdgcn.next_free_vgpr"); 2731 case IS_SGPR: 2732 return StringRef(".amdgcn.next_free_sgpr"); 2733 default: 2734 return None; 2735 } 2736 } 2737 2738 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2739 auto SymbolName = getGprCountSymbolName(RegKind); 2740 assert(SymbolName && "initializing invalid register kind"); 2741 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2742 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2743 } 2744 2745 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2746 unsigned DwordRegIndex, 2747 unsigned RegWidth) { 2748 // Symbols are only defined for GCN targets 2749 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2750 return true; 2751 2752 auto SymbolName = getGprCountSymbolName(RegKind); 2753 if (!SymbolName) 2754 return true; 2755 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2756 2757 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2758 int64_t OldCount; 2759 2760 if (!Sym->isVariable()) 2761 return !Error(getLoc(), 2762 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2763 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2764 return !Error( 2765 getLoc(), 2766 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2767 2768 if (OldCount <= NewMax) 2769 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2770 2771 return true; 2772 } 2773 2774 std::unique_ptr<AMDGPUOperand> 2775 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2776 const auto &Tok = getToken(); 2777 SMLoc StartLoc = Tok.getLoc(); 2778 SMLoc EndLoc = Tok.getEndLoc(); 2779 RegisterKind RegKind; 2780 unsigned Reg, RegNum, RegWidth; 2781 2782 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2783 return nullptr; 2784 } 2785 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2786 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2787 return nullptr; 2788 } else 2789 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2790 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2791 } 2792 2793 OperandMatchResultTy 2794 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2795 // TODO: add syntactic sugar for 1/(2*PI) 2796 2797 assert(!isRegister()); 2798 assert(!isModifier()); 2799 2800 const auto& Tok = getToken(); 2801 const auto& NextTok = peekToken(); 2802 bool IsReal = Tok.is(AsmToken::Real); 2803 SMLoc S = getLoc(); 2804 bool Negate = false; 2805 2806 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2807 lex(); 2808 IsReal = true; 2809 Negate = true; 2810 } 2811 2812 if (IsReal) { 2813 // Floating-point expressions are not supported. 2814 // Can only allow floating-point literals with an 2815 // optional sign. 2816 2817 StringRef Num = getTokenStr(); 2818 lex(); 2819 2820 APFloat RealVal(APFloat::IEEEdouble()); 2821 auto roundMode = APFloat::rmNearestTiesToEven; 2822 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2823 return MatchOperand_ParseFail; 2824 } 2825 if (Negate) 2826 RealVal.changeSign(); 2827 2828 Operands.push_back( 2829 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2830 AMDGPUOperand::ImmTyNone, true)); 2831 2832 return MatchOperand_Success; 2833 2834 } else { 2835 int64_t IntVal; 2836 const MCExpr *Expr; 2837 SMLoc S = getLoc(); 2838 2839 if (HasSP3AbsModifier) { 2840 // This is a workaround for handling expressions 2841 // as arguments of SP3 'abs' modifier, for example: 2842 // |1.0| 2843 // |-1| 2844 // |1+x| 2845 // This syntax is not compatible with syntax of standard 2846 // MC expressions (due to the trailing '|'). 
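// parsePrimaryExpr() is used below instead of a full parseExpression()
// because a full expression parse would treat the trailing '|' as the
// bitwise-or operator and consume past it.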
2847 SMLoc EndLoc; 2848 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2849 return MatchOperand_ParseFail; 2850 } else { 2851 if (Parser.parseExpression(Expr)) 2852 return MatchOperand_ParseFail; 2853 } 2854 2855 if (Expr->evaluateAsAbsolute(IntVal)) { 2856 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2857 } else { 2858 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2859 } 2860 2861 return MatchOperand_Success; 2862 } 2863 2864 return MatchOperand_NoMatch; 2865 } 2866 2867 OperandMatchResultTy 2868 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2869 if (!isRegister()) 2870 return MatchOperand_NoMatch; 2871 2872 if (auto R = parseRegister()) { 2873 assert(R->isReg()); 2874 Operands.push_back(std::move(R)); 2875 return MatchOperand_Success; 2876 } 2877 return MatchOperand_ParseFail; 2878 } 2879 2880 OperandMatchResultTy 2881 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2882 auto res = parseReg(Operands); 2883 if (res != MatchOperand_NoMatch) { 2884 return res; 2885 } else if (isModifier()) { 2886 return MatchOperand_NoMatch; 2887 } else { 2888 return parseImm(Operands, HasSP3AbsMod); 2889 } 2890 } 2891 2892 bool 2893 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2894 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2895 const auto &str = Token.getString(); 2896 return str == "abs" || str == "neg" || str == "sext"; 2897 } 2898 return false; 2899 } 2900 2901 bool 2902 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2903 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2904 } 2905 2906 bool 2907 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2908 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2909 } 2910 2911 bool 2912 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2913 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2914 } 2915 2916 // Check if this is an operand modifier or an opcode modifier 2917 // which may look like an expression but it is not. We should 2918 // avoid parsing these modifiers as expressions. Currently 2919 // recognized sequences are: 2920 // |...| 2921 // abs(...) 2922 // neg(...) 2923 // sext(...) 2924 // -reg 2925 // -|...| 2926 // -abs(...) 2927 // name:... 2928 // Note that simple opcode modifiers like 'gds' may be parsed as 2929 // expressions; this is a special case. See getExpressionAsToken. 2930 // 2931 bool 2932 AMDGPUAsmParser::isModifier() { 2933 2934 AsmToken Tok = getToken(); 2935 AsmToken NextToken[2]; 2936 peekTokens(NextToken); 2937 2938 return isOperandModifier(Tok, NextToken[0]) || 2939 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2940 isOpcodeModifierWithVal(Tok, NextToken[0]); 2941 } 2942 2943 // Check if the current token is an SP3 'neg' modifier. 2944 // Currently this modifier is allowed in the following context: 2945 // 2946 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2947 // 2. Before an 'abs' modifier: -abs(...) 2948 // 3. Before an SP3 'abs' modifier: -|...| 2949 // 2950 // In all other cases "-" is handled as a part 2951 // of an expression that follows the sign. 
2952 // 2953 // Note: When "-" is followed by an integer literal, 2954 // this is interpreted as integer negation rather 2955 // than a floating-point NEG modifier applied to the literal. 2956 // Besides being counter-intuitive, such use of a floating-point 2957 // NEG modifier would have resulted in different meanings 2958 // of integer literals used with VOP1/2/C and VOP3, 2959 // for example: 2960 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2961 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2962 // Negative fp literals with a preceding "-" are 2963 // handled likewise, for uniformity. 2964 // 2965 bool 2966 AMDGPUAsmParser::parseSP3NegModifier() { 2967 2968 AsmToken NextToken[2]; 2969 peekTokens(NextToken); 2970 2971 if (isToken(AsmToken::Minus) && 2972 (isRegister(NextToken[0], NextToken[1]) || 2973 NextToken[0].is(AsmToken::Pipe) || 2974 isId(NextToken[0], "abs"))) { 2975 lex(); 2976 return true; 2977 } 2978 2979 return false; 2980 } 2981 2982 OperandMatchResultTy 2983 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2984 bool AllowImm) { 2985 bool Neg, SP3Neg; 2986 bool Abs, SP3Abs; 2987 SMLoc Loc; 2988 2989 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2990 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2991 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2992 return MatchOperand_ParseFail; 2993 } 2994 2995 SP3Neg = parseSP3NegModifier(); 2996 2997 Loc = getLoc(); 2998 Neg = trySkipId("neg"); 2999 if (Neg && SP3Neg) { 3000 Error(Loc, "expected register or immediate"); 3001 return MatchOperand_ParseFail; 3002 } 3003 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3004 return MatchOperand_ParseFail; 3005 3006 Abs = trySkipId("abs"); 3007 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3008 return MatchOperand_ParseFail; 3009 3010 Loc = getLoc(); 3011 SP3Abs = trySkipToken(AsmToken::Pipe); 3012 if (Abs && SP3Abs) { 3013 Error(Loc, "expected register or immediate"); 3014 return MatchOperand_ParseFail; 3015 } 3016 3017 OperandMatchResultTy Res; 3018 if (AllowImm) { 3019 Res = parseRegOrImm(Operands, SP3Abs); 3020 } else { 3021 Res = parseReg(Operands); 3022 } 3023 if (Res != MatchOperand_Success) { 3024 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3025 } 3026 3027 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3028 return MatchOperand_ParseFail; 3029 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3030 return MatchOperand_ParseFail; 3031 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3032 return MatchOperand_ParseFail; 3033 3034 AMDGPUOperand::Modifiers Mods; 3035 Mods.Abs = Abs || SP3Abs; 3036 Mods.Neg = Neg || SP3Neg; 3037 3038 if (Mods.hasFPModifiers()) { 3039 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3040 if (Op.isExpr()) { 3041 Error(Op.getStartLoc(), "expected an absolute expression"); 3042 return MatchOperand_ParseFail; 3043 } 3044 Op.setModifiers(Mods); 3045 } 3046 return MatchOperand_Success; 3047 } 3048 3049 OperandMatchResultTy 3050 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3051 bool AllowImm) { 3052 bool Sext = trySkipId("sext"); 3053 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3054 return MatchOperand_ParseFail; 3055 3056 OperandMatchResultTy Res; 3057 if (AllowImm) { 3058 Res = parseRegOrImm(Operands); 3059 } else { 3060 Res = parseReg(Operands); 3061 } 3062 if (Res != MatchOperand_Success) { 3063 return Sext? MatchOperand_ParseFail : Res; 3064 } 3065 3066 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3067 return MatchOperand_ParseFail; 3068 3069 AMDGPUOperand::Modifiers Mods; 3070 Mods.Sext = Sext; 3071 3072 if (Mods.hasIntModifiers()) { 3073 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3074 if (Op.isExpr()) { 3075 Error(Op.getStartLoc(), "expected an absolute expression"); 3076 return MatchOperand_ParseFail; 3077 } 3078 Op.setModifiers(Mods); 3079 } 3080 3081 return MatchOperand_Success; 3082 } 3083 3084 OperandMatchResultTy 3085 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3086 return parseRegOrImmWithFPInputMods(Operands, false); 3087 } 3088 3089 OperandMatchResultTy 3090 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3091 return parseRegOrImmWithIntInputMods(Operands, false); 3092 } 3093 3094 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3095 auto Loc = getLoc(); 3096 if (trySkipId("off")) { 3097 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3098 AMDGPUOperand::ImmTyOff, false)); 3099 return MatchOperand_Success; 3100 } 3101 3102 if (!isRegister()) 3103 return MatchOperand_NoMatch; 3104 3105 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3106 if (Reg) { 3107 Operands.push_back(std::move(Reg)); 3108 return MatchOperand_Success; 3109 } 3110 3111 return MatchOperand_ParseFail; 3112 3113 } 3114 3115 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3116 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3117 3118 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3119 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3120 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3121 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3122 return Match_InvalidOperand; 3123 3124 if ((TSFlags & SIInstrFlags::VOP3) && 3125 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3126 getForcedEncodingSize() != 64) 3127 return Match_PreferE32; 3128 3129 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3130 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3131 // v_mac_f32/16 allow only dst_sel == DWORD; 3132 auto OpNum = 3133 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3134 const auto &Op = Inst.getOperand(OpNum); 3135 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3136 return Match_InvalidOperand; 3137 } 3138 } 3139 3140 return Match_Success; 3141 } 3142 3143 static ArrayRef<unsigned> getAllVariants() { 3144 static const unsigned Variants[] = { 3145 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3146 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3147 }; 3148 3149 return makeArrayRef(Variants); 3150 } 3151 3152 // What asm variants we should check 3153 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3154 if (getForcedEncodingSize() == 32) { 3155 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3156 return makeArrayRef(Variants); 3157 } 3158 3159 if (isForcedVOP3()) { 3160 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3161 return makeArrayRef(Variants); 3162 } 3163 3164 if (isForcedSDWA()) { 3165 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3166 AMDGPUAsmVariants::SDWA9}; 3167 return makeArrayRef(Variants); 3168 } 3169 3170 if (isForcedDPP()) { 3171 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3172 return makeArrayRef(Variants); 3173 } 3174 3175 return getAllVariants(); 3176 } 3177 3178 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3179 if (getForcedEncodingSize() == 32) 3180 return "e32"; 3181 3182 if (isForcedVOP3()) 3183 return "e64"; 3184 3185 if (isForcedSDWA()) 3186 return "sdwa"; 3187 3188 if (isForcedDPP()) 3189 return "dpp"; 3190 3191 return ""; 3192 } 3193 3194 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3195 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3196 const unsigned Num = Desc.getNumImplicitUses(); 3197 for (unsigned i = 0; i < Num; ++i) { 3198 unsigned Reg = Desc.ImplicitUses[i]; 3199 switch (Reg) { 3200 case AMDGPU::FLAT_SCR: 3201 case AMDGPU::VCC: 3202 case AMDGPU::VCC_LO: 3203 case AMDGPU::VCC_HI: 3204 case AMDGPU::M0: 3205 return Reg; 3206 default: 3207 break; 3208 } 3209 } 3210 return AMDGPU::NoRegister; 3211 } 3212 3213 // NB: This code is correct only when used to check constant 3214 // bus limitations because GFX7 support no f16 inline constants. 3215 // Note that there are no cases when a GFX7 opcode violates 3216 // constant bus limitations due to the use of an f16 constant. 
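// Illustrative example of the distinction made below: for a 32-bit operand,
// an immediate of 64 is still an inline constant, while 65 is not and has
// to be encoded as a literal.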
3217 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3218 unsigned OpIdx) const { 3219 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3220 3221 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3222 return false; 3223 } 3224 3225 const MCOperand &MO = Inst.getOperand(OpIdx); 3226 3227 int64_t Val = MO.getImm(); 3228 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3229 3230 switch (OpSize) { // expected operand size 3231 case 8: 3232 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3233 case 4: 3234 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3235 case 2: { 3236 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3237 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3238 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3239 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3240 return AMDGPU::isInlinableIntLiteral(Val); 3241 3242 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3243 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3244 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3245 return AMDGPU::isInlinableIntLiteralV216(Val); 3246 3247 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3248 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3249 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3250 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3251 3252 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3253 } 3254 default: 3255 llvm_unreachable("invalid operand size"); 3256 } 3257 } 3258 3259 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3260 if (!isGFX10Plus()) 3261 return 1; 3262 3263 switch (Opcode) { 3264 // 64-bit shift instructions can use only one scalar value input 3265 case AMDGPU::V_LSHLREV_B64_e64: 3266 case AMDGPU::V_LSHLREV_B64_gfx10: 3267 case AMDGPU::V_LSHRREV_B64_e64: 3268 case AMDGPU::V_LSHRREV_B64_gfx10: 3269 case AMDGPU::V_ASHRREV_I64_e64: 3270 case AMDGPU::V_ASHRREV_I64_gfx10: 3271 case AMDGPU::V_LSHL_B64_e64: 3272 case AMDGPU::V_LSHR_B64_e64: 3273 case AMDGPU::V_ASHR_I64_e64: 3274 return 1; 3275 default: 3276 return 2; 3277 } 3278 } 3279 3280 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3281 const MCOperand &MO = Inst.getOperand(OpIdx); 3282 if (MO.isImm()) { 3283 return !isInlineConstant(Inst, OpIdx); 3284 } else if (MO.isReg()) { 3285 auto Reg = MO.getReg(); 3286 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3287 auto PReg = mc2PseudoReg(Reg); 3288 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3289 } else { 3290 return true; 3291 } 3292 } 3293 3294 bool 3295 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3296 const OperandVector &Operands) { 3297 const unsigned Opcode = Inst.getOpcode(); 3298 const MCInstrDesc &Desc = MII.get(Opcode); 3299 unsigned LastSGPR = AMDGPU::NoRegister; 3300 unsigned ConstantBusUseCount = 0; 3301 unsigned NumLiterals = 0; 3302 unsigned LiteralSize; 3303 3304 if (Desc.TSFlags & 3305 (SIInstrFlags::VOPC | 3306 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3307 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3308 SIInstrFlags::SDWA)) { 3309 // Check special imm operands (used by madmk, etc) 3310 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3311 ++NumLiterals; 3312 LiteralSize = 4; 3313 } 3314 3315 SmallDenseSet<unsigned> SGPRsUsed; 3316 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3317 if (SGPRUsed != AMDGPU::NoRegister) { 3318 SGPRsUsed.insert(SGPRUsed); 3319 ++ConstantBusUseCount; 3320 } 3321 3322 
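// Illustrative example: with a constant bus limit of 1 (pre-GFX10, see
// getConstantBusLimit), "v_add_f32_e64 v0, s0, s1" reads two distinct
// SGPRs and is rejected, while "v_add_f32_e64 v0, s0, s0" (the same SGPR
// counted once) and "v_add_f32_e64 v0, s0, v1" are accepted.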
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3323 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3324 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3325 3326 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3327 3328 for (int OpIdx : OpIndices) { 3329 if (OpIdx == -1) break; 3330 3331 const MCOperand &MO = Inst.getOperand(OpIdx); 3332 if (usesConstantBus(Inst, OpIdx)) { 3333 if (MO.isReg()) { 3334 LastSGPR = mc2PseudoReg(MO.getReg()); 3335 // Pairs of registers with a partial intersection, like these: 3336 // s0, s[0:1] 3337 // flat_scratch_lo, flat_scratch 3338 // flat_scratch_lo, flat_scratch_hi 3339 // are theoretically valid but they are disabled anyway. 3340 // Note that this code mimics SIInstrInfo::verifyInstruction. 3341 if (!SGPRsUsed.count(LastSGPR)) { 3342 SGPRsUsed.insert(LastSGPR); 3343 ++ConstantBusUseCount; 3344 } 3345 } else { // Expression or a literal 3346 3347 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3348 continue; // special operand like VINTERP attr_chan 3349 3350 // An instruction may use only one literal. 3351 // This has been validated in a previous step. 3352 // See validateVOPLiteral. 3353 // This literal may be used as more than one operand. 3354 // If all these operands are of the same size, 3355 // this literal counts as one scalar value. 3356 // Otherwise it counts as 2 scalar values. 3357 // See "GFX10 Shader Programming", section 3.6.2.3. 3358 3359 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3360 if (Size < 4) Size = 4; 3361 3362 if (NumLiterals == 0) { 3363 NumLiterals = 1; 3364 LiteralSize = Size; 3365 } else if (LiteralSize != Size) { 3366 NumLiterals = 2; 3367 } 3368 } 3369 } 3370 } 3371 } 3372 ConstantBusUseCount += NumLiterals; 3373 3374 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3375 return true; 3376 3377 SMLoc LitLoc = getLitLoc(Operands); 3378 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3379 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3380 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3381 return false; 3382 } 3383 3384 bool 3385 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3386 const OperandVector &Operands) { 3387 const unsigned Opcode = Inst.getOpcode(); 3388 const MCInstrDesc &Desc = MII.get(Opcode); 3389 3390 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3391 if (DstIdx == -1 || 3392 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3393 return true; 3394 } 3395 3396 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3397 3398 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3399 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3400 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3401 3402 assert(DstIdx != -1); 3403 const MCOperand &Dst = Inst.getOperand(DstIdx); 3404 assert(Dst.isReg()); 3405 3406 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3407 3408 for (int SrcIdx : SrcIndices) { 3409 if (SrcIdx == -1) break; 3410 const MCOperand &Src = Inst.getOperand(SrcIdx); 3411 if (Src.isReg()) { 3412 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3413 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3414 Error(getRegLoc(SrcReg, Operands), 3415 "destination must be different than all sources"); 3416 return false; 3417 } 3418 } 3419 } 3420 3421 return true; 3422 } 3423 3424 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3425 3426 const unsigned Opc = Inst.getOpcode(); 3427 const MCInstrDesc &Desc = MII.get(Opc); 3428 3429 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3430 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3431 assert(ClampIdx != -1); 3432 return Inst.getOperand(ClampIdx).getImm() == 0; 3433 } 3434 3435 return true; 3436 } 3437 3438 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3439 3440 const unsigned Opc = Inst.getOpcode(); 3441 const MCInstrDesc &Desc = MII.get(Opc); 3442 3443 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3444 return true; 3445 3446 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3447 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3448 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3449 3450 assert(VDataIdx != -1); 3451 3452 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3453 return true; 3454 3455 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3456 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3457 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3458 if (DMask == 0) 3459 DMask = 1; 3460 3461 unsigned DataSize = 3462 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3463 if (hasPackedD16()) { 3464 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3465 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3466 DataSize = (DataSize + 1) / 2; 3467 } 3468 3469 return (VDataSize / 4) == DataSize + TFESize; 3470 } 3471 3472 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3473 const unsigned Opc = Inst.getOpcode(); 3474 const MCInstrDesc &Desc = MII.get(Opc); 3475 3476 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3477 return true; 3478 3479 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3480 3481 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3482 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3483 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3484 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3485 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3486 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3487 3488 assert(VAddr0Idx != -1); 3489 assert(SrsrcIdx != -1); 3490 assert(SrsrcIdx > VAddr0Idx); 3491 3492 if (DimIdx == -1) 3493 return true; // intersect_ray 3494 3495 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3496 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3497 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3498 unsigned ActualAddrSize = 3499 IsNSA ? SrsrcIdx - VAddr0Idx 3500 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3501 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3502 3503 unsigned ExpectedAddrSize = 3504 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3505 3506 if (!IsNSA) { 3507 if (ExpectedAddrSize > 8) 3508 ExpectedAddrSize = 16; 3509 3510 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3511 // This provides backward compatibility for assembly created 3512 // before 160b/192b/224b types were directly supported. 3513 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3514 return true; 3515 } 3516 3517 return ActualAddrSize == ExpectedAddrSize; 3518 } 3519 3520 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3521 3522 const unsigned Opc = Inst.getOpcode(); 3523 const MCInstrDesc &Desc = MII.get(Opc); 3524 3525 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3526 return true; 3527 if (!Desc.mayLoad() || !Desc.mayStore()) 3528 return true; // Not atomic 3529 3530 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3531 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3532 3533 // This is an incomplete check because image_atomic_cmpswap 3534 // may only use 0x3 and 0xf while other atomic operations 3535 // may use 0x1 and 0x3. However these limitations are 3536 // verified when we check that dmask matches dst size. 3537 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3538 } 3539 3540 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3541 3542 const unsigned Opc = Inst.getOpcode(); 3543 const MCInstrDesc &Desc = MII.get(Opc); 3544 3545 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3546 return true; 3547 3548 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3549 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3550 3551 // GATHER4 instructions use dmask in a different fashion compared to 3552 // other MIMG instructions. The only useful DMASK values are 3553 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3554 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3555 // this. 3556 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3557 } 3558 3559 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3560 const unsigned Opc = Inst.getOpcode(); 3561 const MCInstrDesc &Desc = MII.get(Opc); 3562 3563 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3564 return true; 3565 3566 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3567 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3568 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3569 3570 if (!BaseOpcode->MSAA) 3571 return true; 3572 3573 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3574 assert(DimIdx != -1); 3575 3576 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3577 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3578 3579 return DimInfo->MSAA; 3580 } 3581 3582 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3583 { 3584 switch (Opcode) { 3585 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3586 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3587 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3588 return true; 3589 default: 3590 return false; 3591 } 3592 } 3593 3594 // movrels* opcodes should only allow VGPRS as src0. 3595 // This is specified in .td description for vop1/vop3, 3596 // but sdwa is handled differently. See isSDWAOperand. 3597 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3598 const OperandVector &Operands) { 3599 3600 const unsigned Opc = Inst.getOpcode(); 3601 const MCInstrDesc &Desc = MII.get(Opc); 3602 3603 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3604 return true; 3605 3606 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3607 assert(Src0Idx != -1); 3608 3609 SMLoc ErrLoc; 3610 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3611 if (Src0.isReg()) { 3612 auto Reg = mc2PseudoReg(Src0.getReg()); 3613 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3614 if (!isSGPR(Reg, TRI)) 3615 return true; 3616 ErrLoc = getRegLoc(Reg, Operands); 3617 } else { 3618 ErrLoc = getConstLoc(Operands); 3619 } 3620 3621 Error(ErrLoc, "source operand must be a VGPR"); 3622 return false; 3623 } 3624 3625 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3626 const OperandVector &Operands) { 3627 3628 const unsigned Opc = Inst.getOpcode(); 3629 3630 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3631 return true; 3632 3633 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3634 assert(Src0Idx != -1); 3635 3636 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3637 if (!Src0.isReg()) 3638 return true; 3639 3640 auto Reg = mc2PseudoReg(Src0.getReg()); 3641 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3642 if (isSGPR(Reg, TRI)) { 3643 Error(getRegLoc(Reg, Operands), 3644 "source operand must be either a VGPR or an inline constant"); 3645 return false; 3646 } 3647 3648 return true; 3649 } 3650 3651 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3652 const OperandVector &Operands) { 3653 const unsigned Opc = Inst.getOpcode(); 3654 const MCInstrDesc &Desc = MII.get(Opc); 3655 3656 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3657 return true; 3658 3659 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3660 if (Src2Idx == -1) 3661 return true; 3662 3663 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3664 if (!Src2.isReg()) 3665 return true; 3666 3667 MCRegister Src2Reg = Src2.getReg(); 3668 MCRegister DstReg = Inst.getOperand(0).getReg(); 3669 if 
(Src2Reg == DstReg) 3670 return true; 3671 3672 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3673 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3674 return true; 3675 3676 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3677 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3678 "source 2 operand must not partially overlap with dst"); 3679 return false; 3680 } 3681 3682 return true; 3683 } 3684 3685 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3686 switch (Inst.getOpcode()) { 3687 default: 3688 return true; 3689 case V_DIV_SCALE_F32_gfx6_gfx7: 3690 case V_DIV_SCALE_F32_vi: 3691 case V_DIV_SCALE_F32_gfx10: 3692 case V_DIV_SCALE_F64_gfx6_gfx7: 3693 case V_DIV_SCALE_F64_vi: 3694 case V_DIV_SCALE_F64_gfx10: 3695 break; 3696 } 3697 3698 // TODO: Check that src0 = src1 or src2. 3699 3700 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3701 AMDGPU::OpName::src1_modifiers, 3702 AMDGPU::OpName::src2_modifiers}) { 3703 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3704 .getImm() & 3705 SISrcMods::ABS) { 3706 return false; 3707 } 3708 } 3709 3710 return true; 3711 } 3712 3713 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3714 3715 const unsigned Opc = Inst.getOpcode(); 3716 const MCInstrDesc &Desc = MII.get(Opc); 3717 3718 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3719 return true; 3720 3721 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3722 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3723 if (isCI() || isSI()) 3724 return false; 3725 } 3726 3727 return true; 3728 } 3729 3730 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3731 const unsigned Opc = Inst.getOpcode(); 3732 const MCInstrDesc &Desc = MII.get(Opc); 3733 3734 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3735 return true; 3736 3737 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3738 if (DimIdx < 0) 3739 return true; 3740 3741 long Imm = Inst.getOperand(DimIdx).getImm(); 3742 if (Imm < 0 || Imm >= 8) 3743 return false; 3744 3745 return true; 3746 } 3747 3748 static bool IsRevOpcode(const unsigned Opcode) 3749 { 3750 switch (Opcode) { 3751 case AMDGPU::V_SUBREV_F32_e32: 3752 case AMDGPU::V_SUBREV_F32_e64: 3753 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3754 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3755 case AMDGPU::V_SUBREV_F32_e32_vi: 3756 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3757 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3758 case AMDGPU::V_SUBREV_F32_e64_vi: 3759 3760 case AMDGPU::V_SUBREV_CO_U32_e32: 3761 case AMDGPU::V_SUBREV_CO_U32_e64: 3762 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3763 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3764 3765 case AMDGPU::V_SUBBREV_U32_e32: 3766 case AMDGPU::V_SUBBREV_U32_e64: 3767 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3768 case AMDGPU::V_SUBBREV_U32_e32_vi: 3769 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3770 case AMDGPU::V_SUBBREV_U32_e64_vi: 3771 3772 case AMDGPU::V_SUBREV_U32_e32: 3773 case AMDGPU::V_SUBREV_U32_e64: 3774 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3775 case AMDGPU::V_SUBREV_U32_e32_vi: 3776 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3777 case AMDGPU::V_SUBREV_U32_e64_vi: 3778 3779 case AMDGPU::V_SUBREV_F16_e32: 3780 case AMDGPU::V_SUBREV_F16_e64: 3781 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3782 case AMDGPU::V_SUBREV_F16_e32_vi: 3783 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3784 case AMDGPU::V_SUBREV_F16_e64_vi: 3785 3786 case AMDGPU::V_SUBREV_U16_e32: 3787 case AMDGPU::V_SUBREV_U16_e64: 3788 case AMDGPU::V_SUBREV_U16_e32_vi: 3789 case
AMDGPU::V_SUBREV_U16_e64_vi: 3790 3791 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3792 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3793 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3794 3795 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3796 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3797 3798 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3799 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3800 3801 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3802 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3803 3804 case AMDGPU::V_LSHRREV_B32_e32: 3805 case AMDGPU::V_LSHRREV_B32_e64: 3806 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3807 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3808 case AMDGPU::V_LSHRREV_B32_e32_vi: 3809 case AMDGPU::V_LSHRREV_B32_e64_vi: 3810 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3811 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3812 3813 case AMDGPU::V_ASHRREV_I32_e32: 3814 case AMDGPU::V_ASHRREV_I32_e64: 3815 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3816 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3817 case AMDGPU::V_ASHRREV_I32_e32_vi: 3818 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3819 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3820 case AMDGPU::V_ASHRREV_I32_e64_vi: 3821 3822 case AMDGPU::V_LSHLREV_B32_e32: 3823 case AMDGPU::V_LSHLREV_B32_e64: 3824 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3825 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3826 case AMDGPU::V_LSHLREV_B32_e32_vi: 3827 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3828 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3829 case AMDGPU::V_LSHLREV_B32_e64_vi: 3830 3831 case AMDGPU::V_LSHLREV_B16_e32: 3832 case AMDGPU::V_LSHLREV_B16_e64: 3833 case AMDGPU::V_LSHLREV_B16_e32_vi: 3834 case AMDGPU::V_LSHLREV_B16_e64_vi: 3835 case AMDGPU::V_LSHLREV_B16_gfx10: 3836 3837 case AMDGPU::V_LSHRREV_B16_e32: 3838 case AMDGPU::V_LSHRREV_B16_e64: 3839 case AMDGPU::V_LSHRREV_B16_e32_vi: 3840 case AMDGPU::V_LSHRREV_B16_e64_vi: 3841 case AMDGPU::V_LSHRREV_B16_gfx10: 3842 3843 case AMDGPU::V_ASHRREV_I16_e32: 3844 case AMDGPU::V_ASHRREV_I16_e64: 3845 case AMDGPU::V_ASHRREV_I16_e32_vi: 3846 case AMDGPU::V_ASHRREV_I16_e64_vi: 3847 case AMDGPU::V_ASHRREV_I16_gfx10: 3848 3849 case AMDGPU::V_LSHLREV_B64_e64: 3850 case AMDGPU::V_LSHLREV_B64_gfx10: 3851 case AMDGPU::V_LSHLREV_B64_vi: 3852 3853 case AMDGPU::V_LSHRREV_B64_e64: 3854 case AMDGPU::V_LSHRREV_B64_gfx10: 3855 case AMDGPU::V_LSHRREV_B64_vi: 3856 3857 case AMDGPU::V_ASHRREV_I64_e64: 3858 case AMDGPU::V_ASHRREV_I64_gfx10: 3859 case AMDGPU::V_ASHRREV_I64_vi: 3860 3861 case AMDGPU::V_PK_LSHLREV_B16: 3862 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3863 case AMDGPU::V_PK_LSHLREV_B16_vi: 3864 3865 case AMDGPU::V_PK_LSHRREV_B16: 3866 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3867 case AMDGPU::V_PK_LSHRREV_B16_vi: 3868 case AMDGPU::V_PK_ASHRREV_I16: 3869 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3870 case AMDGPU::V_PK_ASHRREV_I16_vi: 3871 return true; 3872 default: 3873 return false; 3874 } 3875 } 3876 3877 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3878 3879 using namespace SIInstrFlags; 3880 const unsigned Opcode = Inst.getOpcode(); 3881 const MCInstrDesc &Desc = MII.get(Opcode); 3882 3883 // lds_direct register is defined so that it can be used 3884 // with 9-bit operands only. Ignore encodings which do not accept these. 
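// Illustrative note (not taken from the ISA docs): the loop below walks
// src0..src2 in order and stops at the first missing source operand, so
// lds_direct is rejected on gfx90a, for SDWA and *rev* encodings, and for any
// position other than src0. For example, with the usual asm spelling assumed,
// "v_add_f32 v0, v1, lds_direct" would get the "may be used as src0 only"
// diagnostic.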
3885 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3886 if ((Desc.TSFlags & Enc) == 0) 3887 return None; 3888 3889 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3890 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3891 if (SrcIdx == -1) 3892 break; 3893 const auto &Src = Inst.getOperand(SrcIdx); 3894 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3895 3896 if (isGFX90A()) 3897 return StringRef("lds_direct is not supported on this GPU"); 3898 3899 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3900 return StringRef("lds_direct cannot be used with this instruction"); 3901 3902 if (SrcName != OpName::src0) 3903 return StringRef("lds_direct may be used as src0 only"); 3904 } 3905 } 3906 3907 return None; 3908 } 3909 3910 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3911 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3912 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3913 if (Op.isFlatOffset()) 3914 return Op.getStartLoc(); 3915 } 3916 return getLoc(); 3917 } 3918 3919 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3920 const OperandVector &Operands) { 3921 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3922 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3923 return true; 3924 3925 auto Opcode = Inst.getOpcode(); 3926 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3927 assert(OpNum != -1); 3928 3929 const auto &Op = Inst.getOperand(OpNum); 3930 if (!hasFlatOffsets() && Op.getImm() != 0) { 3931 Error(getFlatOffsetLoc(Operands), 3932 "flat offset modifier is not supported on this GPU"); 3933 return false; 3934 } 3935 3936 // For FLAT segment the offset must be positive; 3937 // MSB is ignored and forced to zero. 3938 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3939 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3940 if (!isIntN(OffsetSize, Op.getImm())) { 3941 Error(getFlatOffsetLoc(Operands), 3942 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3943 return false; 3944 } 3945 } else { 3946 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3947 if (!isUIntN(OffsetSize, Op.getImm())) { 3948 Error(getFlatOffsetLoc(Operands), 3949 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3950 return false; 3951 } 3952 } 3953 3954 return true; 3955 } 3956 3957 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3958 // Start with second operand because SMEM Offset cannot be dst or src0. 
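// Operands[0] holds the mnemonic token (see MatchAndEmitInstruction), so the
// loop below starts at index 2, i.e. at the second machine operand.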
3959 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3960 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3961 if (Op.isSMEMOffset()) 3962 return Op.getStartLoc(); 3963 } 3964 return getLoc(); 3965 } 3966 3967 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3968 const OperandVector &Operands) { 3969 if (isCI() || isSI()) 3970 return true; 3971 3972 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3973 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3974 return true; 3975 3976 auto Opcode = Inst.getOpcode(); 3977 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3978 if (OpNum == -1) 3979 return true; 3980 3981 const auto &Op = Inst.getOperand(OpNum); 3982 if (!Op.isImm()) 3983 return true; 3984 3985 uint64_t Offset = Op.getImm(); 3986 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3987 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3988 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3989 return true; 3990 3991 Error(getSMEMOffsetLoc(Operands), 3992 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3993 "expected a 21-bit signed offset"); 3994 3995 return false; 3996 } 3997 3998 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3999 unsigned Opcode = Inst.getOpcode(); 4000 const MCInstrDesc &Desc = MII.get(Opcode); 4001 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4002 return true; 4003 4004 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4005 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4006 4007 const int OpIndices[] = { Src0Idx, Src1Idx }; 4008 4009 unsigned NumExprs = 0; 4010 unsigned NumLiterals = 0; 4011 uint32_t LiteralValue; 4012 4013 for (int OpIdx : OpIndices) { 4014 if (OpIdx == -1) break; 4015 4016 const MCOperand &MO = Inst.getOperand(OpIdx); 4017 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4018 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4019 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4020 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4021 if (NumLiterals == 0 || LiteralValue != Value) { 4022 LiteralValue = Value; 4023 ++NumLiterals; 4024 } 4025 } else if (MO.isExpr()) { 4026 ++NumExprs; 4027 } 4028 } 4029 } 4030 4031 return NumLiterals + NumExprs <= 1; 4032 } 4033 4034 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4035 const unsigned Opc = Inst.getOpcode(); 4036 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4037 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4038 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4039 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4040 4041 if (OpSel & ~3) 4042 return false; 4043 } 4044 return true; 4045 } 4046 4047 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4048 const OperandVector &Operands) { 4049 const unsigned Opc = Inst.getOpcode(); 4050 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4051 if (DppCtrlIdx < 0) 4052 return true; 4053 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4054 4055 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4056 // DPP64 is supported for row_newbcast only. 
4057 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4058 if (Src0Idx >= 0 && 4059 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4060 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4061 Error(S, "64 bit dpp only supports row_newbcast"); 4062 return false; 4063 } 4064 } 4065 4066 return true; 4067 } 4068 4069 // Check if VCC register matches wavefront size 4070 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4071 auto FB = getFeatureBits(); 4072 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4073 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4074 } 4075 4076 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4077 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4078 const OperandVector &Operands) { 4079 unsigned Opcode = Inst.getOpcode(); 4080 const MCInstrDesc &Desc = MII.get(Opcode); 4081 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4082 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4083 ImmIdx == -1) 4084 return true; 4085 4086 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4087 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4088 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4089 4090 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4091 4092 unsigned NumExprs = 0; 4093 unsigned NumLiterals = 0; 4094 uint32_t LiteralValue; 4095 4096 for (int OpIdx : OpIndices) { 4097 if (OpIdx == -1) 4098 continue; 4099 4100 const MCOperand &MO = Inst.getOperand(OpIdx); 4101 if (!MO.isImm() && !MO.isExpr()) 4102 continue; 4103 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4104 continue; 4105 4106 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4107 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4108 Error(getConstLoc(Operands), 4109 "inline constants are not allowed for this operand"); 4110 return false; 4111 } 4112 4113 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4114 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4115 if (NumLiterals == 0 || LiteralValue != Value) { 4116 LiteralValue = Value; 4117 ++NumLiterals; 4118 } 4119 } else if (MO.isExpr()) { 4120 ++NumExprs; 4121 } 4122 } 4123 NumLiterals += NumExprs; 4124 4125 if (!NumLiterals) 4126 return true; 4127 4128 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4129 Error(getLitLoc(Operands), "literal operands are not supported"); 4130 return false; 4131 } 4132 4133 if (NumLiterals > 1) { 4134 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4135 return false; 4136 } 4137 4138 return true; 4139 } 4140 4141 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4142 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4143 const MCRegisterInfo *MRI) { 4144 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4145 if (OpIdx < 0) 4146 return -1; 4147 4148 const MCOperand &Op = Inst.getOperand(OpIdx); 4149 if (!Op.isReg()) 4150 return -1; 4151 4152 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4153 auto Reg = Sub ? Sub : Op.getReg(); 4154 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4155 return AGPR32.contains(Reg) ? 
1 : 0; 4156 } 4157 4158 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4159 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4160 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4161 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4162 SIInstrFlags::DS)) == 0) 4163 return true; 4164 4165 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4166 : AMDGPU::OpName::vdata; 4167 4168 const MCRegisterInfo *MRI = getMRI(); 4169 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4170 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4171 4172 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4173 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4174 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4175 return false; 4176 } 4177 4178 auto FB = getFeatureBits(); 4179 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4180 if (DataAreg < 0 || DstAreg < 0) 4181 return true; 4182 return DstAreg == DataAreg; 4183 } 4184 4185 return DstAreg < 1 && DataAreg < 1; 4186 } 4187 4188 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4189 auto FB = getFeatureBits(); 4190 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4191 return true; 4192 4193 const MCRegisterInfo *MRI = getMRI(); 4194 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4195 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4196 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4197 const MCOperand &Op = Inst.getOperand(I); 4198 if (!Op.isReg()) 4199 continue; 4200 4201 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4202 if (!Sub) 4203 continue; 4204 4205 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4206 return false; 4207 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4208 return false; 4209 } 4210 4211 return true; 4212 } 4213 4214 // gfx90a has an undocumented limitation: 4215 // DS_GWS opcodes must use even aligned registers. 4216 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4217 const OperandVector &Operands) { 4218 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4219 return true; 4220 4221 int Opc = Inst.getOpcode(); 4222 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4223 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4224 return true; 4225 4226 const MCRegisterInfo *MRI = getMRI(); 4227 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4228 int Data0Pos = 4229 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4230 assert(Data0Pos != -1); 4231 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4232 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4233 if (RegIdx & 1) { 4234 SMLoc RegLoc = getRegLoc(Reg, Operands); 4235 Error(RegLoc, "vgpr must be even aligned"); 4236 return false; 4237 } 4238 4239 return true; 4240 } 4241 4242 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4243 const OperandVector &Operands, 4244 const SMLoc &IDLoc) { 4245 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4246 AMDGPU::OpName::cpol); 4247 if (CPolPos == -1) 4248 return true; 4249 4250 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4251 4252 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4253 if ((TSFlags & (SIInstrFlags::SMRD)) && 4254 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4255 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4256 return false; 4257 } 4258 4259 if (isGFX90A() && (CPol & CPol::SCC)) { 4260 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4261 StringRef CStr(S.getPointer()); 4262 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4263 Error(S, "scc is not supported on this GPU"); 4264 return false; 4265 } 4266 4267 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4268 return true; 4269 4270 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4271 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4272 Error(IDLoc, "instruction must use glc"); 4273 return false; 4274 } 4275 } else { 4276 if (CPol & CPol::GLC) { 4277 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4278 StringRef CStr(S.getPointer()); 4279 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4280 Error(S, "instruction must not use glc"); 4281 return false; 4282 } 4283 } 4284 4285 return true; 4286 } 4287 4288 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4289 const SMLoc &IDLoc, 4290 const OperandVector &Operands) { 4291 if (auto ErrMsg = validateLdsDirect(Inst)) { 4292 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4293 return false; 4294 } 4295 if (!validateSOPLiteral(Inst)) { 4296 Error(getLitLoc(Operands), 4297 "only one literal operand is allowed"); 4298 return false; 4299 } 4300 if (!validateVOPLiteral(Inst, Operands)) { 4301 return false; 4302 } 4303 if (!validateConstantBusLimitations(Inst, Operands)) { 4304 return false; 4305 } 4306 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4307 return false; 4308 } 4309 if (!validateIntClampSupported(Inst)) { 4310 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4311 "integer clamping is not supported on this GPU"); 4312 return false; 4313 } 4314 if (!validateOpSel(Inst)) { 4315 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4316 "invalid op_sel operand"); 4317 return false; 4318 } 4319 if (!validateDPP(Inst, Operands)) { 4320 return false; 4321 } 4322 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4323 if (!validateMIMGD16(Inst)) { 4324 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4325 "d16 modifier is not supported on this GPU"); 4326 return false; 4327 } 4328 if (!validateMIMGDim(Inst)) { 4329 Error(IDLoc, "dim modifier is required on this GPU"); 4330 return false; 4331 } 4332 if (!validateMIMGMSAA(Inst)) { 4333 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4334 "invalid dim; must be MSAA type"); 4335 return false; 4336 } 4337 if (!validateMIMGDataSize(Inst)) { 4338 Error(IDLoc, 4339 "image data size does not match dmask and tfe"); 4340 return false; 4341 } 4342 if (!validateMIMGAddrSize(Inst)) { 4343 Error(IDLoc, 4344 "image address size does not match dim and a16"); 4345 return false; 4346 } 4347 if (!validateMIMGAtomicDMask(Inst)) { 4348 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4349 "invalid atomic image dmask"); 4350 return false; 4351 } 4352 if (!validateMIMGGatherDMask(Inst)) { 4353 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4354 "invalid image_gather dmask: only one bit must be set"); 4355 return false; 4356 } 4357 if (!validateMovrels(Inst, Operands)) { 4358 return false; 4359 } 4360 if (!validateFlatOffset(Inst, Operands)) { 4361 return false; 4362 } 4363 if (!validateSMEMOffset(Inst, Operands)) { 4364 return false; 4365 } 4366 if (!validateMAIAccWrite(Inst, Operands)) { 4367 return false; 4368 } 4369 if (!validateMFMA(Inst, Operands)) { 4370 return false; 4371 } 4372 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4373 return false; 4374 } 4375 4376 if (!validateAGPRLdSt(Inst)) { 4377 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4378 ? "invalid register class: data and dst should be all VGPR or AGPR" 4379 : "invalid register class: agpr loads and stores not supported on this GPU" 4380 ); 4381 return false; 4382 } 4383 if (!validateVGPRAlign(Inst)) { 4384 Error(IDLoc, 4385 "invalid register class: vgpr tuples must be 64 bit aligned"); 4386 return false; 4387 } 4388 if (!validateGWS(Inst, Operands)) { 4389 return false; 4390 } 4391 4392 if (!validateDivScale(Inst)) { 4393 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4394 return false; 4395 } 4396 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4397 return false; 4398 } 4399 4400 return true; 4401 } 4402 4403 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4404 const FeatureBitset &FBS, 4405 unsigned VariantID = 0); 4406 4407 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4408 const FeatureBitset &AvailableFeatures, 4409 unsigned VariantID); 4410 4411 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4412 const FeatureBitset &FBS) { 4413 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4414 } 4415 4416 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4417 const FeatureBitset &FBS, 4418 ArrayRef<unsigned> Variants) { 4419 for (auto Variant : Variants) { 4420 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4421 return true; 4422 } 4423 4424 return false; 4425 } 4426 4427 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4428 const SMLoc &IDLoc) { 4429 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4430 4431 // Check if requested instruction variant is supported. 4432 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4433 return false; 4434 4435 // This instruction is not supported. 4436 // Clear any other pending errors because they are no longer relevant. 4437 getParser().clearPendingErrors(); 4438 4439 // Requested instruction variant is not supported. 
4440 // Check if any other variants are supported. 4441 StringRef VariantName = getMatchedVariantName(); 4442 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4443 return Error(IDLoc, 4444 Twine(VariantName, 4445 " variant of this instruction is not supported")); 4446 } 4447 4448 // Finally check if this instruction is supported on any other GPU. 4449 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4450 return Error(IDLoc, "instruction not supported on this GPU"); 4451 } 4452 4453 // Instruction not supported on any GPU. Probably a typo. 4454 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4455 return Error(IDLoc, "invalid instruction" + Suggestion); 4456 } 4457 4458 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4459 OperandVector &Operands, 4460 MCStreamer &Out, 4461 uint64_t &ErrorInfo, 4462 bool MatchingInlineAsm) { 4463 MCInst Inst; 4464 unsigned Result = Match_Success; 4465 for (auto Variant : getMatchedVariants()) { 4466 uint64_t EI; 4467 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4468 Variant); 4469 // We order match statuses from least to most specific. We use most specific 4470 // status as resulting 4471 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4472 if ((R == Match_Success) || 4473 (R == Match_PreferE32) || 4474 (R == Match_MissingFeature && Result != Match_PreferE32) || 4475 (R == Match_InvalidOperand && Result != Match_MissingFeature 4476 && Result != Match_PreferE32) || 4477 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4478 && Result != Match_MissingFeature 4479 && Result != Match_PreferE32)) { 4480 Result = R; 4481 ErrorInfo = EI; 4482 } 4483 if (R == Match_Success) 4484 break; 4485 } 4486 4487 if (Result == Match_Success) { 4488 if (!validateInstruction(Inst, IDLoc, Operands)) { 4489 return true; 4490 } 4491 Inst.setLoc(IDLoc); 4492 Out.emitInstruction(Inst, getSTI()); 4493 return false; 4494 } 4495 4496 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4497 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4498 return true; 4499 } 4500 4501 switch (Result) { 4502 default: break; 4503 case Match_MissingFeature: 4504 // It has been verified that the specified instruction 4505 // mnemonic is valid. A match was found but it requires 4506 // features which are not supported on this GPU. 
4507 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4508 4509 case Match_InvalidOperand: { 4510 SMLoc ErrorLoc = IDLoc; 4511 if (ErrorInfo != ~0ULL) { 4512 if (ErrorInfo >= Operands.size()) { 4513 return Error(IDLoc, "too few operands for instruction"); 4514 } 4515 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4516 if (ErrorLoc == SMLoc()) 4517 ErrorLoc = IDLoc; 4518 } 4519 return Error(ErrorLoc, "invalid operand for instruction"); 4520 } 4521 4522 case Match_PreferE32: 4523 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4524 "should be encoded as e32"); 4525 case Match_MnemonicFail: 4526 llvm_unreachable("Invalid instructions should have been handled already"); 4527 } 4528 llvm_unreachable("Implement any new match types added!"); 4529 } 4530 4531 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4532 int64_t Tmp = -1; 4533 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4534 return true; 4535 } 4536 if (getParser().parseAbsoluteExpression(Tmp)) { 4537 return true; 4538 } 4539 Ret = static_cast<uint32_t>(Tmp); 4540 return false; 4541 } 4542 4543 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4544 uint32_t &Minor) { 4545 if (ParseAsAbsoluteExpression(Major)) 4546 return TokError("invalid major version"); 4547 4548 if (!trySkipToken(AsmToken::Comma)) 4549 return TokError("minor version number required, comma expected"); 4550 4551 if (ParseAsAbsoluteExpression(Minor)) 4552 return TokError("invalid minor version"); 4553 4554 return false; 4555 } 4556 4557 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4558 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4559 return TokError("directive only supported for amdgcn architecture"); 4560 4561 std::string TargetIDDirective; 4562 SMLoc TargetStart = getTok().getLoc(); 4563 if (getParser().parseEscapedString(TargetIDDirective)) 4564 return true; 4565 4566 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4567 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4568 return getParser().Error(TargetRange.Start, 4569 (Twine(".amdgcn_target directive's target id ") + 4570 Twine(TargetIDDirective) + 4571 Twine(" does not match the specified target id ") + 4572 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4573 4574 return false; 4575 } 4576 4577 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4578 return Error(Range.Start, "value out of range", Range); 4579 } 4580 4581 bool AMDGPUAsmParser::calculateGPRBlocks( 4582 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4583 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4584 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4585 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4586 // TODO(scott.linder): These calculations are duplicated from 4587 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
4588 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4589 4590 unsigned NumVGPRs = NextFreeVGPR; 4591 unsigned NumSGPRs = NextFreeSGPR; 4592 4593 if (Version.Major >= 10) 4594 NumSGPRs = 0; 4595 else { 4596 unsigned MaxAddressableNumSGPRs = 4597 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4598 4599 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4600 NumSGPRs > MaxAddressableNumSGPRs) 4601 return OutOfRangeError(SGPRRange); 4602 4603 NumSGPRs += 4604 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4605 4606 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4607 NumSGPRs > MaxAddressableNumSGPRs) 4608 return OutOfRangeError(SGPRRange); 4609 4610 if (Features.test(FeatureSGPRInitBug)) 4611 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4612 } 4613 4614 VGPRBlocks = 4615 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4616 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4617 4618 return false; 4619 } 4620 4621 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4622 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4623 return TokError("directive only supported for amdgcn architecture"); 4624 4625 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4626 return TokError("directive only supported for amdhsa OS"); 4627 4628 StringRef KernelName; 4629 if (getParser().parseIdentifier(KernelName)) 4630 return true; 4631 4632 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4633 4634 StringSet<> Seen; 4635 4636 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4637 4638 SMRange VGPRRange; 4639 uint64_t NextFreeVGPR = 0; 4640 uint64_t AccumOffset = 0; 4641 SMRange SGPRRange; 4642 uint64_t NextFreeSGPR = 0; 4643 4644 // Count the number of user SGPRs implied from the enabled feature bits. 4645 unsigned ImpliedUserSGPRCount = 0; 4646 4647 // Track if the asm explicitly contains the directive for the user SGPR 4648 // count. 
4649 Optional<unsigned> ExplicitUserSGPRCount; 4650 bool ReserveVCC = true; 4651 bool ReserveFlatScr = true; 4652 Optional<bool> EnableWavefrontSize32; 4653 4654 while (true) { 4655 while (trySkipToken(AsmToken::EndOfStatement)); 4656 4657 StringRef ID; 4658 SMRange IDRange = getTok().getLocRange(); 4659 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4660 return true; 4661 4662 if (ID == ".end_amdhsa_kernel") 4663 break; 4664 4665 if (Seen.find(ID) != Seen.end()) 4666 return TokError(".amdhsa_ directives cannot be repeated"); 4667 Seen.insert(ID); 4668 4669 SMLoc ValStart = getLoc(); 4670 int64_t IVal; 4671 if (getParser().parseAbsoluteExpression(IVal)) 4672 return true; 4673 SMLoc ValEnd = getLoc(); 4674 SMRange ValRange = SMRange(ValStart, ValEnd); 4675 4676 if (IVal < 0) 4677 return OutOfRangeError(ValRange); 4678 4679 uint64_t Val = IVal; 4680 4681 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4682 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4683 return OutOfRangeError(RANGE); \ 4684 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4685 4686 if (ID == ".amdhsa_group_segment_fixed_size") { 4687 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4688 return OutOfRangeError(ValRange); 4689 KD.group_segment_fixed_size = Val; 4690 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4691 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4692 return OutOfRangeError(ValRange); 4693 KD.private_segment_fixed_size = Val; 4694 } else if (ID == ".amdhsa_kernarg_size") { 4695 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4696 return OutOfRangeError(ValRange); 4697 KD.kernarg_size = Val; 4698 } else if (ID == ".amdhsa_user_sgpr_count") { 4699 ExplicitUserSGPRCount = Val; 4700 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4701 if (hasArchitectedFlatScratch()) 4702 return Error(IDRange.Start, 4703 "directive is not supported with architected flat scratch", 4704 IDRange); 4705 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4706 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4707 Val, ValRange); 4708 if (Val) 4709 ImpliedUserSGPRCount += 4; 4710 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4711 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4712 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4713 ValRange); 4714 if (Val) 4715 ImpliedUserSGPRCount += 2; 4716 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4717 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4718 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4719 ValRange); 4720 if (Val) 4721 ImpliedUserSGPRCount += 2; 4722 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4723 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4724 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4725 Val, ValRange); 4726 if (Val) 4727 ImpliedUserSGPRCount += 2; 4728 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4729 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4730 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4731 ValRange); 4732 if (Val) 4733 ImpliedUserSGPRCount += 2; 4734 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4735 if (hasArchitectedFlatScratch()) 4736 return Error(IDRange.Start, 4737 "directive is not supported with architected flat scratch", 4738 IDRange); 4739 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4740 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4741 ValRange); 4742 if (Val) 4743 ImpliedUserSGPRCount += 2; 4744 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4745 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4746 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4747 Val, ValRange); 4748 if (Val) 4749 ImpliedUserSGPRCount += 1; 4750 } else if (ID == ".amdhsa_wavefront_size32") { 4751 if (IVersion.Major < 10) 4752 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4753 EnableWavefrontSize32 = Val; 4754 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4755 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4756 Val, ValRange); 4757 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4758 if (hasArchitectedFlatScratch()) 4759 return Error(IDRange.Start, 4760 "directive is not supported with architected flat scratch", 4761 IDRange); 4762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4763 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4764 } else if (ID == ".amdhsa_enable_private_segment") { 4765 if (!hasArchitectedFlatScratch()) 4766 return Error( 4767 IDRange.Start, 4768 "directive is not supported without architected flat scratch", 4769 IDRange); 4770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4771 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4772 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4773 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4774 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4775 ValRange); 4776 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4777 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4778 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4779 ValRange); 4780 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4781 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4782 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4783 ValRange); 4784 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4786 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4787 ValRange); 4788 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4790 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4791 ValRange); 4792 } else if (ID == ".amdhsa_next_free_vgpr") { 4793 VGPRRange = ValRange; 4794 NextFreeVGPR = Val; 4795 } else if (ID == ".amdhsa_next_free_sgpr") { 4796 SGPRRange = ValRange; 4797 NextFreeSGPR = Val; 4798 } else if (ID == ".amdhsa_accum_offset") { 4799 if (!isGFX90A()) 4800 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4801 AccumOffset = Val; 4802 } else if (ID == ".amdhsa_reserve_vcc") { 4803 if (!isUInt<1>(Val)) 4804 return OutOfRangeError(ValRange); 4805 ReserveVCC = Val; 4806 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4807 if (IVersion.Major < 7) 4808 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4809 if (hasArchitectedFlatScratch()) 4810 return Error(IDRange.Start, 4811 "directive is not supported with architected flat scratch", 4812 IDRange); 4813 if (!isUInt<1>(Val)) 4814 return OutOfRangeError(ValRange); 4815 ReserveFlatScr = Val; 4816 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4817 if (IVersion.Major < 8) 4818 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4819 if (!isUInt<1>(Val)) 4820 return OutOfRangeError(ValRange); 4821 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4822 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4823 IDRange); 4824 } else if (ID == ".amdhsa_float_round_mode_32") { 4825 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4826 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4827 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4828 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4829 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4830 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4831 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4832 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4833 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4834 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4835 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4836 ValRange); 4837 } else if (ID == ".amdhsa_dx10_clamp") { 4838 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4839 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4840 } else if (ID == ".amdhsa_ieee_mode") { 4841 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4842 Val, ValRange); 4843 } else if (ID == ".amdhsa_fp16_overflow") { 4844 if (IVersion.Major < 9) 4845 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4846 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4847 ValRange); 4848 } else if (ID == ".amdhsa_tg_split") { 4849 if (!isGFX90A()) 4850 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4852 ValRange); 4853 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4854 if (IVersion.Major < 10) 4855 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4856 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4857 ValRange); 4858 } else if (ID == ".amdhsa_memory_ordered") { 4859 if (IVersion.Major < 10) 4860 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4862 ValRange); 4863 } else if (ID == ".amdhsa_forward_progress") { 4864 if (IVersion.Major < 10) 4865 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4866 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4867 ValRange); 4868 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4869 PARSE_BITS_ENTRY( 4870 KD.compute_pgm_rsrc2, 4871 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4872 ValRange); 4873 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4874 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4875 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4876 Val, ValRange); 4877 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4878 PARSE_BITS_ENTRY( 4879 KD.compute_pgm_rsrc2, 4880 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4881 ValRange); 4882 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4883 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4884 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4885 Val, ValRange); 4886 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4887 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4888 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4889 Val, ValRange); 4890 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4891 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4892 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4893 Val, ValRange); 4894 } else if (ID == ".amdhsa_exception_int_div_zero") { 4895 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4896 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4897 Val, ValRange); 4898 } else { 4899 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4900 } 4901 4902 #undef PARSE_BITS_ENTRY 4903 } 4904 4905 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4906 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4907 4908 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4909 return TokError(".amdhsa_next_free_sgpr directive is required"); 4910 4911 unsigned VGPRBlocks; 4912 unsigned SGPRBlocks; 4913 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4914 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4915 EnableWavefrontSize32, NextFreeVGPR, 4916 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4917 SGPRBlocks)) 4918 return true; 4919 4920 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4921 VGPRBlocks)) 4922 return OutOfRangeError(VGPRRange); 4923 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4924 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4925 4926 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4927 SGPRBlocks)) 4928 return OutOfRangeError(SGPRRange); 4929 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4930 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4931 SGPRBlocks); 4932 4933 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4934 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4935 "enabled user SGPRs"); 4936 4937 unsigned UserSGPRCount = 4938 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4939 4940 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4941 return TokError("too many user SGPRs enabled"); 4942 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4943 UserSGPRCount); 4944 4945 if (isGFX90A()) { 4946 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4947 return TokError(".amdhsa_accum_offset directive is required"); 4948 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4949 return TokError("accum_offset should be in range [4..256] in " 4950 "increments of 4"); 4951 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4952 return TokError("accum_offset exceeds total VGPR allocation"); 4953 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4954 (AccumOffset / 4 - 1)); 4955 } 4956 4957 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4958 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4959 ReserveFlatScr); 4960 return false; 4961 } 4962 4963 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4964 uint32_t Major; 4965 uint32_t Minor; 4966 4967 if (ParseDirectiveMajorMinor(Major, Minor)) 4968 return true; 4969 4970 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4971 return false; 4972 } 4973 4974 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4975 uint32_t Major; 4976 uint32_t Minor; 4977 uint32_t Stepping; 4978 StringRef VendorName; 4979 StringRef ArchName; 4980 4981 // If this directive has no arguments, then use the ISA version for the 4982 // targeted GPU. 
4983 if (isToken(AsmToken::EndOfStatement)) { 4984 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4985 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4986 ISA.Stepping, 4987 "AMD", "AMDGPU"); 4988 return false; 4989 } 4990 4991 if (ParseDirectiveMajorMinor(Major, Minor)) 4992 return true; 4993 4994 if (!trySkipToken(AsmToken::Comma)) 4995 return TokError("stepping version number required, comma expected"); 4996 4997 if (ParseAsAbsoluteExpression(Stepping)) 4998 return TokError("invalid stepping version"); 4999 5000 if (!trySkipToken(AsmToken::Comma)) 5001 return TokError("vendor name required, comma expected"); 5002 5003 if (!parseString(VendorName, "invalid vendor name")) 5004 return true; 5005 5006 if (!trySkipToken(AsmToken::Comma)) 5007 return TokError("arch name required, comma expected"); 5008 5009 if (!parseString(ArchName, "invalid arch name")) 5010 return true; 5011 5012 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5013 VendorName, ArchName); 5014 return false; 5015 } 5016 5017 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5018 amd_kernel_code_t &Header) { 5019 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5020 // assembly for backwards compatibility. 5021 if (ID == "max_scratch_backing_memory_byte_size") { 5022 Parser.eatToEndOfStatement(); 5023 return false; 5024 } 5025 5026 SmallString<40> ErrStr; 5027 raw_svector_ostream Err(ErrStr); 5028 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5029 return TokError(Err.str()); 5030 } 5031 Lex(); 5032 5033 if (ID == "enable_wavefront_size32") { 5034 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5035 if (!isGFX10Plus()) 5036 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5037 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5038 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5039 } else { 5040 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5041 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5042 } 5043 } 5044 5045 if (ID == "wavefront_size") { 5046 if (Header.wavefront_size == 5) { 5047 if (!isGFX10Plus()) 5048 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5049 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5050 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5051 } else if (Header.wavefront_size == 6) { 5052 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5053 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5054 } 5055 } 5056 5057 if (ID == "enable_wgp_mode") { 5058 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5059 !isGFX10Plus()) 5060 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5061 } 5062 5063 if (ID == "enable_mem_ordered") { 5064 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5065 !isGFX10Plus()) 5066 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5067 } 5068 5069 if (ID == "enable_fwd_progress") { 5070 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5071 !isGFX10Plus()) 5072 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5073 } 5074 5075 return false; 5076 } 5077 5078 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5079 amd_kernel_code_t Header; 5080 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5081 5082 while (true) { 5083 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5084 // will set the current token to EndOfStatement. 5085 while(trySkipToken(AsmToken::EndOfStatement)); 5086 5087 StringRef ID; 5088 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5089 return true; 5090 5091 if (ID == ".end_amd_kernel_code_t") 5092 break; 5093 5094 if (ParseAMDKernelCodeTValue(ID, Header)) 5095 return true; 5096 } 5097 5098 getTargetStreamer().EmitAMDKernelCodeT(Header); 5099 5100 return false; 5101 } 5102 5103 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5104 StringRef KernelName; 5105 if (!parseId(KernelName, "expected symbol name")) 5106 return true; 5107 5108 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5109 ELF::STT_AMDGPU_HSA_KERNEL); 5110 5111 KernelScope.initialize(getContext()); 5112 return false; 5113 } 5114 5115 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5116 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5117 return Error(getLoc(), 5118 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5119 "architectures"); 5120 } 5121 5122 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5123 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5124 return Error(getParser().getTok().getLoc(), "target id must match options"); 5125 5126 getTargetStreamer().EmitISAVersion(); 5127 Lex(); 5128 5129 return false; 5130 } 5131 5132 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5133 const char *AssemblerDirectiveBegin; 5134 const char *AssemblerDirectiveEnd; 5135 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5136 isHsaAbiVersion3AndAbove(&getSTI()) 5137 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5138 HSAMD::V3::AssemblerDirectiveEnd) 5139 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5140 HSAMD::AssemblerDirectiveEnd); 5141 5142 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5143 return Error(getLoc(), 5144 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5145 "not available on non-amdhsa OSes")).str()); 5146 } 5147 5148 std::string HSAMetadataString; 5149 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5150 HSAMetadataString)) 5151 return true; 5152 5153 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5154 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5155 return Error(getLoc(), "invalid HSA metadata"); 5156 } else { 5157 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5158 return Error(getLoc(), "invalid HSA metadata"); 5159 } 5160 5161 return false; 5162 } 5163 5164 /// Common code to parse out a block of text (typically YAML) between start and 5165 /// end directives. 
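/// A minimal usage sketch (directive spellings assumed from the HSA metadata
/// handling above): everything between ".amdgpu_metadata" and
/// ".end_amdgpu_metadata" is collected verbatim, with whitespace preserved and
/// each statement joined by the target's separator string.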
5166 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5167 const char *AssemblerDirectiveEnd, 5168 std::string &CollectString) { 5169 5170 raw_string_ostream CollectStream(CollectString); 5171 5172 getLexer().setSkipSpace(false); 5173 5174 bool FoundEnd = false; 5175 while (!isToken(AsmToken::Eof)) { 5176 while (isToken(AsmToken::Space)) { 5177 CollectStream << getTokenStr(); 5178 Lex(); 5179 } 5180 5181 if (trySkipId(AssemblerDirectiveEnd)) { 5182 FoundEnd = true; 5183 break; 5184 } 5185 5186 CollectStream << Parser.parseStringToEndOfStatement() 5187 << getContext().getAsmInfo()->getSeparatorString(); 5188 5189 Parser.eatToEndOfStatement(); 5190 } 5191 5192 getLexer().setSkipSpace(true); 5193 5194 if (isToken(AsmToken::Eof) && !FoundEnd) { 5195 return TokError(Twine("expected directive ") + 5196 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5197 } 5198 5199 CollectStream.flush(); 5200 return false; 5201 } 5202 5203 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5204 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5205 std::string String; 5206 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5207 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5208 return true; 5209 5210 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5211 if (!PALMetadata->setFromString(String)) 5212 return Error(getLoc(), "invalid PAL metadata"); 5213 return false; 5214 } 5215 5216 /// Parse the assembler directive for old linear-format PAL metadata. 5217 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5218 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5219 return Error(getLoc(), 5220 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5221 "not available on non-amdpal OSes")).str()); 5222 } 5223 5224 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5225 PALMetadata->setLegacy(); 5226 for (;;) { 5227 uint32_t Key, Value; 5228 if (ParseAsAbsoluteExpression(Key)) { 5229 return TokError(Twine("invalid value in ") + 5230 Twine(PALMD::AssemblerDirective)); 5231 } 5232 if (!trySkipToken(AsmToken::Comma)) { 5233 return TokError(Twine("expected an even number of values in ") + 5234 Twine(PALMD::AssemblerDirective)); 5235 } 5236 if (ParseAsAbsoluteExpression(Value)) { 5237 return TokError(Twine("invalid value in ") + 5238 Twine(PALMD::AssemblerDirective)); 5239 } 5240 PALMetadata->setRegister(Key, Value); 5241 if (!trySkipToken(AsmToken::Comma)) 5242 break; 5243 } 5244 return false; 5245 } 5246 5247 /// ParseDirectiveAMDGPULDS 5248 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5249 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5250 if (getParser().checkForValidSection()) 5251 return true; 5252 5253 StringRef Name; 5254 SMLoc NameLoc = getLoc(); 5255 if (getParser().parseIdentifier(Name)) 5256 return TokError("expected identifier in directive"); 5257 5258 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5259 if (parseToken(AsmToken::Comma, "expected ','")) 5260 return true; 5261 5262 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5263 5264 int64_t Size; 5265 SMLoc SizeLoc = getLoc(); 5266 if (getParser().parseAbsoluteExpression(Size)) 5267 return true; 5268 if (Size < 0) 5269 return Error(SizeLoc, "size must be non-negative"); 5270 if (Size > LocalMemorySize) 5271 return Error(SizeLoc, "size is too large"); 5272 5273 int64_t Alignment = 4; 5274 if (trySkipToken(AsmToken::Comma)) { 5275 SMLoc AlignLoc = getLoc(); 5276 if 
(getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10 has 2 more SGPRs, 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10, flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
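    // (Illustrative example, assuming the GFX10 symbolic hwreg names are
    // available: flat scratch would instead be read with something like
    // 's_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO)'.)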
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more
  // that SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser.
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parsers, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
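  // For example, 'v_add_f32_e64' forces the 64-bit (VOP3) encoding and the
  // mnemonic handed back to the matcher is 'v_add_f32' (illustrative mnemonic).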
5457 setForcedEncodingSize(0); 5458 setForcedDPP(false); 5459 setForcedSDWA(false); 5460 5461 if (Name.endswith("_e64")) { 5462 setForcedEncodingSize(64); 5463 return Name.substr(0, Name.size() - 4); 5464 } else if (Name.endswith("_e32")) { 5465 setForcedEncodingSize(32); 5466 return Name.substr(0, Name.size() - 4); 5467 } else if (Name.endswith("_dpp")) { 5468 setForcedDPP(true); 5469 return Name.substr(0, Name.size() - 4); 5470 } else if (Name.endswith("_sdwa")) { 5471 setForcedSDWA(true); 5472 return Name.substr(0, Name.size() - 5); 5473 } 5474 return Name; 5475 } 5476 5477 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5478 StringRef Name, 5479 SMLoc NameLoc, OperandVector &Operands) { 5480 // Add the instruction mnemonic 5481 Name = parseMnemonicSuffix(Name); 5482 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5483 5484 bool IsMIMG = Name.startswith("image_"); 5485 5486 while (!trySkipToken(AsmToken::EndOfStatement)) { 5487 OperandMode Mode = OperandMode_Default; 5488 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5489 Mode = OperandMode_NSA; 5490 CPolSeen = 0; 5491 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5492 5493 if (Res != MatchOperand_Success) { 5494 checkUnsupportedInstruction(Name, NameLoc); 5495 if (!Parser.hasPendingError()) { 5496 // FIXME: use real operand location rather than the current location. 5497 StringRef Msg = 5498 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5499 "not a valid operand."; 5500 Error(getLoc(), Msg); 5501 } 5502 while (!trySkipToken(AsmToken::EndOfStatement)) { 5503 lex(); 5504 } 5505 return true; 5506 } 5507 5508 // Eat the comma or space if there is one. 5509 trySkipToken(AsmToken::Comma); 5510 } 5511 5512 return false; 5513 } 5514 5515 //===----------------------------------------------------------------------===// 5516 // Utility functions 5517 //===----------------------------------------------------------------------===// 5518 5519 OperandMatchResultTy 5520 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5521 5522 if (!trySkipId(Prefix, AsmToken::Colon)) 5523 return MatchOperand_NoMatch; 5524 5525 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5526 } 5527 5528 OperandMatchResultTy 5529 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5530 AMDGPUOperand::ImmTy ImmTy, 5531 bool (*ConvertResult)(int64_t&)) { 5532 SMLoc S = getLoc(); 5533 int64_t Value = 0; 5534 5535 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5536 if (Res != MatchOperand_Success) 5537 return Res; 5538 5539 if (ConvertResult && !ConvertResult(Value)) { 5540 Error(S, "invalid " + StringRef(Prefix) + " value."); 5541 } 5542 5543 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5544 return MatchOperand_Success; 5545 } 5546 5547 OperandMatchResultTy 5548 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5549 OperandVector &Operands, 5550 AMDGPUOperand::ImmTy ImmTy, 5551 bool (*ConvertResult)(int64_t&)) { 5552 SMLoc S = getLoc(); 5553 if (!trySkipId(Prefix, AsmToken::Colon)) 5554 return MatchOperand_NoMatch; 5555 5556 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5557 return MatchOperand_ParseFail; 5558 5559 unsigned Val = 0; 5560 const unsigned MaxSize = 4; 5561 5562 // FIXME: How to verify the number of elements matches the number of src 5563 // operands? 
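  // Minimal illustration, assuming an op_sel-style operand: 'op_sel:[0,1]'
  // is folded by the loop below into Val = 0b10.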
5564 for (int I = 0; ; ++I) { 5565 int64_t Op; 5566 SMLoc Loc = getLoc(); 5567 if (!parseExpr(Op)) 5568 return MatchOperand_ParseFail; 5569 5570 if (Op != 0 && Op != 1) { 5571 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5572 return MatchOperand_ParseFail; 5573 } 5574 5575 Val |= (Op << I); 5576 5577 if (trySkipToken(AsmToken::RBrac)) 5578 break; 5579 5580 if (I + 1 == MaxSize) { 5581 Error(getLoc(), "expected a closing square bracket"); 5582 return MatchOperand_ParseFail; 5583 } 5584 5585 if (!skipToken(AsmToken::Comma, "expected a comma")) 5586 return MatchOperand_ParseFail; 5587 } 5588 5589 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5590 return MatchOperand_Success; 5591 } 5592 5593 OperandMatchResultTy 5594 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5595 AMDGPUOperand::ImmTy ImmTy) { 5596 int64_t Bit; 5597 SMLoc S = getLoc(); 5598 5599 if (trySkipId(Name)) { 5600 Bit = 1; 5601 } else if (trySkipId("no", Name)) { 5602 Bit = 0; 5603 } else { 5604 return MatchOperand_NoMatch; 5605 } 5606 5607 if (Name == "r128" && !hasMIMG_R128()) { 5608 Error(S, "r128 modifier is not supported on this GPU"); 5609 return MatchOperand_ParseFail; 5610 } 5611 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5612 Error(S, "a16 modifier is not supported on this GPU"); 5613 return MatchOperand_ParseFail; 5614 } 5615 5616 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5617 ImmTy = AMDGPUOperand::ImmTyR128A16; 5618 5619 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5620 return MatchOperand_Success; 5621 } 5622 5623 OperandMatchResultTy 5624 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5625 unsigned CPolOn = 0; 5626 unsigned CPolOff = 0; 5627 SMLoc S = getLoc(); 5628 5629 if (trySkipId("glc")) 5630 CPolOn = AMDGPU::CPol::GLC; 5631 else if (trySkipId("noglc")) 5632 CPolOff = AMDGPU::CPol::GLC; 5633 else if (trySkipId("slc")) 5634 CPolOn = AMDGPU::CPol::SLC; 5635 else if (trySkipId("noslc")) 5636 CPolOff = AMDGPU::CPol::SLC; 5637 else if (trySkipId("dlc")) 5638 CPolOn = AMDGPU::CPol::DLC; 5639 else if (trySkipId("nodlc")) 5640 CPolOff = AMDGPU::CPol::DLC; 5641 else if (trySkipId("scc")) 5642 CPolOn = AMDGPU::CPol::SCC; 5643 else if (trySkipId("noscc")) 5644 CPolOff = AMDGPU::CPol::SCC; 5645 else 5646 return MatchOperand_NoMatch; 5647 5648 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5649 Error(S, "dlc modifier is not supported on this GPU"); 5650 return MatchOperand_ParseFail; 5651 } 5652 5653 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5654 Error(S, "scc modifier is not supported on this GPU"); 5655 return MatchOperand_ParseFail; 5656 } 5657 5658 if (CPolSeen & (CPolOn | CPolOff)) { 5659 Error(S, "duplicate cache policy modifier"); 5660 return MatchOperand_ParseFail; 5661 } 5662 5663 CPolSeen |= (CPolOn | CPolOff); 5664 5665 for (unsigned I = 1; I != Operands.size(); ++I) { 5666 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5667 if (Op.isCPol()) { 5668 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5669 return MatchOperand_Success; 5670 } 5671 } 5672 5673 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5674 AMDGPUOperand::ImmTyCPol)); 5675 5676 return MatchOperand_Success; 5677 } 5678 5679 static void addOptionalImmOperand( 5680 MCInst& Inst, const OperandVector& Operands, 5681 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5682 AMDGPUOperand::ImmTy ImmT, 5683 int64_t Default = 0) { 5684 auto i = OptionalIdx.find(ImmT); 5685 if (i != OptionalIdx.end()) { 
5686 unsigned Idx = i->second; 5687 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5688 } else { 5689 Inst.addOperand(MCOperand::createImm(Default)); 5690 } 5691 } 5692 5693 OperandMatchResultTy 5694 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5695 StringRef &Value, 5696 SMLoc &StringLoc) { 5697 if (!trySkipId(Prefix, AsmToken::Colon)) 5698 return MatchOperand_NoMatch; 5699 5700 StringLoc = getLoc(); 5701 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5702 : MatchOperand_ParseFail; 5703 } 5704 5705 //===----------------------------------------------------------------------===// 5706 // MTBUF format 5707 //===----------------------------------------------------------------------===// 5708 5709 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5710 int64_t MaxVal, 5711 int64_t &Fmt) { 5712 int64_t Val; 5713 SMLoc Loc = getLoc(); 5714 5715 auto Res = parseIntWithPrefix(Pref, Val); 5716 if (Res == MatchOperand_ParseFail) 5717 return false; 5718 if (Res == MatchOperand_NoMatch) 5719 return true; 5720 5721 if (Val < 0 || Val > MaxVal) { 5722 Error(Loc, Twine("out of range ", StringRef(Pref))); 5723 return false; 5724 } 5725 5726 Fmt = Val; 5727 return true; 5728 } 5729 5730 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5731 // values to live in a joint format operand in the MCInst encoding. 5732 OperandMatchResultTy 5733 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5734 using namespace llvm::AMDGPU::MTBUFFormat; 5735 5736 int64_t Dfmt = DFMT_UNDEF; 5737 int64_t Nfmt = NFMT_UNDEF; 5738 5739 // dfmt and nfmt can appear in either order, and each is optional. 5740 for (int I = 0; I < 2; ++I) { 5741 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5742 return MatchOperand_ParseFail; 5743 5744 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5745 return MatchOperand_ParseFail; 5746 } 5747 // Skip optional comma between dfmt/nfmt 5748 // but guard against 2 commas following each other. 5749 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5750 !peekToken().is(AsmToken::Comma)) { 5751 trySkipToken(AsmToken::Comma); 5752 } 5753 } 5754 5755 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5756 return MatchOperand_NoMatch; 5757 5758 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5759 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5760 5761 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5762 return MatchOperand_Success; 5763 } 5764 5765 OperandMatchResultTy 5766 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5767 using namespace llvm::AMDGPU::MTBUFFormat; 5768 5769 int64_t Fmt = UFMT_UNDEF; 5770 5771 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5772 return MatchOperand_ParseFail; 5773 5774 if (Fmt == UFMT_UNDEF) 5775 return MatchOperand_NoMatch; 5776 5777 Format = Fmt; 5778 return MatchOperand_Success; 5779 } 5780 5781 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5782 int64_t &Nfmt, 5783 StringRef FormatStr, 5784 SMLoc Loc) { 5785 using namespace llvm::AMDGPU::MTBUFFormat; 5786 int64_t Format; 5787 5788 Format = getDfmt(FormatStr); 5789 if (Format != DFMT_UNDEF) { 5790 Dfmt = Format; 5791 return true; 5792 } 5793 5794 Format = getNfmt(FormatStr, getSTI()); 5795 if (Format != NFMT_UNDEF) { 5796 Nfmt = Format; 5797 return true; 5798 } 5799 5800 Error(Loc, "unsupported format"); 5801 return false; 5802 } 5803 5804 OperandMatchResultTy 5805 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5806 SMLoc FormatLoc, 5807 int64_t &Format) { 5808 using namespace llvm::AMDGPU::MTBUFFormat; 5809 5810 int64_t Dfmt = DFMT_UNDEF; 5811 int64_t Nfmt = NFMT_UNDEF; 5812 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5813 return MatchOperand_ParseFail; 5814 5815 if (trySkipToken(AsmToken::Comma)) { 5816 StringRef Str; 5817 SMLoc Loc = getLoc(); 5818 if (!parseId(Str, "expected a format string") || 5819 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5820 return MatchOperand_ParseFail; 5821 } 5822 if (Dfmt == DFMT_UNDEF) { 5823 Error(Loc, "duplicate numeric format"); 5824 return MatchOperand_ParseFail; 5825 } else if (Nfmt == NFMT_UNDEF) { 5826 Error(Loc, "duplicate data format"); 5827 return MatchOperand_ParseFail; 5828 } 5829 } 5830 5831 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5832 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5833 5834 if (isGFX10Plus()) { 5835 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5836 if (Ufmt == UFMT_UNDEF) { 5837 Error(FormatLoc, "unsupported format"); 5838 return MatchOperand_ParseFail; 5839 } 5840 Format = Ufmt; 5841 } else { 5842 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5843 } 5844 5845 return MatchOperand_Success; 5846 } 5847 5848 OperandMatchResultTy 5849 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5850 SMLoc Loc, 5851 int64_t &Format) { 5852 using namespace llvm::AMDGPU::MTBUFFormat; 5853 5854 auto Id = getUnifiedFormat(FormatStr); 5855 if (Id == UFMT_UNDEF) 5856 return MatchOperand_NoMatch; 5857 5858 if (!isGFX10Plus()) { 5859 Error(Loc, "unified format is not supported on this GPU"); 5860 return MatchOperand_ParseFail; 5861 } 5862 5863 Format = Id; 5864 return MatchOperand_Success; 5865 } 5866 5867 OperandMatchResultTy 5868 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5869 using namespace llvm::AMDGPU::MTBUFFormat; 5870 SMLoc Loc = getLoc(); 5871 5872 if (!parseExpr(Format)) 5873 return MatchOperand_ParseFail; 5874 if (!isValidFormatEncoding(Format, getSTI())) { 5875 Error(Loc, "out of range format"); 5876 return MatchOperand_ParseFail; 5877 } 5878 5879 return MatchOperand_Success; 5880 } 5881 5882 OperandMatchResultTy 5883 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5884 using namespace llvm::AMDGPU::MTBUFFormat; 5885 5886 if (!trySkipId("format", AsmToken::Colon)) 5887 return MatchOperand_NoMatch; 5888 5889 if (trySkipToken(AsmToken::LBrac)) { 5890 StringRef FormatStr; 5891 SMLoc Loc = getLoc(); 5892 if (!parseId(FormatStr, "expected a format string")) 5893 return MatchOperand_ParseFail; 5894 5895 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5896 if (Res == MatchOperand_NoMatch) 5897 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5898 if (Res != MatchOperand_Success) 5899 return Res; 5900 5901 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5902 return MatchOperand_ParseFail; 5903 5904 return MatchOperand_Success; 5905 } 5906 5907 return parseNumericFormat(Format); 5908 } 5909 5910 OperandMatchResultTy 5911 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5912 using namespace llvm::AMDGPU::MTBUFFormat; 5913 5914 int64_t Format = getDefaultFormatEncoding(getSTI()); 5915 OperandMatchResultTy Res; 5916 SMLoc Loc = getLoc(); 5917 5918 // Parse legacy format syntax. 5919 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5920 if (Res == MatchOperand_ParseFail) 5921 return Res; 5922 5923 bool FormatFound = (Res == MatchOperand_Success); 5924 5925 Operands.push_back( 5926 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5927 5928 if (FormatFound) 5929 trySkipToken(AsmToken::Comma); 5930 5931 if (isToken(AsmToken::EndOfStatement)) { 5932 // We are expecting an soffset operand, 5933 // but let matcher handle the error. 5934 return MatchOperand_Success; 5935 } 5936 5937 // Parse soffset. 
5938 Res = parseRegOrImm(Operands); 5939 if (Res != MatchOperand_Success) 5940 return Res; 5941 5942 trySkipToken(AsmToken::Comma); 5943 5944 if (!FormatFound) { 5945 Res = parseSymbolicOrNumericFormat(Format); 5946 if (Res == MatchOperand_ParseFail) 5947 return Res; 5948 if (Res == MatchOperand_Success) { 5949 auto Size = Operands.size(); 5950 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5951 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5952 Op.setImm(Format); 5953 } 5954 return MatchOperand_Success; 5955 } 5956 5957 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5958 Error(getLoc(), "duplicate format"); 5959 return MatchOperand_ParseFail; 5960 } 5961 return MatchOperand_Success; 5962 } 5963 5964 //===----------------------------------------------------------------------===// 5965 // ds 5966 //===----------------------------------------------------------------------===// 5967 5968 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5969 const OperandVector &Operands) { 5970 OptionalImmIndexMap OptionalIdx; 5971 5972 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5973 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5974 5975 // Add the register arguments 5976 if (Op.isReg()) { 5977 Op.addRegOperands(Inst, 1); 5978 continue; 5979 } 5980 5981 // Handle optional arguments 5982 OptionalIdx[Op.getImmTy()] = i; 5983 } 5984 5985 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5986 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5987 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5988 5989 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5990 } 5991 5992 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5993 bool IsGdsHardcoded) { 5994 OptionalImmIndexMap OptionalIdx; 5995 5996 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5997 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5998 5999 // Add the register arguments 6000 if (Op.isReg()) { 6001 Op.addRegOperands(Inst, 1); 6002 continue; 6003 } 6004 6005 if (Op.isToken() && Op.getToken() == "gds") { 6006 IsGdsHardcoded = true; 6007 continue; 6008 } 6009 6010 // Handle optional arguments 6011 OptionalIdx[Op.getImmTy()] = i; 6012 } 6013 6014 AMDGPUOperand::ImmTy OffsetType = 6015 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6016 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6017 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6018 AMDGPUOperand::ImmTyOffset; 6019 6020 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6021 6022 if (!IsGdsHardcoded) { 6023 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6024 } 6025 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6026 } 6027 6028 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6029 OptionalImmIndexMap OptionalIdx; 6030 6031 unsigned OperandIdx[4]; 6032 unsigned EnMask = 0; 6033 int SrcIdx = 0; 6034 6035 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6036 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6037 6038 // Add the register arguments 6039 if (Op.isReg()) { 6040 assert(SrcIdx < 4); 6041 OperandIdx[SrcIdx] = Inst.size(); 6042 Op.addRegOperands(Inst, 1); 6043 ++SrcIdx; 6044 continue; 6045 } 6046 6047 if (Op.isOff()) { 6048 assert(SrcIdx < 4); 6049 OperandIdx[SrcIdx] = Inst.size(); 6050 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6051 ++SrcIdx; 6052 continue; 6053 } 6054 6055 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6056 Op.addImmOperands(Inst, 1); 6057 continue; 6058 } 6059 6060 if (Op.isToken() && Op.getToken() == "done") 6061 continue; 6062 6063 // Handle optional arguments 6064 OptionalIdx[Op.getImmTy()] = i; 6065 } 6066 6067 assert(SrcIdx == 4); 6068 6069 bool Compr = false; 6070 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6071 Compr = true; 6072 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6073 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6074 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6075 } 6076 6077 for (auto i = 0; i < SrcIdx; ++i) { 6078 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6079 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6080 } 6081 } 6082 6083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6084 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6085 6086 Inst.addOperand(MCOperand::createImm(EnMask)); 6087 } 6088 6089 //===----------------------------------------------------------------------===// 6090 // s_waitcnt 6091 //===----------------------------------------------------------------------===// 6092 6093 static bool 6094 encodeCnt( 6095 const AMDGPU::IsaVersion ISA, 6096 int64_t &IntVal, 6097 int64_t CntVal, 6098 bool Saturate, 6099 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6100 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6101 { 6102 bool Failed = false; 6103 6104 IntVal = encode(ISA, IntVal, CntVal); 6105 if (CntVal != decode(ISA, IntVal)) { 6106 if (Saturate) { 6107 IntVal = encode(ISA, IntVal, -1); 6108 } else { 6109 Failed = true; 6110 } 6111 } 6112 return Failed; 6113 } 6114 6115 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6116 6117 SMLoc CntLoc = getLoc(); 6118 StringRef CntName = getTokenStr(); 6119 6120 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6121 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6122 return false; 6123 6124 int64_t CntVal; 6125 SMLoc ValLoc = getLoc(); 6126 if (!parseExpr(CntVal)) 6127 return false; 6128 6129 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6130 6131 bool Failed = true; 6132 bool Sat = CntName.endswith("_sat"); 6133 6134 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6135 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6136 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6137 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6138 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6139 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6140 } else { 6141 Error(CntLoc, "invalid counter name " + CntName); 6142 return false; 6143 } 6144 6145 if (Failed) { 6146 Error(ValLoc, "too large value for " + CntName); 6147 return false; 6148 } 6149 6150 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6151 return false; 6152 6153 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6154 if (isToken(AsmToken::EndOfStatement)) { 6155 Error(getLoc(), "expected a counter name"); 6156 return false; 6157 } 6158 } 6159 6160 return true; 6161 } 6162 6163 OperandMatchResultTy 6164 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6165 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6166 int64_t Waitcnt = getWaitcntBitMask(ISA); 6167 SMLoc S = getLoc(); 6168 6169 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6170 while (!isToken(AsmToken::EndOfStatement)) { 6171 if (!parseCnt(Waitcnt)) 6172 return MatchOperand_ParseFail; 6173 } 6174 } else { 6175 if (!parseExpr(Waitcnt)) 6176 return MatchOperand_ParseFail; 6177 } 6178 6179 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6180 return MatchOperand_Success; 6181 } 6182 6183 bool 6184 AMDGPUOperand::isSWaitCnt() const { 6185 return isImm(); 6186 } 6187 6188 //===----------------------------------------------------------------------===// 6189 // hwreg 6190 //===----------------------------------------------------------------------===// 6191 6192 bool 6193 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6194 OperandInfoTy &Offset, 6195 
OperandInfoTy &Width) { 6196 using namespace llvm::AMDGPU::Hwreg; 6197 6198 // The register may be specified by name or using a numeric code 6199 HwReg.Loc = getLoc(); 6200 if (isToken(AsmToken::Identifier) && 6201 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) { 6202 HwReg.IsSymbolic = true; 6203 lex(); // skip register name 6204 } else if (!parseExpr(HwReg.Id, "a register name")) { 6205 return false; 6206 } 6207 6208 if (trySkipToken(AsmToken::RParen)) 6209 return true; 6210 6211 // parse optional params 6212 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6213 return false; 6214 6215 Offset.Loc = getLoc(); 6216 if (!parseExpr(Offset.Id)) 6217 return false; 6218 6219 if (!skipToken(AsmToken::Comma, "expected a comma")) 6220 return false; 6221 6222 Width.Loc = getLoc(); 6223 return parseExpr(Width.Id) && 6224 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6225 } 6226 6227 bool 6228 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6229 const OperandInfoTy &Offset, 6230 const OperandInfoTy &Width) { 6231 6232 using namespace llvm::AMDGPU::Hwreg; 6233 6234 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6235 Error(HwReg.Loc, 6236 "specified hardware register is not supported on this GPU"); 6237 return false; 6238 } 6239 if (!isValidHwreg(HwReg.Id)) { 6240 Error(HwReg.Loc, 6241 "invalid code of hardware register: only 6-bit values are legal"); 6242 return false; 6243 } 6244 if (!isValidHwregOffset(Offset.Id)) { 6245 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6246 return false; 6247 } 6248 if (!isValidHwregWidth(Width.Id)) { 6249 Error(Width.Loc, 6250 "invalid bitfield width: only values from 1 to 32 are legal"); 6251 return false; 6252 } 6253 return true; 6254 } 6255 6256 OperandMatchResultTy 6257 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6258 using namespace llvm::AMDGPU::Hwreg; 6259 6260 int64_t ImmVal = 0; 6261 SMLoc Loc = getLoc(); 6262 6263 if (trySkipId("hwreg", AsmToken::LParen)) { 6264 OperandInfoTy HwReg(ID_UNKNOWN_); 6265 OperandInfoTy Offset(OFFSET_DEFAULT_); 6266 OperandInfoTy Width(WIDTH_DEFAULT_); 6267 if (parseHwregBody(HwReg, Offset, Width) && 6268 validateHwreg(HwReg, Offset, Width)) { 6269 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6270 } else { 6271 return MatchOperand_ParseFail; 6272 } 6273 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6274 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6275 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6276 return MatchOperand_ParseFail; 6277 } 6278 } else { 6279 return MatchOperand_ParseFail; 6280 } 6281 6282 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6283 return MatchOperand_Success; 6284 } 6285 6286 bool AMDGPUOperand::isHwreg() const { 6287 return isImmTy(ImmTyHwreg); 6288 } 6289 6290 //===----------------------------------------------------------------------===// 6291 // sendmsg 6292 //===----------------------------------------------------------------------===// 6293 6294 bool 6295 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6296 OperandInfoTy &Op, 6297 OperandInfoTy &Stream) { 6298 using namespace llvm::AMDGPU::SendMsg; 6299 6300 Msg.Loc = getLoc(); 6301 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6302 Msg.IsSymbolic = true; 6303 lex(); // skip message name 6304 } else if (!parseExpr(Msg.Id, "a message name")) { 6305 return false; 6306 } 6307 6308 if (trySkipToken(AsmToken::Comma)) { 6309 
Op.IsDefined = true; 6310 Op.Loc = getLoc(); 6311 if (isToken(AsmToken::Identifier) && 6312 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6313 lex(); // skip operation name 6314 } else if (!parseExpr(Op.Id, "an operation name")) { 6315 return false; 6316 } 6317 6318 if (trySkipToken(AsmToken::Comma)) { 6319 Stream.IsDefined = true; 6320 Stream.Loc = getLoc(); 6321 if (!parseExpr(Stream.Id)) 6322 return false; 6323 } 6324 } 6325 6326 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6327 } 6328 6329 bool 6330 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6331 const OperandInfoTy &Op, 6332 const OperandInfoTy &Stream) { 6333 using namespace llvm::AMDGPU::SendMsg; 6334 6335 // Validation strictness depends on whether message is specified 6336 // in a symbolic or in a numeric form. In the latter case 6337 // only encoding possibility is checked. 6338 bool Strict = Msg.IsSymbolic; 6339 6340 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6341 Error(Msg.Loc, "invalid message id"); 6342 return false; 6343 } 6344 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6345 if (Op.IsDefined) { 6346 Error(Op.Loc, "message does not support operations"); 6347 } else { 6348 Error(Msg.Loc, "missing message operation"); 6349 } 6350 return false; 6351 } 6352 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6353 Error(Op.Loc, "invalid operation id"); 6354 return false; 6355 } 6356 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6357 Error(Stream.Loc, "message operation does not support streams"); 6358 return false; 6359 } 6360 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6361 Error(Stream.Loc, "invalid message stream id"); 6362 return false; 6363 } 6364 return true; 6365 } 6366 6367 OperandMatchResultTy 6368 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6369 using namespace llvm::AMDGPU::SendMsg; 6370 6371 int64_t ImmVal = 0; 6372 SMLoc Loc = getLoc(); 6373 6374 if (trySkipId("sendmsg", AsmToken::LParen)) { 6375 OperandInfoTy Msg(ID_UNKNOWN_); 6376 OperandInfoTy Op(OP_NONE_); 6377 OperandInfoTy Stream(STREAM_ID_NONE_); 6378 if (parseSendMsgBody(Msg, Op, Stream) && 6379 validateSendMsg(Msg, Op, Stream)) { 6380 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6381 } else { 6382 return MatchOperand_ParseFail; 6383 } 6384 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6385 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6386 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6387 return MatchOperand_ParseFail; 6388 } 6389 } else { 6390 return MatchOperand_ParseFail; 6391 } 6392 6393 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6394 return MatchOperand_Success; 6395 } 6396 6397 bool AMDGPUOperand::isSendMsg() const { 6398 return isImmTy(ImmTySendMsg); 6399 } 6400 6401 //===----------------------------------------------------------------------===// 6402 // v_interp 6403 //===----------------------------------------------------------------------===// 6404 6405 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6406 StringRef Str; 6407 SMLoc S = getLoc(); 6408 6409 if (!parseId(Str)) 6410 return MatchOperand_NoMatch; 6411 6412 int Slot = StringSwitch<int>(Str) 6413 .Case("p10", 0) 6414 .Case("p20", 1) 6415 .Case("p0", 2) 6416 .Default(-1); 6417 6418 if (Slot == -1) { 6419 Error(S, "invalid interpolation slot"); 6420 return MatchOperand_ParseFail; 6421 } 6422 6423 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6424 
AMDGPUOperand::ImmTyInterpSlot)); 6425 return MatchOperand_Success; 6426 } 6427 6428 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6429 StringRef Str; 6430 SMLoc S = getLoc(); 6431 6432 if (!parseId(Str)) 6433 return MatchOperand_NoMatch; 6434 6435 if (!Str.startswith("attr")) { 6436 Error(S, "invalid interpolation attribute"); 6437 return MatchOperand_ParseFail; 6438 } 6439 6440 StringRef Chan = Str.take_back(2); 6441 int AttrChan = StringSwitch<int>(Chan) 6442 .Case(".x", 0) 6443 .Case(".y", 1) 6444 .Case(".z", 2) 6445 .Case(".w", 3) 6446 .Default(-1); 6447 if (AttrChan == -1) { 6448 Error(S, "invalid or missing interpolation attribute channel"); 6449 return MatchOperand_ParseFail; 6450 } 6451 6452 Str = Str.drop_back(2).drop_front(4); 6453 6454 uint8_t Attr; 6455 if (Str.getAsInteger(10, Attr)) { 6456 Error(S, "invalid or missing interpolation attribute number"); 6457 return MatchOperand_ParseFail; 6458 } 6459 6460 if (Attr > 63) { 6461 Error(S, "out of bounds interpolation attribute number"); 6462 return MatchOperand_ParseFail; 6463 } 6464 6465 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6466 6467 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6468 AMDGPUOperand::ImmTyInterpAttr)); 6469 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6470 AMDGPUOperand::ImmTyAttrChan)); 6471 return MatchOperand_Success; 6472 } 6473 6474 //===----------------------------------------------------------------------===// 6475 // exp 6476 //===----------------------------------------------------------------------===// 6477 6478 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6479 using namespace llvm::AMDGPU::Exp; 6480 6481 StringRef Str; 6482 SMLoc S = getLoc(); 6483 6484 if (!parseId(Str)) 6485 return MatchOperand_NoMatch; 6486 6487 unsigned Id = getTgtId(Str); 6488 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6489 Error(S, (Id == ET_INVALID) ? 
6490 "invalid exp target" : 6491 "exp target is not supported on this GPU"); 6492 return MatchOperand_ParseFail; 6493 } 6494 6495 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6496 AMDGPUOperand::ImmTyExpTgt)); 6497 return MatchOperand_Success; 6498 } 6499 6500 //===----------------------------------------------------------------------===// 6501 // parser helpers 6502 //===----------------------------------------------------------------------===// 6503 6504 bool 6505 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6506 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6507 } 6508 6509 bool 6510 AMDGPUAsmParser::isId(const StringRef Id) const { 6511 return isId(getToken(), Id); 6512 } 6513 6514 bool 6515 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6516 return getTokenKind() == Kind; 6517 } 6518 6519 bool 6520 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6521 if (isId(Id)) { 6522 lex(); 6523 return true; 6524 } 6525 return false; 6526 } 6527 6528 bool 6529 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6530 if (isToken(AsmToken::Identifier)) { 6531 StringRef Tok = getTokenStr(); 6532 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6533 lex(); 6534 return true; 6535 } 6536 } 6537 return false; 6538 } 6539 6540 bool 6541 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6542 if (isId(Id) && peekToken().is(Kind)) { 6543 lex(); 6544 lex(); 6545 return true; 6546 } 6547 return false; 6548 } 6549 6550 bool 6551 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6552 if (isToken(Kind)) { 6553 lex(); 6554 return true; 6555 } 6556 return false; 6557 } 6558 6559 bool 6560 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6561 const StringRef ErrMsg) { 6562 if (!trySkipToken(Kind)) { 6563 Error(getLoc(), ErrMsg); 6564 return false; 6565 } 6566 return true; 6567 } 6568 6569 bool 6570 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6571 SMLoc S = getLoc(); 6572 6573 const MCExpr *Expr; 6574 if (Parser.parseExpression(Expr)) 6575 return false; 6576 6577 if (Expr->evaluateAsAbsolute(Imm)) 6578 return true; 6579 6580 if (Expected.empty()) { 6581 Error(S, "expected absolute expression"); 6582 } else { 6583 Error(S, Twine("expected ", Expected) + 6584 Twine(" or an absolute expression")); 6585 } 6586 return false; 6587 } 6588 6589 bool 6590 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6591 SMLoc S = getLoc(); 6592 6593 const MCExpr *Expr; 6594 if (Parser.parseExpression(Expr)) 6595 return false; 6596 6597 int64_t IntVal; 6598 if (Expr->evaluateAsAbsolute(IntVal)) { 6599 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6600 } else { 6601 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6602 } 6603 return true; 6604 } 6605 6606 bool 6607 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6608 if (isToken(AsmToken::String)) { 6609 Val = getToken().getStringContents(); 6610 lex(); 6611 return true; 6612 } else { 6613 Error(getLoc(), ErrMsg); 6614 return false; 6615 } 6616 } 6617 6618 bool 6619 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6620 if (isToken(AsmToken::Identifier)) { 6621 Val = getTokenStr(); 6622 lex(); 6623 return true; 6624 } else { 6625 if (!ErrMsg.empty()) 6626 Error(getLoc(), ErrMsg); 6627 return false; 6628 } 6629 } 6630 6631 AsmToken 6632 AMDGPUAsmParser::getToken() const { 6633 return Parser.getTok(); 6634 } 6635 6636 AsmToken 6637 
AMDGPUAsmParser::peekToken() { 6638 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6639 } 6640 6641 void 6642 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6643 auto TokCount = getLexer().peekTokens(Tokens); 6644 6645 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6646 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6647 } 6648 6649 AsmToken::TokenKind 6650 AMDGPUAsmParser::getTokenKind() const { 6651 return getLexer().getKind(); 6652 } 6653 6654 SMLoc 6655 AMDGPUAsmParser::getLoc() const { 6656 return getToken().getLoc(); 6657 } 6658 6659 StringRef 6660 AMDGPUAsmParser::getTokenStr() const { 6661 return getToken().getString(); 6662 } 6663 6664 void 6665 AMDGPUAsmParser::lex() { 6666 Parser.Lex(); 6667 } 6668 6669 SMLoc 6670 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6671 const OperandVector &Operands) const { 6672 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6673 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6674 if (Test(Op)) 6675 return Op.getStartLoc(); 6676 } 6677 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6678 } 6679 6680 SMLoc 6681 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6682 const OperandVector &Operands) const { 6683 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6684 return getOperandLoc(Test, Operands); 6685 } 6686 6687 SMLoc 6688 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6689 const OperandVector &Operands) const { 6690 auto Test = [=](const AMDGPUOperand& Op) { 6691 return Op.isRegKind() && Op.getReg() == Reg; 6692 }; 6693 return getOperandLoc(Test, Operands); 6694 } 6695 6696 SMLoc 6697 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6698 auto Test = [](const AMDGPUOperand& Op) { 6699 return Op.IsImmKindLiteral() || Op.isExpr(); 6700 }; 6701 return getOperandLoc(Test, Operands); 6702 } 6703 6704 SMLoc 6705 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6706 auto Test = [](const AMDGPUOperand& Op) { 6707 return Op.isImmKindConst(); 6708 }; 6709 return getOperandLoc(Test, Operands); 6710 } 6711 6712 //===----------------------------------------------------------------------===// 6713 // swizzle 6714 //===----------------------------------------------------------------------===// 6715 6716 LLVM_READNONE 6717 static unsigned 6718 encodeBitmaskPerm(const unsigned AndMask, 6719 const unsigned OrMask, 6720 const unsigned XorMask) { 6721 using namespace llvm::AMDGPU::Swizzle; 6722 6723 return BITMASK_PERM_ENC | 6724 (AndMask << BITMASK_AND_SHIFT) | 6725 (OrMask << BITMASK_OR_SHIFT) | 6726 (XorMask << BITMASK_XOR_SHIFT); 6727 } 6728 6729 bool 6730 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6731 const unsigned MinVal, 6732 const unsigned MaxVal, 6733 const StringRef ErrMsg, 6734 SMLoc &Loc) { 6735 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6736 return false; 6737 } 6738 Loc = getLoc(); 6739 if (!parseExpr(Op)) { 6740 return false; 6741 } 6742 if (Op < MinVal || Op > MaxVal) { 6743 Error(Loc, ErrMsg); 6744 return false; 6745 } 6746 6747 return true; 6748 } 6749 6750 bool 6751 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6752 const unsigned MinVal, 6753 const unsigned MaxVal, 6754 const StringRef ErrMsg) { 6755 SMLoc Loc; 6756 for (unsigned i = 0; i < OpNum; ++i) { 6757 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6758 return false; 6759 } 6760 6761 return true; 6762 } 6763 6764 bool 6765 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6766 using namespace llvm::AMDGPU::Swizzle; 6767 6768 int64_t Lane[LANE_NUM]; 6769 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6770 "expected a 2-bit lane id")) { 6771 Imm = QUAD_PERM_ENC; 6772 for (unsigned I = 0; I < LANE_NUM; ++I) { 6773 Imm |= Lane[I] << (LANE_SHIFT * I); 6774 } 6775 return true; 6776 } 6777 return false; 6778 } 6779 6780 bool 6781 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6782 using namespace llvm::AMDGPU::Swizzle; 6783 6784 SMLoc Loc; 6785 int64_t GroupSize; 6786 int64_t LaneIdx; 6787 6788 if (!parseSwizzleOperand(GroupSize, 6789 2, 32, 6790 "group size must be in the interval [2,32]", 6791 Loc)) { 6792 return false; 6793 } 6794 if (!isPowerOf2_64(GroupSize)) { 6795 Error(Loc, "group size must be a power of two"); 6796 return false; 6797 } 6798 if (parseSwizzleOperand(LaneIdx, 6799 0, GroupSize - 1, 6800 "lane id must be in the interval [0,group size - 1]", 6801 Loc)) { 6802 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6803 return true; 6804 } 6805 return false; 6806 } 6807 6808 bool 6809 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6810 using namespace llvm::AMDGPU::Swizzle; 6811 6812 SMLoc Loc; 6813 int64_t GroupSize; 6814 6815 if (!parseSwizzleOperand(GroupSize, 6816 2, 32, 6817 "group size must be in the interval [2,32]", 6818 Loc)) { 6819 return false; 6820 } 6821 if (!isPowerOf2_64(GroupSize)) { 6822 Error(Loc, "group size must be a power of two"); 6823 return false; 6824 } 6825 6826 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6827 return true; 6828 } 6829 6830 bool 6831 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6832 using namespace llvm::AMDGPU::Swizzle; 6833 6834 SMLoc Loc; 6835 int64_t GroupSize; 6836 6837 if (!parseSwizzleOperand(GroupSize, 6838 1, 16, 6839 "group size must be in the interval [1,16]", 6840 Loc)) { 6841 return false; 6842 } 6843 if (!isPowerOf2_64(GroupSize)) { 6844 Error(Loc, "group size must be a power of two"); 6845 return false; 6846 } 6847 6848 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6849 return true; 6850 } 6851 6852 bool 6853 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6854 using namespace llvm::AMDGPU::Swizzle; 6855 6856 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6857 return false; 6858 } 6859 6860 StringRef Ctl; 6861 SMLoc StrLoc = getLoc(); 6862 if (!parseString(Ctl)) { 6863 return false; 6864 } 6865 if (Ctl.size() != BITMASK_WIDTH) { 6866 Error(StrLoc, "expected a 5-character mask"); 6867 return false; 6868 } 6869 6870 unsigned AndMask = 0; 6871 unsigned OrMask = 0; 6872 unsigned XorMask = 0; 6873 6874 for (size_t i = 0; i < Ctl.size(); ++i) { 6875 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6876 switch(Ctl[i]) { 6877 default: 6878 Error(StrLoc, "invalid mask"); 6879 return false; 6880 case '0': 6881 break; 6882 case '1': 6883 OrMask |= Mask; 6884 break; 6885 case 'p': 6886 AndMask |= Mask; 6887 break; 6888 case 'i': 6889 AndMask |= Mask; 6890 XorMask |= Mask; 6891 break; 6892 } 6893 } 6894 6895 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6896 return true; 6897 } 6898 6899 bool 6900 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6901 6902 SMLoc OffsetLoc = getLoc(); 6903 6904 if (!parseExpr(Imm, "a swizzle macro")) { 6905 return false; 6906 } 6907 if (!isUInt<16>(Imm)) { 6908 Error(OffsetLoc, "expected a 16-bit offset"); 6909 return false; 6910 } 6911 return true; 6912 } 6913 6914 bool 6915 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6916 using namespace llvm::AMDGPU::Swizzle; 6917 6918 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // The swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0) ?
7001 "expected a VGPR index mode or a closing parenthesis" : 7002 "expected a VGPR index mode"); 7003 return UNDEF; 7004 } 7005 7006 if (Imm & Mode) { 7007 Error(S, "duplicate VGPR index mode"); 7008 return UNDEF; 7009 } 7010 Imm |= Mode; 7011 7012 if (trySkipToken(AsmToken::RParen)) 7013 break; 7014 if (!skipToken(AsmToken::Comma, 7015 "expected a comma or a closing parenthesis")) 7016 return UNDEF; 7017 } 7018 7019 return Imm; 7020 } 7021 7022 OperandMatchResultTy 7023 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7024 7025 using namespace llvm::AMDGPU::VGPRIndexMode; 7026 7027 int64_t Imm = 0; 7028 SMLoc S = getLoc(); 7029 7030 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7031 Imm = parseGPRIdxMacro(); 7032 if (Imm == UNDEF) 7033 return MatchOperand_ParseFail; 7034 } else { 7035 if (getParser().parseAbsoluteExpression(Imm)) 7036 return MatchOperand_ParseFail; 7037 if (Imm < 0 || !isUInt<4>(Imm)) { 7038 Error(S, "invalid immediate: only 4-bit values are legal"); 7039 return MatchOperand_ParseFail; 7040 } 7041 } 7042 7043 Operands.push_back( 7044 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7045 return MatchOperand_Success; 7046 } 7047 7048 bool AMDGPUOperand::isGPRIdxMode() const { 7049 return isImmTy(ImmTyGprIdxMode); 7050 } 7051 7052 //===----------------------------------------------------------------------===// 7053 // sopp branch targets 7054 //===----------------------------------------------------------------------===// 7055 7056 OperandMatchResultTy 7057 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7058 7059 // Make sure we are not parsing something 7060 // that looks like a label or an expression but is not. 7061 // This will improve error messages. 7062 if (isRegister() || isModifier()) 7063 return MatchOperand_NoMatch; 7064 7065 if (!parseExpr(Operands)) 7066 return MatchOperand_ParseFail; 7067 7068 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7069 assert(Opr.isImm() || Opr.isExpr()); 7070 SMLoc Loc = Opr.getStartLoc(); 7071 7072 // Currently we do not support arbitrary expressions as branch targets. 7073 // Only labels and absolute expressions are accepted. 
7074 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7075 Error(Loc, "expected an absolute expression or a label"); 7076 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7077 Error(Loc, "expected a 16-bit signed jump offset"); 7078 } 7079 7080 return MatchOperand_Success; 7081 } 7082 7083 //===----------------------------------------------------------------------===// 7084 // Boolean holding registers 7085 //===----------------------------------------------------------------------===// 7086 7087 OperandMatchResultTy 7088 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7089 return parseReg(Operands); 7090 } 7091 7092 //===----------------------------------------------------------------------===// 7093 // mubuf 7094 //===----------------------------------------------------------------------===// 7095 7096 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7097 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7098 } 7099 7100 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7101 const OperandVector &Operands, 7102 bool IsAtomic, 7103 bool IsLds) { 7104 bool IsLdsOpcode = IsLds; 7105 bool HasLdsModifier = false; 7106 OptionalImmIndexMap OptionalIdx; 7107 unsigned FirstOperandIdx = 1; 7108 bool IsAtomicReturn = false; 7109 7110 if (IsAtomic) { 7111 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7112 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7113 if (!Op.isCPol()) 7114 continue; 7115 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7116 break; 7117 } 7118 7119 if (!IsAtomicReturn) { 7120 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7121 if (NewOpc != -1) 7122 Inst.setOpcode(NewOpc); 7123 } 7124 7125 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7126 SIInstrFlags::IsAtomicRet; 7127 } 7128 7129 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7130 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7131 7132 // Add the register arguments 7133 if (Op.isReg()) { 7134 Op.addRegOperands(Inst, 1); 7135 // Insert a tied src for atomic return dst. 7136 // This cannot be postponed as subsequent calls to 7137 // addImmOperands rely on correct number of MC operands. 7138 if (IsAtomicReturn && i == FirstOperandIdx) 7139 Op.addRegOperands(Inst, 1); 7140 continue; 7141 } 7142 7143 // Handle the case where soffset is an immediate 7144 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7145 Op.addImmOperands(Inst, 1); 7146 continue; 7147 } 7148 7149 HasLdsModifier |= Op.isLDS(); 7150 7151 // Handle tokens like 'offen' which are sometimes hard-coded into the 7152 // asm string. There are no MCInst operands for these. 7153 if (Op.isToken()) { 7154 continue; 7155 } 7156 assert(Op.isImm()); 7157 7158 // Handle optional arguments 7159 OptionalIdx[Op.getImmTy()] = i; 7160 } 7161 7162 // This is a workaround for an llvm quirk which may result in an 7163 // incorrect instruction selection. Lds and non-lds versions of 7164 // MUBUF instructions are identical except that lds versions 7165 // have mandatory 'lds' modifier. However this modifier follows 7166 // optional modifiers and llvm asm matcher regards this 'lds' 7167 // modifier as an optional one. As a result, an lds version 7168 // of opcode may be selected even if it has no 'lds' modifier. 7169 if (IsLdsOpcode && !HasLdsModifier) { 7170 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7171 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7172 Inst.setOpcode(NoLdsOpcode); 7173 IsLdsOpcode = false; 7174 } 7175 } 7176 7177 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7178 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7179 7180 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7181 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7182 } 7183 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7184 } 7185 7186 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7187 OptionalImmIndexMap OptionalIdx; 7188 7189 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7190 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7191 7192 // Add the register arguments 7193 if (Op.isReg()) { 7194 Op.addRegOperands(Inst, 1); 7195 continue; 7196 } 7197 7198 // Handle the case where soffset is an immediate 7199 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7200 Op.addImmOperands(Inst, 1); 7201 continue; 7202 } 7203 7204 // Handle tokens like 'offen' which are sometimes hard-coded into the 7205 // asm string. There are no MCInst operands for these. 7206 if (Op.isToken()) { 7207 continue; 7208 } 7209 assert(Op.isImm()); 7210 7211 // Handle optional arguments 7212 OptionalIdx[Op.getImmTy()] = i; 7213 } 7214 7215 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7216 AMDGPUOperand::ImmTyOffset); 7217 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7220 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7221 } 7222 7223 //===----------------------------------------------------------------------===// 7224 // mimg 7225 //===----------------------------------------------------------------------===// 7226 7227 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7228 bool IsAtomic) { 7229 unsigned I = 1; 7230 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7231 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7232 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7233 } 7234 7235 if (IsAtomic) { 7236 // Add src, same as dst 7237 assert(Desc.getNumDefs() == 1); 7238 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7239 } 7240 7241 OptionalImmIndexMap OptionalIdx; 7242 7243 for (unsigned E = Operands.size(); I != E; ++I) { 7244 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7245 7246 // Add the register arguments 7247 if (Op.isReg()) { 7248 Op.addRegOperands(Inst, 1); 7249 } else if (Op.isImmModifier()) { 7250 OptionalIdx[Op.getImmTy()] = I; 7251 } else if (!Op.isToken()) { 7252 llvm_unreachable("unexpected operand type"); 7253 } 7254 } 7255 7256 bool IsGFX10Plus = isGFX10Plus(); 7257 7258 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7259 if (IsGFX10Plus) 7260 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7264 if (IsGFX10Plus) 7265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7266 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7268 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7269 if (!IsGFX10Plus) 7270 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7271 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7272 } 7273 7274 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7275 cvtMIMG(Inst, Operands, true); 7276 } 7277 7278 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7279 OptionalImmIndexMap OptionalIdx; 7280 bool IsAtomicReturn = false; 7281 7282 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7283 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7284 if (!Op.isCPol()) 7285 continue; 7286 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7287 break; 7288 } 7289 7290 if (!IsAtomicReturn) { 7291 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7292 if (NewOpc != -1) 7293 Inst.setOpcode(NewOpc); 7294 } 7295 7296 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7297 SIInstrFlags::IsAtomicRet; 7298 7299 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7300 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7301 7302 // Add the register arguments 7303 if (Op.isReg()) { 7304 Op.addRegOperands(Inst, 1); 7305 if (IsAtomicReturn && i == 1) 7306 Op.addRegOperands(Inst, 1); 7307 continue; 7308 } 7309 7310 // Handle the case where soffset is an immediate 7311 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7312 Op.addImmOperands(Inst, 1); 7313 continue; 7314 } 7315 7316 // Handle tokens like 'offen' which are sometimes hard-coded into the 7317 // asm string. There are no MCInst operands for these. 7318 if (Op.isToken()) { 7319 continue; 7320 } 7321 assert(Op.isImm()); 7322 7323 // Handle optional arguments 7324 OptionalIdx[Op.getImmTy()] = i; 7325 } 7326 7327 if ((int)Inst.getNumOperands() <= 7328 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7331 } 7332 7333 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7334 const OperandVector &Operands) { 7335 for (unsigned I = 1; I < Operands.size(); ++I) { 7336 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7337 if (Operand.isReg()) 7338 Operand.addRegOperands(Inst, 1); 7339 } 7340 7341 Inst.addOperand(MCOperand::createImm(1)); // a16 7342 } 7343 7344 //===----------------------------------------------------------------------===// 7345 // smrd 7346 //===----------------------------------------------------------------------===// 7347 7348 bool AMDGPUOperand::isSMRDOffset8() const { 7349 return isImm() && isUInt<8>(getImm()); 7350 } 7351 7352 bool AMDGPUOperand::isSMEMOffset() const { 7353 return isImm(); // Offset range is checked later by validator. 7354 } 7355 7356 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7357 // 32-bit literals are only supported on CI and we only want to use them 7358 // when the offset is > 8-bits. 
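// Editorial sketch (not part of the upstream parser): the predicate below
// keeps the short 8-bit form whenever the value fits and only falls back to
// the CI 32-bit literal form otherwise, e.g. with hypothetical offsets:
//   0x40   -> fits isUInt<8>, so it is not treated as a literal offset
//   0x1234 -> fails isUInt<8> but fits isUInt<32>, so the literal form applies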
7359 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7360 } 7361 7362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7363 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7364 } 7365 7366 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7367 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7368 } 7369 7370 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7371 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7372 } 7373 7374 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7375 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7376 } 7377 7378 //===----------------------------------------------------------------------===// 7379 // vop3 7380 //===----------------------------------------------------------------------===// 7381 7382 static bool ConvertOmodMul(int64_t &Mul) { 7383 if (Mul != 1 && Mul != 2 && Mul != 4) 7384 return false; 7385 7386 Mul >>= 1; 7387 return true; 7388 } 7389 7390 static bool ConvertOmodDiv(int64_t &Div) { 7391 if (Div == 1) { 7392 Div = 0; 7393 return true; 7394 } 7395 7396 if (Div == 2) { 7397 Div = 3; 7398 return true; 7399 } 7400 7401 return false; 7402 } 7403 7404 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7405 // This is intentional and ensures compatibility with sp3. 7406 // See bug 35397 for details. 7407 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7408 if (BoundCtrl == 0 || BoundCtrl == 1) { 7409 BoundCtrl = 1; 7410 return true; 7411 } 7412 return false; 7413 } 7414 7415 // Note: the order in this table matches the order of operands in AsmString. 7416 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7417 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7418 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7419 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7420 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7421 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7422 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7423 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7424 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7425 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7426 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7427 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7428 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7429 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7430 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7431 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7432 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7433 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7434 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7435 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7436 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7437 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7438 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7439 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7440 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7441 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7442 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7443 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7444 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7445 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7446 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7447 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7448 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7449 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7450 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7451 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7452 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7453 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7454 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7455 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7456 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7457 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7458 }; 7459 7460 void AMDGPUAsmParser::onBeginOfFile() { 7461 if (!getParser().getStreamer().getTargetStreamer() || 7462 getSTI().getTargetTriple().getArch() == Triple::r600) 7463 return; 7464 7465 if (!getTargetStreamer().getTargetID()) 7466 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7467 7468 if (isHsaAbiVersion3AndAbove(&getSTI())) 7469 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7470 } 7471 7472 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7473 7474 OperandMatchResultTy res = parseOptionalOpr(Operands); 7475 7476 // This is a hack to enable hardcoded mandatory operands which follow 7477 // optional operands. 7478 // 7479 // Current design assumes that all operands after the first optional operand 7480 // are also optional. However implementation of some instructions violates 7481 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7482 // 7483 // To alleviate this problem, we have to (implicitly) parse extra operands 7484 // to make sure autogenerated parser of custom operands never hit hardcoded 7485 // mandatory operands. 
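// Editorial example (hypothetical syntax, based on the comment above): a
// global atomic with return hard-codes 'glc' in its asm string after the
// optional operands, e.g.
//   global_atomic_add v0, v[1:2], v2, off offset:16 glc
// Without the bounded lookahead below, the autogenerated optional-operand
// parser could stop at 'offset:16' and run into the hard-coded mandatory
// 'glc' token; the loop keeps consuming optional operands first.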
7486 7487 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7488 if (res != MatchOperand_Success || 7489 isToken(AsmToken::EndOfStatement)) 7490 break; 7491 7492 trySkipToken(AsmToken::Comma); 7493 res = parseOptionalOpr(Operands); 7494 } 7495 7496 return res; 7497 } 7498 7499 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7500 OperandMatchResultTy res; 7501 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7502 // try to parse any optional operand here 7503 if (Op.IsBit) { 7504 res = parseNamedBit(Op.Name, Operands, Op.Type); 7505 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7506 res = parseOModOperand(Operands); 7507 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7508 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7509 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7510 res = parseSDWASel(Operands, Op.Name, Op.Type); 7511 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7512 res = parseSDWADstUnused(Operands); 7513 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7514 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7515 Op.Type == AMDGPUOperand::ImmTyNegLo || 7516 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7517 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7518 Op.ConvertResult); 7519 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7520 res = parseDim(Operands); 7521 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7522 res = parseCPol(Operands); 7523 } else { 7524 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7525 } 7526 if (res != MatchOperand_NoMatch) { 7527 return res; 7528 } 7529 } 7530 return MatchOperand_NoMatch; 7531 } 7532 7533 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7534 StringRef Name = getTokenStr(); 7535 if (Name == "mul") { 7536 return parseIntWithPrefix("mul", Operands, 7537 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7538 } 7539 7540 if (Name == "div") { 7541 return parseIntWithPrefix("div", Operands, 7542 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7543 } 7544 7545 return MatchOperand_NoMatch; 7546 } 7547 7548 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7549 cvtVOP3P(Inst, Operands); 7550 7551 int Opc = Inst.getOpcode(); 7552 7553 int SrcNum; 7554 const int Ops[] = { AMDGPU::OpName::src0, 7555 AMDGPU::OpName::src1, 7556 AMDGPU::OpName::src2 }; 7557 for (SrcNum = 0; 7558 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7559 ++SrcNum); 7560 assert(SrcNum > 0); 7561 7562 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7563 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7564 7565 if ((OpSel & (1 << SrcNum)) != 0) { 7566 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7567 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7568 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7569 } 7570 } 7571 7572 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7573 // 1. This operand is input modifiers 7574 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7575 // 2. This is not last operand 7576 && Desc.NumOperands > (OpNum + 1) 7577 // 3. Next operand is register class 7578 && Desc.OpInfo[OpNum + 1].RegClass != -1 7579 // 4. 
Next register is not tied to any other operand 7580 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7581 } 7582 7583 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7584 { 7585 OptionalImmIndexMap OptionalIdx; 7586 unsigned Opc = Inst.getOpcode(); 7587 7588 unsigned I = 1; 7589 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7590 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7591 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7592 } 7593 7594 for (unsigned E = Operands.size(); I != E; ++I) { 7595 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7596 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7597 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7598 } else if (Op.isInterpSlot() || 7599 Op.isInterpAttr() || 7600 Op.isAttrChan()) { 7601 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7602 } else if (Op.isImmModifier()) { 7603 OptionalIdx[Op.getImmTy()] = I; 7604 } else { 7605 llvm_unreachable("unhandled operand type"); 7606 } 7607 } 7608 7609 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7611 } 7612 7613 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7614 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7615 } 7616 7617 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7618 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7619 } 7620 } 7621 7622 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7623 OptionalImmIndexMap &OptionalIdx) { 7624 unsigned Opc = Inst.getOpcode(); 7625 7626 unsigned I = 1; 7627 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7628 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7629 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7630 } 7631 7632 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7633 // This instruction has src modifiers 7634 for (unsigned E = Operands.size(); I != E; ++I) { 7635 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7636 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7637 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7638 } else if (Op.isImmModifier()) { 7639 OptionalIdx[Op.getImmTy()] = I; 7640 } else if (Op.isRegOrImm()) { 7641 Op.addRegOrImmOperands(Inst, 1); 7642 } else { 7643 llvm_unreachable("unhandled operand type"); 7644 } 7645 } 7646 } else { 7647 // No src modifiers 7648 for (unsigned E = Operands.size(); I != E; ++I) { 7649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7650 if (Op.isMod()) { 7651 OptionalIdx[Op.getImmTy()] = I; 7652 } else { 7653 Op.addRegOrImmOperands(Inst, 1); 7654 } 7655 } 7656 } 7657 7658 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7659 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7660 } 7661 7662 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7664 } 7665 7666 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7667 // it has src2 register operand that is tied to dst operand 7668 // we don't allow modifiers for this operand in assembler so src2_modifiers 7669 // should be 0. 
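// Editorial example (hypothetical): for 'v_mac_f32_e64 v0, v1, v2' the tied
// third source is the destination itself, so the code below inserts an
// explicit src2_modifiers immediate of 0 followed by a copy of the dst
// operand (v0) as src2; modifiers such as -v0 or |v0| on that tied operand
// are not accepted by the assembler.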
7670 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7671 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7672 Opc == AMDGPU::V_MAC_F32_e64_vi || 7673 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7674 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7675 Opc == AMDGPU::V_MAC_F16_e64_vi || 7676 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7677 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7678 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7679 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7680 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7681 auto it = Inst.begin(); 7682 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7683 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7684 ++it; 7685 // Copy the operand to ensure it's not invalidated when Inst grows. 7686 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7687 } 7688 } 7689 7690 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7691 OptionalImmIndexMap OptionalIdx; 7692 cvtVOP3(Inst, Operands, OptionalIdx); 7693 } 7694 7695 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7696 OptionalImmIndexMap &OptIdx) { 7697 const int Opc = Inst.getOpcode(); 7698 const MCInstrDesc &Desc = MII.get(Opc); 7699 7700 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7701 7702 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7703 assert(!IsPacked); 7704 Inst.addOperand(Inst.getOperand(0)); 7705 } 7706 7707 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7708 // instruction, and then figure out where to actually put the modifiers 7709 7710 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7711 if (OpSelIdx != -1) { 7712 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7713 } 7714 7715 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7716 if (OpSelHiIdx != -1) { 7717 int DefaultVal = IsPacked ? 
-1 : 0; 7718 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7719 DefaultVal); 7720 } 7721 7722 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7723 if (NegLoIdx != -1) { 7724 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7725 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7726 } 7727 7728 const int Ops[] = { AMDGPU::OpName::src0, 7729 AMDGPU::OpName::src1, 7730 AMDGPU::OpName::src2 }; 7731 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7732 AMDGPU::OpName::src1_modifiers, 7733 AMDGPU::OpName::src2_modifiers }; 7734 7735 unsigned OpSel = 0; 7736 unsigned OpSelHi = 0; 7737 unsigned NegLo = 0; 7738 unsigned NegHi = 0; 7739 7740 if (OpSelIdx != -1) 7741 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7742 7743 if (OpSelHiIdx != -1) 7744 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7745 7746 if (NegLoIdx != -1) { 7747 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7748 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7749 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7750 } 7751 7752 for (int J = 0; J < 3; ++J) { 7753 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7754 if (OpIdx == -1) 7755 break; 7756 7757 uint32_t ModVal = 0; 7758 7759 if ((OpSel & (1 << J)) != 0) 7760 ModVal |= SISrcMods::OP_SEL_0; 7761 7762 if ((OpSelHi & (1 << J)) != 0) 7763 ModVal |= SISrcMods::OP_SEL_1; 7764 7765 if ((NegLo & (1 << J)) != 0) 7766 ModVal |= SISrcMods::NEG; 7767 7768 if ((NegHi & (1 << J)) != 0) 7769 ModVal |= SISrcMods::NEG_HI; 7770 7771 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7772 7773 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7774 } 7775 } 7776 7777 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7778 OptionalImmIndexMap OptIdx; 7779 cvtVOP3(Inst, Operands, OptIdx); 7780 cvtVOP3P(Inst, Operands, OptIdx); 7781 } 7782 7783 //===----------------------------------------------------------------------===// 7784 // dpp 7785 //===----------------------------------------------------------------------===// 7786 7787 bool AMDGPUOperand::isDPP8() const { 7788 return isImmTy(ImmTyDPP8); 7789 } 7790 7791 bool AMDGPUOperand::isDPPCtrl() const { 7792 using namespace AMDGPU::DPP; 7793 7794 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7795 if (result) { 7796 int64_t Imm = getImm(); 7797 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7798 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7799 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7800 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7801 (Imm == DppCtrl::WAVE_SHL1) || 7802 (Imm == DppCtrl::WAVE_ROL1) || 7803 (Imm == DppCtrl::WAVE_SHR1) || 7804 (Imm == DppCtrl::WAVE_ROR1) || 7805 (Imm == DppCtrl::ROW_MIRROR) || 7806 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7807 (Imm == DppCtrl::BCAST15) || 7808 (Imm == DppCtrl::BCAST31) || 7809 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7810 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7811 } 7812 return false; 7813 } 7814 7815 //===----------------------------------------------------------------------===// 7816 // mAI 7817 //===----------------------------------------------------------------------===// 7818 7819 bool AMDGPUOperand::isBLGP() const { 7820 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7821 } 7822 7823 bool 
AMDGPUOperand::isCBSZ() const { 7824 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7825 } 7826 7827 bool AMDGPUOperand::isABID() const { 7828 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7829 } 7830 7831 bool AMDGPUOperand::isS16Imm() const { 7832 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7833 } 7834 7835 bool AMDGPUOperand::isU16Imm() const { 7836 return isImm() && isUInt<16>(getImm()); 7837 } 7838 7839 //===----------------------------------------------------------------------===// 7840 // dim 7841 //===----------------------------------------------------------------------===// 7842 7843 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7844 // We want to allow "dim:1D" etc., 7845 // but the initial 1 is tokenized as an integer. 7846 std::string Token; 7847 if (isToken(AsmToken::Integer)) { 7848 SMLoc Loc = getToken().getEndLoc(); 7849 Token = std::string(getTokenStr()); 7850 lex(); 7851 if (getLoc() != Loc) 7852 return false; 7853 } 7854 7855 StringRef Suffix; 7856 if (!parseId(Suffix)) 7857 return false; 7858 Token += Suffix; 7859 7860 StringRef DimId = Token; 7861 if (DimId.startswith("SQ_RSRC_IMG_")) 7862 DimId = DimId.drop_front(12); 7863 7864 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7865 if (!DimInfo) 7866 return false; 7867 7868 Encoding = DimInfo->Encoding; 7869 return true; 7870 } 7871 7872 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7873 if (!isGFX10Plus()) 7874 return MatchOperand_NoMatch; 7875 7876 SMLoc S = getLoc(); 7877 7878 if (!trySkipId("dim", AsmToken::Colon)) 7879 return MatchOperand_NoMatch; 7880 7881 unsigned Encoding; 7882 SMLoc Loc = getLoc(); 7883 if (!parseDimId(Encoding)) { 7884 Error(Loc, "invalid dim value"); 7885 return MatchOperand_ParseFail; 7886 } 7887 7888 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7889 AMDGPUOperand::ImmTyDim)); 7890 return MatchOperand_Success; 7891 } 7892 7893 //===----------------------------------------------------------------------===// 7894 // dpp 7895 //===----------------------------------------------------------------------===// 7896 7897 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7898 SMLoc S = getLoc(); 7899 7900 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7901 return MatchOperand_NoMatch; 7902 7903 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7904 7905 int64_t Sels[8]; 7906 7907 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7908 return MatchOperand_ParseFail; 7909 7910 for (size_t i = 0; i < 8; ++i) { 7911 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7912 return MatchOperand_ParseFail; 7913 7914 SMLoc Loc = getLoc(); 7915 if (getParser().parseAbsoluteExpression(Sels[i])) 7916 return MatchOperand_ParseFail; 7917 if (0 > Sels[i] || 7 < Sels[i]) { 7918 Error(Loc, "expected a 3-bit value"); 7919 return MatchOperand_ParseFail; 7920 } 7921 } 7922 7923 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7924 return MatchOperand_ParseFail; 7925 7926 unsigned DPP8 = 0; 7927 for (size_t i = 0; i < 8; ++i) 7928 DPP8 |= (Sels[i] << (i * 3)); 7929 7930 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7931 return MatchOperand_Success; 7932 } 7933 7934 bool 7935 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7936 const OperandVector &Operands) { 7937 if (Ctrl == "row_newbcast") 7938 return isGFX90A(); 7939 7940 if (Ctrl == "row_share" || 7941 Ctrl 
== "row_xmask") 7942 return isGFX10Plus(); 7943 7944 if (Ctrl == "wave_shl" || 7945 Ctrl == "wave_shr" || 7946 Ctrl == "wave_rol" || 7947 Ctrl == "wave_ror" || 7948 Ctrl == "row_bcast") 7949 return isVI() || isGFX9(); 7950 7951 return Ctrl == "row_mirror" || 7952 Ctrl == "row_half_mirror" || 7953 Ctrl == "quad_perm" || 7954 Ctrl == "row_shl" || 7955 Ctrl == "row_shr" || 7956 Ctrl == "row_ror"; 7957 } 7958 7959 int64_t 7960 AMDGPUAsmParser::parseDPPCtrlPerm() { 7961 // quad_perm:[%d,%d,%d,%d] 7962 7963 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7964 return -1; 7965 7966 int64_t Val = 0; 7967 for (int i = 0; i < 4; ++i) { 7968 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7969 return -1; 7970 7971 int64_t Temp; 7972 SMLoc Loc = getLoc(); 7973 if (getParser().parseAbsoluteExpression(Temp)) 7974 return -1; 7975 if (Temp < 0 || Temp > 3) { 7976 Error(Loc, "expected a 2-bit value"); 7977 return -1; 7978 } 7979 7980 Val += (Temp << i * 2); 7981 } 7982 7983 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7984 return -1; 7985 7986 return Val; 7987 } 7988 7989 int64_t 7990 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7991 using namespace AMDGPU::DPP; 7992 7993 // sel:%d 7994 7995 int64_t Val; 7996 SMLoc Loc = getLoc(); 7997 7998 if (getParser().parseAbsoluteExpression(Val)) 7999 return -1; 8000 8001 struct DppCtrlCheck { 8002 int64_t Ctrl; 8003 int Lo; 8004 int Hi; 8005 }; 8006 8007 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8008 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8009 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8010 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8011 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8012 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8013 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8014 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8015 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8016 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8017 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8018 .Default({-1, 0, 0}); 8019 8020 bool Valid; 8021 if (Check.Ctrl == -1) { 8022 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8023 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8024 } else { 8025 Valid = Check.Lo <= Val && Val <= Check.Hi; 8026 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8027 } 8028 8029 if (!Valid) { 8030 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8031 return -1; 8032 } 8033 8034 return Val; 8035 } 8036 8037 OperandMatchResultTy 8038 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8039 using namespace AMDGPU::DPP; 8040 8041 if (!isToken(AsmToken::Identifier) || 8042 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8043 return MatchOperand_NoMatch; 8044 8045 SMLoc S = getLoc(); 8046 int64_t Val = -1; 8047 StringRef Ctrl; 8048 8049 parseId(Ctrl); 8050 8051 if (Ctrl == "row_mirror") { 8052 Val = DppCtrl::ROW_MIRROR; 8053 } else if (Ctrl == "row_half_mirror") { 8054 Val = DppCtrl::ROW_HALF_MIRROR; 8055 } else { 8056 if (skipToken(AsmToken::Colon, "expected a colon")) { 8057 if (Ctrl == "quad_perm") { 8058 Val = parseDPPCtrlPerm(); 8059 } else { 8060 Val = parseDPPCtrlSel(Ctrl); 8061 } 8062 } 8063 } 8064 8065 if (Val == -1) 8066 return MatchOperand_ParseFail; 8067 8068 Operands.push_back( 8069 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8070 return MatchOperand_Success; 8071 } 8072 8073 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8074 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8075 } 8076 8077 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8078 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8079 } 8080 8081 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8082 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8083 } 8084 8085 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8086 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8087 } 8088 8089 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8090 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8091 } 8092 8093 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8094 OptionalImmIndexMap OptionalIdx; 8095 8096 unsigned Opc = Inst.getOpcode(); 8097 bool HasModifiers = 8098 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8099 unsigned I = 1; 8100 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8101 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8102 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8103 } 8104 8105 int Fi = 0; 8106 for (unsigned E = Operands.size(); I != E; ++I) { 8107 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8108 MCOI::TIED_TO); 8109 if (TiedTo != -1) { 8110 assert((unsigned)TiedTo < Inst.getNumOperands()); 8111 // handle tied old or src2 for MAC instructions 8112 Inst.addOperand(Inst.getOperand(TiedTo)); 8113 } 8114 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8115 // Add the register arguments 8116 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8117 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8118 // Skip it. 
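// Editorial example (hypothetical syntax): in
//   v_add_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf
// the written 'vcc' is implied by the VOP2b encoding, so the parsed register
// operand is dropped here instead of being added to the MCInst.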
8119 continue; 8120 } 8121 8122 if (IsDPP8) { 8123 if (Op.isDPP8()) { 8124 Op.addImmOperands(Inst, 1); 8125 } else if (HasModifiers && 8126 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8127 Op.addRegWithFPInputModsOperands(Inst, 2); 8128 } else if (Op.isFI()) { 8129 Fi = Op.getImm(); 8130 } else if (Op.isReg()) { 8131 Op.addRegOperands(Inst, 1); 8132 } else { 8133 llvm_unreachable("Invalid operand type"); 8134 } 8135 } else { 8136 if (HasModifiers && 8137 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8138 Op.addRegWithFPInputModsOperands(Inst, 2); 8139 } else if (Op.isReg()) { 8140 Op.addRegOperands(Inst, 1); 8141 } else if (Op.isDPPCtrl()) { 8142 Op.addImmOperands(Inst, 1); 8143 } else if (Op.isImm()) { 8144 // Handle optional arguments 8145 OptionalIdx[Op.getImmTy()] = I; 8146 } else { 8147 llvm_unreachable("Invalid operand type"); 8148 } 8149 } 8150 } 8151 8152 if (IsDPP8) { 8153 using namespace llvm::AMDGPU::DPP; 8154 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8155 } else { 8156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8159 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8161 } 8162 } 8163 } 8164 8165 //===----------------------------------------------------------------------===// 8166 // sdwa 8167 //===----------------------------------------------------------------------===// 8168 8169 OperandMatchResultTy 8170 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8171 AMDGPUOperand::ImmTy Type) { 8172 using namespace llvm::AMDGPU::SDWA; 8173 8174 SMLoc S = getLoc(); 8175 StringRef Value; 8176 OperandMatchResultTy res; 8177 8178 SMLoc StringLoc; 8179 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8180 if (res != MatchOperand_Success) { 8181 return res; 8182 } 8183 8184 int64_t Int; 8185 Int = StringSwitch<int64_t>(Value) 8186 .Case("BYTE_0", SdwaSel::BYTE_0) 8187 .Case("BYTE_1", SdwaSel::BYTE_1) 8188 .Case("BYTE_2", SdwaSel::BYTE_2) 8189 .Case("BYTE_3", SdwaSel::BYTE_3) 8190 .Case("WORD_0", SdwaSel::WORD_0) 8191 .Case("WORD_1", SdwaSel::WORD_1) 8192 .Case("DWORD", SdwaSel::DWORD) 8193 .Default(0xffffffff); 8194 8195 if (Int == 0xffffffff) { 8196 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8197 return MatchOperand_ParseFail; 8198 } 8199 8200 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8201 return MatchOperand_Success; 8202 } 8203 8204 OperandMatchResultTy 8205 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8206 using namespace llvm::AMDGPU::SDWA; 8207 8208 SMLoc S = getLoc(); 8209 StringRef Value; 8210 OperandMatchResultTy res; 8211 8212 SMLoc StringLoc; 8213 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8214 if (res != MatchOperand_Success) { 8215 return res; 8216 } 8217 8218 int64_t Int; 8219 Int = StringSwitch<int64_t>(Value) 8220 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8221 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8222 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8223 .Default(0xffffffff); 8224 8225 if (Int == 0xffffffff) { 8226 Error(StringLoc, "invalid dst_unused value"); 8227 return MatchOperand_ParseFail; 8228 } 8229 8230 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8231 return MatchOperand_Success; 8232 } 8233 8234 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8235 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8236 } 8237 8238 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8239 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8240 } 8241 8242 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8243 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8244 } 8245 8246 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8247 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8248 } 8249 8250 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8251 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8252 } 8253 8254 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8255 uint64_t BasicInstType, 8256 bool SkipDstVcc, 8257 bool SkipSrcVcc) { 8258 using namespace llvm::AMDGPU::SDWA; 8259 8260 OptionalImmIndexMap OptionalIdx; 8261 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8262 bool SkippedVcc = false; 8263 8264 unsigned I = 1; 8265 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8266 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8267 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8268 } 8269 8270 for (unsigned E = Operands.size(); I != E; ++I) { 8271 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8272 if (SkipVcc && !SkippedVcc && Op.isReg() && 8273 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8274 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8275 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8276 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8277 // Skip VCC only if we didn't skip it on previous iteration. 8278 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
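// Editorial walk-through of the operand counts used below, for the
// 'v_addc_u32_sdwa v1, vcc, v2, v3, vcc' example from the comment above:
//   after the vdst (v1) is added, Inst.getNumOperands() == 1 -> the second
//   written operand (dst-side vcc) is skipped;
//   after src0 (modifiers + v2) and src1 (modifiers + v3) are added,
//   Inst.getNumOperands() == 5 -> the trailing src-side vcc is skipped.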
8279 if (BasicInstType == SIInstrFlags::VOP2 && 8280 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8281 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8282 SkippedVcc = true; 8283 continue; 8284 } else if (BasicInstType == SIInstrFlags::VOPC && 8285 Inst.getNumOperands() == 0) { 8286 SkippedVcc = true; 8287 continue; 8288 } 8289 } 8290 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8291 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8292 } else if (Op.isImm()) { 8293 // Handle optional arguments 8294 OptionalIdx[Op.getImmTy()] = I; 8295 } else { 8296 llvm_unreachable("Invalid operand type"); 8297 } 8298 SkippedVcc = false; 8299 } 8300 8301 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8302 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8303 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8304 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8305 switch (BasicInstType) { 8306 case SIInstrFlags::VOP1: 8307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8308 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8309 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8310 } 8311 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8312 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8313 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8314 break; 8315 8316 case SIInstrFlags::VOP2: 8317 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8318 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8319 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8320 } 8321 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8325 break; 8326 8327 case SIInstrFlags::VOPC: 8328 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8331 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8332 break; 8333 8334 default: 8335 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8336 } 8337 } 8338 8339 // special case v_mac_{f16, f32}: 8340 // it has src2 register operand that is tied to dst operand 8341 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8342 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8343 auto it = Inst.begin(); 8344 std::advance( 8345 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8346 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8347 } 8348 } 8349 8350 //===----------------------------------------------------------------------===// 8351 // mAI 8352 //===----------------------------------------------------------------------===// 8353 8354 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8355 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8356 } 8357 8358 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8359 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8360 } 8361 8362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8363 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8364 } 8365 8366 /// Force static initialization. 8367 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8368 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8369 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8370 } 8371 8372 #define GET_REGISTER_MATCHER 8373 #define GET_MATCHER_IMPLEMENTATION 8374 #define GET_MNEMONIC_SPELL_CHECKER 8375 #define GET_MNEMONIC_CHECKER 8376 #include "AMDGPUGenAsmMatcher.inc" 8377 8378 // This function should be defined after auto-generated include so that we have 8379 // MatchClassKind enum defined 8380 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8381 unsigned Kind) { 8382 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8383 // But MatchInstructionImpl() expects to meet token and fails to validate 8384 // operand. This method checks if we are given immediate operand but expect to 8385 // get corresponding token. 8386 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8387 switch (Kind) { 8388 case MCK_addr64: 8389 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8390 case MCK_gds: 8391 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8392 case MCK_lds: 8393 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8394 case MCK_idxen: 8395 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8396 case MCK_offen: 8397 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8398 case MCK_SSrcB32: 8399 // When operands have expression values, they will return true for isToken, 8400 // because it is not possible to distinguish between a token and an 8401 // expression at parse time. MatchInstructionImpl() will always try to 8402 // match an operand as a token, when isToken returns true, and when the 8403 // name of the expression is not a valid token, the match will fail, 8404 // so we need to handle it here. 8405 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8406 case MCK_SSrcF32: 8407 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8408 case MCK_SoppBrTarget: 8409 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8410 case MCK_VReg32OrOff: 8411 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8412 case MCK_InterpSlot: 8413 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8414 case MCK_Attr: 8415 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8416 case MCK_AttrChan: 8417 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8418 case MCK_ImmSMEMOffset: 8419 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8420 case MCK_SReg_64: 8421 case MCK_SReg_64_XEXEC: 8422 // Null is defined as a 32-bit register but 8423 // it should also be enabled with 64-bit operands. 8424 // The following code enables it for SReg_64 operands 8425 // used as source and destination. Remaining source 8426 // operands are handled in isInlinableImm. 8427 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8428 default: 8429 return Match_InvalidOperand; 8430 } 8431 } 8432 8433 //===----------------------------------------------------------------------===// 8434 // endpgm 8435 //===----------------------------------------------------------------------===// 8436 8437 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8438 SMLoc S = getLoc(); 8439 int64_t Imm = 0; 8440 8441 if (!parseExpr(Imm)) { 8442 // The operand is optional, if not present default to 0 8443 Imm = 0; 8444 } 8445 8446 if (!isUInt<16>(Imm)) { 8447 Error(S, "expected a 16-bit value"); 8448 return MatchOperand_ParseFail; 8449 } 8450 8451 Operands.push_back( 8452 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8453 return MatchOperand_Success; 8454 } 8455 8456 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8457
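// Editorial examples (hypothetical assembly, appended as a note): the endpgm
// operand parsed by parseEndpgmOp above is optional and must fit in 16 bits:
//   s_endpgm          -> immediate defaults to 0
//   s_endpgm 1        -> explicit 16-bit value
//   s_endpgm 0x10000  -> rejected with "expected a 16-bit value"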