//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

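  // Informal note (not from the original source): each predicate in this long
  // block accepts either a register of the named class (the _64/_128/...
  // infix is the register width in bits) or an inline constant of the suffix
  // type (B = integer, F = floating point), with no source modifiers allowed.
  // For example, isVISrc_128F32 matches a 128-bit VGPR tuple or an f32 inline
  // constant; the isAISrc_* predicates are the AGPR analogues.
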
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
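      // Informal illustration (not from the original source): these symbols
      // give assembly code a way to query the target in expressions, e.g.
      //   .if .amdgcn.gfx_generation_number >= 9
      // in a source assembled for a GFX9-or-newer subtarget.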
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
defaultEndpgmImmOperands() const; 1730 }; 1731 1732 struct OptionalOperand { 1733 const char *Name; 1734 AMDGPUOperand::ImmTy Type; 1735 bool IsBit; 1736 bool (*ConvertResult)(int64_t&); 1737 }; 1738 1739 } // end anonymous namespace 1740 1741 // May be called with integer type with equivalent bitwidth. 1742 static const fltSemantics *getFltSemantics(unsigned Size) { 1743 switch (Size) { 1744 case 4: 1745 return &APFloat::IEEEsingle(); 1746 case 8: 1747 return &APFloat::IEEEdouble(); 1748 case 2: 1749 return &APFloat::IEEEhalf(); 1750 default: 1751 llvm_unreachable("unsupported fp type"); 1752 } 1753 } 1754 1755 static const fltSemantics *getFltSemantics(MVT VT) { 1756 return getFltSemantics(VT.getSizeInBits() / 8); 1757 } 1758 1759 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1760 switch (OperandType) { 1761 case AMDGPU::OPERAND_REG_IMM_INT32: 1762 case AMDGPU::OPERAND_REG_IMM_FP32: 1763 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1764 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1765 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1766 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1767 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1768 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1769 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1770 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1771 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1772 case AMDGPU::OPERAND_KIMM32: 1773 return &APFloat::IEEEsingle(); 1774 case AMDGPU::OPERAND_REG_IMM_INT64: 1775 case AMDGPU::OPERAND_REG_IMM_FP64: 1776 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1777 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1778 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1779 return &APFloat::IEEEdouble(); 1780 case AMDGPU::OPERAND_REG_IMM_INT16: 1781 case AMDGPU::OPERAND_REG_IMM_FP16: 1782 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1783 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1784 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1785 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1786 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1787 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1788 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1789 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1790 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1791 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1792 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1793 case AMDGPU::OPERAND_KIMM16: 1794 return &APFloat::IEEEhalf(); 1795 default: 1796 llvm_unreachable("unsupported fp type"); 1797 } 1798 } 1799 1800 //===----------------------------------------------------------------------===// 1801 // Operand 1802 //===----------------------------------------------------------------------===// 1803 1804 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1805 bool Lost; 1806 1807 // Convert literal to single precision 1808 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1809 APFloat::rmNearestTiesToEven, 1810 &Lost); 1811 // We allow precision lost but not overflow or underflow 1812 if (Status != APFloat::opOK && 1813 Lost && 1814 ((Status & APFloat::opOverflow) != 0 || 1815 (Status & APFloat::opUnderflow) != 0)) { 1816 return false; 1817 } 1818 1819 return true; 1820 } 1821 1822 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1823 return isUIntN(Size, Val) || isIntN(Size, Val); 1824 } 1825 1826 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1827 if (VT.getScalarType() == MVT::i16) { 1828 // FP immediate values are broken. 1829 return isInlinableIntLiteral(Val); 1830 } 1831 1832 // f16/v2f16 operands work correctly for all values. 
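  // Besides the inline integer range, isInlinableLiteral16() also accepts the
  // f16 bit patterns of the standard inline FP constants (+/-0.5, +/-1.0,
  // +/-2.0, +/-4.0, and 1/(2*pi) when HasInv2Pi is set).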
1833 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1834 } 1835 1836 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1837 1838 // This is a hack to enable named inline values like 1839 // shared_base with both 32-bit and 64-bit operands. 1840 // Note that these values are defined as 1841 // 32-bit operands only. 1842 if (isInlineValue()) { 1843 return true; 1844 } 1845 1846 if (!isImmTy(ImmTyNone)) { 1847 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1848 return false; 1849 } 1850 // TODO: We should avoid using host float here. It would be better to 1851 // check the float bit values which is what a few other places do. 1852 // We've had bot failures before due to weird NaN support on mips hosts. 1853 1854 APInt Literal(64, Imm.Val); 1855 1856 if (Imm.IsFPImm) { // We got fp literal token 1857 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1858 return AMDGPU::isInlinableLiteral64(Imm.Val, 1859 AsmParser->hasInv2PiInlineImm()); 1860 } 1861 1862 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1863 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1864 return false; 1865 1866 if (type.getScalarSizeInBits() == 16) { 1867 return isInlineableLiteralOp16( 1868 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1869 type, AsmParser->hasInv2PiInlineImm()); 1870 } 1871 1872 // Check if single precision literal is inlinable 1873 return AMDGPU::isInlinableLiteral32( 1874 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1875 AsmParser->hasInv2PiInlineImm()); 1876 } 1877 1878 // We got int literal token. 1879 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1880 return AMDGPU::isInlinableLiteral64(Imm.Val, 1881 AsmParser->hasInv2PiInlineImm()); 1882 } 1883 1884 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1885 return false; 1886 } 1887 1888 if (type.getScalarSizeInBits() == 16) { 1889 return isInlineableLiteralOp16( 1890 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1891 type, AsmParser->hasInv2PiInlineImm()); 1892 } 1893 1894 return AMDGPU::isInlinableLiteral32( 1895 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1896 AsmParser->hasInv2PiInlineImm()); 1897 } 1898 1899 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1900 // Check that this immediate can be added as literal 1901 if (!isImmTy(ImmTyNone)) { 1902 return false; 1903 } 1904 1905 if (!Imm.IsFPImm) { 1906 // We got int literal token. 1907 1908 if (type == MVT::f64 && hasFPModifiers()) { 1909 // Cannot apply fp modifiers to int literals preserving the same semantics 1910 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1911 // disable these cases. 1912 return false; 1913 } 1914 1915 unsigned Size = type.getSizeInBits(); 1916 if (Size == 64) 1917 Size = 32; 1918 1919 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1920 // types. 1921 return isSafeTruncation(Imm.Val, Size); 1922 } 1923 1924 // We got fp literal token 1925 if (type == MVT::f64) { // Expected 64-bit fp operand 1926 // We would set low 64-bits of literal to zeroes but we accept this literals 1927 return true; 1928 } 1929 1930 if (type == MVT::i64) { // Expected 64-bit int operand 1931 // We don't allow fp literals in 64-bit integer instructions. It is 1932 // unclear how we should encode them. 
1933 return false; 1934 } 1935 1936 // We allow fp literals with f16x2 operands assuming that the specified 1937 // literal goes into the lower half and the upper half is zero. We also 1938 // require that the literal may be losslessly converted to f16. 1939 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1940 (type == MVT::v2i16)? MVT::i16 : 1941 (type == MVT::v2f32)? MVT::f32 : type; 1942 1943 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1944 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1945 } 1946 1947 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1948 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1949 } 1950 1951 bool AMDGPUOperand::isVRegWithInputMods() const { 1952 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1953 // GFX90A allows DPP on 64-bit operands. 1954 (isRegClass(AMDGPU::VReg_64RegClassID) && 1955 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1956 } 1957 1958 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1959 if (AsmParser->isVI()) 1960 return isVReg32(); 1961 else if (AsmParser->isGFX9Plus()) 1962 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1963 else 1964 return false; 1965 } 1966 1967 bool AMDGPUOperand::isSDWAFP16Operand() const { 1968 return isSDWAOperand(MVT::f16); 1969 } 1970 1971 bool AMDGPUOperand::isSDWAFP32Operand() const { 1972 return isSDWAOperand(MVT::f32); 1973 } 1974 1975 bool AMDGPUOperand::isSDWAInt16Operand() const { 1976 return isSDWAOperand(MVT::i16); 1977 } 1978 1979 bool AMDGPUOperand::isSDWAInt32Operand() const { 1980 return isSDWAOperand(MVT::i32); 1981 } 1982 1983 bool AMDGPUOperand::isBoolReg() const { 1984 auto FB = AsmParser->getFeatureBits(); 1985 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1986 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1987 } 1988 1989 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1990 { 1991 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1992 assert(Size == 2 || Size == 4 || Size == 8); 1993 1994 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1995 1996 if (Imm.Mods.Abs) { 1997 Val &= ~FpSignMask; 1998 } 1999 if (Imm.Mods.Neg) { 2000 Val ^= FpSignMask; 2001 } 2002 2003 return Val; 2004 } 2005 2006 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2007 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2008 Inst.getNumOperands())) { 2009 addLiteralImmOperand(Inst, Imm.Val, 2010 ApplyModifiers & 2011 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2012 } else { 2013 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2014 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2015 setImmKindNone(); 2016 } 2017 } 2018 2019 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2020 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2021 auto OpNum = Inst.getNumOperands(); 2022 // Check that this operand accepts literals 2023 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2024 2025 if (ApplyModifiers) { 2026 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2027 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2028 Val = applyInputFPModifiers(Val, Size); 2029 } 2030 2031 APInt Literal(64, Val); 2032 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2033 2034 if (Imm.IsFPImm) { // We got fp literal token 2035 switch (OpTy) { 2036 case AMDGPU::OPERAND_REG_IMM_INT64: 2037 case AMDGPU::OPERAND_REG_IMM_FP64: 2038 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2039 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2040 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2041 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2042 AsmParser->hasInv2PiInlineImm())) { 2043 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2044 setImmKindConst(); 2045 return; 2046 } 2047 2048 // Non-inlineable 2049 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2050 // For fp operands we check if low 32 bits are zeros 2051 if (Literal.getLoBits(32) != 0) { 2052 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2053 "Can't encode literal as exact 64-bit floating-point operand. " 2054 "Low 32-bits will be set to zero"); 2055 } 2056 2057 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2058 setImmKindLiteral(); 2059 return; 2060 } 2061 2062 // We don't allow fp literals in 64-bit integer instructions. It is 2063 // unclear how we should encode them. This case should be checked earlier 2064 // in predicate methods (isLiteralImm()) 2065 llvm_unreachable("fp literal in 64-bit integer instruction."); 2066 2067 case AMDGPU::OPERAND_REG_IMM_INT32: 2068 case AMDGPU::OPERAND_REG_IMM_FP32: 2069 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2070 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2071 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2072 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2073 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2074 case AMDGPU::OPERAND_REG_IMM_INT16: 2075 case AMDGPU::OPERAND_REG_IMM_FP16: 2076 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2077 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2078 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2079 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2080 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2082 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2083 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2085 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2086 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2087 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2088 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2089 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2090 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2091 case AMDGPU::OPERAND_KIMM32: 2092 case AMDGPU::OPERAND_KIMM16: { 2093 bool lost; 2094 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2095 // Convert literal to single precision 2096 FPLiteral.convert(*getOpFltSemantics(OpTy), 2097 APFloat::rmNearestTiesToEven, &lost); 2098 // We allow precision lost but not overflow or underflow. This should be 2099 // checked earlier in isLiteralImm() 2100 2101 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2102 Inst.addOperand(MCOperand::createImm(ImmVal)); 2103 setImmKindLiteral(); 2104 return; 2105 } 2106 default: 2107 llvm_unreachable("invalid operand size"); 2108 } 2109 2110 return; 2111 } 2112 2113 // We got int literal token. 2114 // Only sign extend inline immediates. 
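  // For example, small integers in the inline range (-16..64) are emitted as
  // inline constants below; other values fall through and are emitted as
  // literals.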
2115 switch (OpTy) { 2116 case AMDGPU::OPERAND_REG_IMM_INT32: 2117 case AMDGPU::OPERAND_REG_IMM_FP32: 2118 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2119 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2120 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2123 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2124 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2125 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2126 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2127 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2128 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2129 if (isSafeTruncation(Val, 32) && 2130 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2131 AsmParser->hasInv2PiInlineImm())) { 2132 Inst.addOperand(MCOperand::createImm(Val)); 2133 setImmKindConst(); 2134 return; 2135 } 2136 2137 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2138 setImmKindLiteral(); 2139 return; 2140 2141 case AMDGPU::OPERAND_REG_IMM_INT64: 2142 case AMDGPU::OPERAND_REG_IMM_FP64: 2143 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2144 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2145 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2146 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2147 Inst.addOperand(MCOperand::createImm(Val)); 2148 setImmKindConst(); 2149 return; 2150 } 2151 2152 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2153 setImmKindLiteral(); 2154 return; 2155 2156 case AMDGPU::OPERAND_REG_IMM_INT16: 2157 case AMDGPU::OPERAND_REG_IMM_FP16: 2158 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2159 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2160 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2161 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2163 if (isSafeTruncation(Val, 16) && 2164 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2165 AsmParser->hasInv2PiInlineImm())) { 2166 Inst.addOperand(MCOperand::createImm(Val)); 2167 setImmKindConst(); 2168 return; 2169 } 2170 2171 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2172 setImmKindLiteral(); 2173 return; 2174 2175 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2176 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2177 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2179 assert(isSafeTruncation(Val, 16)); 2180 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2181 AsmParser->hasInv2PiInlineImm())); 2182 2183 Inst.addOperand(MCOperand::createImm(Val)); 2184 return; 2185 } 2186 case AMDGPU::OPERAND_KIMM32: 2187 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2188 setImmKindNone(); 2189 return; 2190 case AMDGPU::OPERAND_KIMM16: 2191 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2192 setImmKindNone(); 2193 return; 2194 default: 2195 llvm_unreachable("invalid operand size"); 2196 } 2197 } 2198 2199 template <unsigned Bitwidth> 2200 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2201 APInt Literal(64, Imm.Val); 2202 setImmKindNone(); 2203 2204 if (!Imm.IsFPImm) { 2205 // We got int literal token. 
2206 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2207 return; 2208 } 2209 2210 bool Lost; 2211 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2212 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2213 APFloat::rmNearestTiesToEven, &Lost); 2214 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2215 } 2216 2217 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2218 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2219 } 2220 2221 static bool isInlineValue(unsigned Reg) { 2222 switch (Reg) { 2223 case AMDGPU::SRC_SHARED_BASE: 2224 case AMDGPU::SRC_SHARED_LIMIT: 2225 case AMDGPU::SRC_PRIVATE_BASE: 2226 case AMDGPU::SRC_PRIVATE_LIMIT: 2227 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2228 return true; 2229 case AMDGPU::SRC_VCCZ: 2230 case AMDGPU::SRC_EXECZ: 2231 case AMDGPU::SRC_SCC: 2232 return true; 2233 case AMDGPU::SGPR_NULL: 2234 return true; 2235 default: 2236 return false; 2237 } 2238 } 2239 2240 bool AMDGPUOperand::isInlineValue() const { 2241 return isRegKind() && ::isInlineValue(getReg()); 2242 } 2243 2244 //===----------------------------------------------------------------------===// 2245 // AsmParser 2246 //===----------------------------------------------------------------------===// 2247 2248 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2249 if (Is == IS_VGPR) { 2250 switch (RegWidth) { 2251 default: return -1; 2252 case 1: return AMDGPU::VGPR_32RegClassID; 2253 case 2: return AMDGPU::VReg_64RegClassID; 2254 case 3: return AMDGPU::VReg_96RegClassID; 2255 case 4: return AMDGPU::VReg_128RegClassID; 2256 case 5: return AMDGPU::VReg_160RegClassID; 2257 case 6: return AMDGPU::VReg_192RegClassID; 2258 case 7: return AMDGPU::VReg_224RegClassID; 2259 case 8: return AMDGPU::VReg_256RegClassID; 2260 case 16: return AMDGPU::VReg_512RegClassID; 2261 case 32: return AMDGPU::VReg_1024RegClassID; 2262 } 2263 } else if (Is == IS_TTMP) { 2264 switch (RegWidth) { 2265 default: return -1; 2266 case 1: return AMDGPU::TTMP_32RegClassID; 2267 case 2: return AMDGPU::TTMP_64RegClassID; 2268 case 4: return AMDGPU::TTMP_128RegClassID; 2269 case 8: return AMDGPU::TTMP_256RegClassID; 2270 case 16: return AMDGPU::TTMP_512RegClassID; 2271 } 2272 } else if (Is == IS_SGPR) { 2273 switch (RegWidth) { 2274 default: return -1; 2275 case 1: return AMDGPU::SGPR_32RegClassID; 2276 case 2: return AMDGPU::SGPR_64RegClassID; 2277 case 3: return AMDGPU::SGPR_96RegClassID; 2278 case 4: return AMDGPU::SGPR_128RegClassID; 2279 case 5: return AMDGPU::SGPR_160RegClassID; 2280 case 6: return AMDGPU::SGPR_192RegClassID; 2281 case 7: return AMDGPU::SGPR_224RegClassID; 2282 case 8: return AMDGPU::SGPR_256RegClassID; 2283 case 16: return AMDGPU::SGPR_512RegClassID; 2284 } 2285 } else if (Is == IS_AGPR) { 2286 switch (RegWidth) { 2287 default: return -1; 2288 case 1: return AMDGPU::AGPR_32RegClassID; 2289 case 2: return AMDGPU::AReg_64RegClassID; 2290 case 3: return AMDGPU::AReg_96RegClassID; 2291 case 4: return AMDGPU::AReg_128RegClassID; 2292 case 5: return AMDGPU::AReg_160RegClassID; 2293 case 6: return AMDGPU::AReg_192RegClassID; 2294 case 7: return AMDGPU::AReg_224RegClassID; 2295 case 8: return AMDGPU::AReg_256RegClassID; 2296 case 16: return AMDGPU::AReg_512RegClassID; 2297 case 32: return AMDGPU::AReg_1024RegClassID; 2298 } 2299 } 2300 return -1; 2301 } 2302 2303 static unsigned getSpecialRegForName(StringRef RegName) { 2304 return StringSwitch<unsigned>(RegName) 2305 .Case("exec", 
AMDGPU::EXEC) 2306 .Case("vcc", AMDGPU::VCC) 2307 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2308 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2309 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2310 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2311 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2312 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2313 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2314 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2315 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2316 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2317 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2318 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2319 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2320 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2321 .Case("m0", AMDGPU::M0) 2322 .Case("vccz", AMDGPU::SRC_VCCZ) 2323 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2324 .Case("execz", AMDGPU::SRC_EXECZ) 2325 .Case("src_execz", AMDGPU::SRC_EXECZ) 2326 .Case("scc", AMDGPU::SRC_SCC) 2327 .Case("src_scc", AMDGPU::SRC_SCC) 2328 .Case("tba", AMDGPU::TBA) 2329 .Case("tma", AMDGPU::TMA) 2330 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2331 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2332 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2333 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2334 .Case("vcc_lo", AMDGPU::VCC_LO) 2335 .Case("vcc_hi", AMDGPU::VCC_HI) 2336 .Case("exec_lo", AMDGPU::EXEC_LO) 2337 .Case("exec_hi", AMDGPU::EXEC_HI) 2338 .Case("tma_lo", AMDGPU::TMA_LO) 2339 .Case("tma_hi", AMDGPU::TMA_HI) 2340 .Case("tba_lo", AMDGPU::TBA_LO) 2341 .Case("tba_hi", AMDGPU::TBA_HI) 2342 .Case("pc", AMDGPU::PC_REG) 2343 .Case("null", AMDGPU::SGPR_NULL) 2344 .Default(AMDGPU::NoRegister); 2345 } 2346 2347 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2348 SMLoc &EndLoc, bool RestoreOnFailure) { 2349 auto R = parseRegister(); 2350 if (!R) return true; 2351 assert(R->isReg()); 2352 RegNo = R->getReg(); 2353 StartLoc = R->getStartLoc(); 2354 EndLoc = R->getEndLoc(); 2355 return false; 2356 } 2357 2358 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2359 SMLoc &EndLoc) { 2360 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2361 } 2362 2363 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2364 SMLoc &StartLoc, 2365 SMLoc &EndLoc) { 2366 bool Result = 2367 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2368 bool PendingErrors = getParser().hasPendingError(); 2369 getParser().clearPendingErrors(); 2370 if (PendingErrors) 2371 return MatchOperand_ParseFail; 2372 if (Result) 2373 return MatchOperand_NoMatch; 2374 return MatchOperand_Success; 2375 } 2376 2377 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2378 RegisterKind RegKind, unsigned Reg1, 2379 SMLoc Loc) { 2380 switch (RegKind) { 2381 case IS_SPECIAL: 2382 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2383 Reg = AMDGPU::EXEC; 2384 RegWidth = 2; 2385 return true; 2386 } 2387 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2388 Reg = AMDGPU::FLAT_SCR; 2389 RegWidth = 2; 2390 return true; 2391 } 2392 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2393 Reg = AMDGPU::XNACK_MASK; 2394 RegWidth = 2; 2395 return true; 2396 } 2397 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2398 Reg = AMDGPU::VCC; 2399 RegWidth = 2; 2400 return true; 2401 } 2402 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2403 Reg = 
AMDGPU::TBA; 2404 RegWidth = 2; 2405 return true; 2406 } 2407 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2408 Reg = AMDGPU::TMA; 2409 RegWidth = 2; 2410 return true; 2411 } 2412 Error(Loc, "register does not fit in the list"); 2413 return false; 2414 case IS_VGPR: 2415 case IS_SGPR: 2416 case IS_AGPR: 2417 case IS_TTMP: 2418 if (Reg1 != Reg + RegWidth) { 2419 Error(Loc, "registers in a list must have consecutive indices"); 2420 return false; 2421 } 2422 RegWidth++; 2423 return true; 2424 default: 2425 llvm_unreachable("unexpected register kind"); 2426 } 2427 } 2428 2429 struct RegInfo { 2430 StringLiteral Name; 2431 RegisterKind Kind; 2432 }; 2433 2434 static constexpr RegInfo RegularRegisters[] = { 2435 {{"v"}, IS_VGPR}, 2436 {{"s"}, IS_SGPR}, 2437 {{"ttmp"}, IS_TTMP}, 2438 {{"acc"}, IS_AGPR}, 2439 {{"a"}, IS_AGPR}, 2440 }; 2441 2442 static bool isRegularReg(RegisterKind Kind) { 2443 return Kind == IS_VGPR || 2444 Kind == IS_SGPR || 2445 Kind == IS_TTMP || 2446 Kind == IS_AGPR; 2447 } 2448 2449 static const RegInfo* getRegularRegInfo(StringRef Str) { 2450 for (const RegInfo &Reg : RegularRegisters) 2451 if (Str.startswith(Reg.Name)) 2452 return &Reg; 2453 return nullptr; 2454 } 2455 2456 static bool getRegNum(StringRef Str, unsigned& Num) { 2457 return !Str.getAsInteger(10, Num); 2458 } 2459 2460 bool 2461 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2462 const AsmToken &NextToken) const { 2463 2464 // A list of consecutive registers: [s0,s1,s2,s3] 2465 if (Token.is(AsmToken::LBrac)) 2466 return true; 2467 2468 if (!Token.is(AsmToken::Identifier)) 2469 return false; 2470 2471 // A single register like s0 or a range of registers like s[0:1] 2472 2473 StringRef Str = Token.getString(); 2474 const RegInfo *Reg = getRegularRegInfo(Str); 2475 if (Reg) { 2476 StringRef RegName = Reg->Name; 2477 StringRef RegSuffix = Str.substr(RegName.size()); 2478 if (!RegSuffix.empty()) { 2479 unsigned Num; 2480 // A single register with an index: rXX 2481 if (getRegNum(RegSuffix, Num)) 2482 return true; 2483 } else { 2484 // A range of registers: r[XX:YY]. 2485 if (NextToken.is(AsmToken::LBrac)) 2486 return true; 2487 } 2488 } 2489 2490 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2491 } 2492 2493 bool 2494 AMDGPUAsmParser::isRegister() 2495 { 2496 return isRegister(getToken(), peekToken()); 2497 } 2498 2499 unsigned 2500 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2501 unsigned RegNum, 2502 unsigned RegWidth, 2503 SMLoc Loc) { 2504 2505 assert(isRegularReg(RegKind)); 2506 2507 unsigned AlignSize = 1; 2508 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2509 // SGPR and TTMP registers must be aligned. 2510 // Max required alignment is 4 dwords. 
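    // For example, a 64-bit pair must start at an even index: s[2:3] is
    // valid, while s[1:2] is rejected below with 'invalid register alignment'.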
2511 AlignSize = std::min(RegWidth, 4u); 2512 } 2513 2514 if (RegNum % AlignSize != 0) { 2515 Error(Loc, "invalid register alignment"); 2516 return AMDGPU::NoRegister; 2517 } 2518 2519 unsigned RegIdx = RegNum / AlignSize; 2520 int RCID = getRegClass(RegKind, RegWidth); 2521 if (RCID == -1) { 2522 Error(Loc, "invalid or unsupported register size"); 2523 return AMDGPU::NoRegister; 2524 } 2525 2526 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2527 const MCRegisterClass RC = TRI->getRegClass(RCID); 2528 if (RegIdx >= RC.getNumRegs()) { 2529 Error(Loc, "register index is out of range"); 2530 return AMDGPU::NoRegister; 2531 } 2532 2533 return RC.getRegister(RegIdx); 2534 } 2535 2536 bool 2537 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2538 int64_t RegLo, RegHi; 2539 if (!skipToken(AsmToken::LBrac, "missing register index")) 2540 return false; 2541 2542 SMLoc FirstIdxLoc = getLoc(); 2543 SMLoc SecondIdxLoc; 2544 2545 if (!parseExpr(RegLo)) 2546 return false; 2547 2548 if (trySkipToken(AsmToken::Colon)) { 2549 SecondIdxLoc = getLoc(); 2550 if (!parseExpr(RegHi)) 2551 return false; 2552 } else { 2553 RegHi = RegLo; 2554 } 2555 2556 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2557 return false; 2558 2559 if (!isUInt<32>(RegLo)) { 2560 Error(FirstIdxLoc, "invalid register index"); 2561 return false; 2562 } 2563 2564 if (!isUInt<32>(RegHi)) { 2565 Error(SecondIdxLoc, "invalid register index"); 2566 return false; 2567 } 2568 2569 if (RegLo > RegHi) { 2570 Error(FirstIdxLoc, "first register index should not exceed second index"); 2571 return false; 2572 } 2573 2574 Num = static_cast<unsigned>(RegLo); 2575 Width = (RegHi - RegLo) + 1; 2576 return true; 2577 } 2578 2579 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2580 unsigned &RegNum, unsigned &RegWidth, 2581 SmallVectorImpl<AsmToken> &Tokens) { 2582 assert(isToken(AsmToken::Identifier)); 2583 unsigned Reg = getSpecialRegForName(getTokenStr()); 2584 if (Reg) { 2585 RegNum = 0; 2586 RegWidth = 1; 2587 RegKind = IS_SPECIAL; 2588 Tokens.push_back(getToken()); 2589 lex(); // skip register name 2590 } 2591 return Reg; 2592 } 2593 2594 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2595 unsigned &RegNum, unsigned &RegWidth, 2596 SmallVectorImpl<AsmToken> &Tokens) { 2597 assert(isToken(AsmToken::Identifier)); 2598 StringRef RegName = getTokenStr(); 2599 auto Loc = getLoc(); 2600 2601 const RegInfo *RI = getRegularRegInfo(RegName); 2602 if (!RI) { 2603 Error(Loc, "invalid register name"); 2604 return AMDGPU::NoRegister; 2605 } 2606 2607 Tokens.push_back(getToken()); 2608 lex(); // skip register name 2609 2610 RegKind = RI->Kind; 2611 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2612 if (!RegSuffix.empty()) { 2613 // Single 32-bit register: vXX. 2614 if (!getRegNum(RegSuffix, RegNum)) { 2615 Error(Loc, "invalid register index"); 2616 return AMDGPU::NoRegister; 2617 } 2618 RegWidth = 1; 2619 } else { 2620 // Range of registers: v[XX:YY]. ":YY" is optional. 
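    // e.g. v[8:11] yields RegNum = 8 and RegWidth = 4, while v[8] yields
    // RegWidth = 1.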
2621 if (!ParseRegRange(RegNum, RegWidth)) 2622 return AMDGPU::NoRegister; 2623 } 2624 2625 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2626 } 2627 2628 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2629 unsigned &RegWidth, 2630 SmallVectorImpl<AsmToken> &Tokens) { 2631 unsigned Reg = AMDGPU::NoRegister; 2632 auto ListLoc = getLoc(); 2633 2634 if (!skipToken(AsmToken::LBrac, 2635 "expected a register or a list of registers")) { 2636 return AMDGPU::NoRegister; 2637 } 2638 2639 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2640 2641 auto Loc = getLoc(); 2642 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2643 return AMDGPU::NoRegister; 2644 if (RegWidth != 1) { 2645 Error(Loc, "expected a single 32-bit register"); 2646 return AMDGPU::NoRegister; 2647 } 2648 2649 for (; trySkipToken(AsmToken::Comma); ) { 2650 RegisterKind NextRegKind; 2651 unsigned NextReg, NextRegNum, NextRegWidth; 2652 Loc = getLoc(); 2653 2654 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2655 NextRegNum, NextRegWidth, 2656 Tokens)) { 2657 return AMDGPU::NoRegister; 2658 } 2659 if (NextRegWidth != 1) { 2660 Error(Loc, "expected a single 32-bit register"); 2661 return AMDGPU::NoRegister; 2662 } 2663 if (NextRegKind != RegKind) { 2664 Error(Loc, "registers in a list must be of the same kind"); 2665 return AMDGPU::NoRegister; 2666 } 2667 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2668 return AMDGPU::NoRegister; 2669 } 2670 2671 if (!skipToken(AsmToken::RBrac, 2672 "expected a comma or a closing square bracket")) { 2673 return AMDGPU::NoRegister; 2674 } 2675 2676 if (isRegularReg(RegKind)) 2677 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2678 2679 return Reg; 2680 } 2681 2682 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2683 unsigned &RegNum, unsigned &RegWidth, 2684 SmallVectorImpl<AsmToken> &Tokens) { 2685 auto Loc = getLoc(); 2686 Reg = AMDGPU::NoRegister; 2687 2688 if (isToken(AsmToken::Identifier)) { 2689 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2690 if (Reg == AMDGPU::NoRegister) 2691 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2692 } else { 2693 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2694 } 2695 2696 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2697 if (Reg == AMDGPU::NoRegister) { 2698 assert(Parser.hasPendingError()); 2699 return false; 2700 } 2701 2702 if (!subtargetHasRegister(*TRI, Reg)) { 2703 if (Reg == AMDGPU::SGPR_NULL) { 2704 Error(Loc, "'null' operand is not supported on this GPU"); 2705 } else { 2706 Error(Loc, "register not available on this GPU"); 2707 } 2708 return false; 2709 } 2710 2711 return true; 2712 } 2713 2714 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2715 unsigned &RegNum, unsigned &RegWidth, 2716 bool RestoreOnFailure /*=false*/) { 2717 Reg = AMDGPU::NoRegister; 2718 2719 SmallVector<AsmToken, 1> Tokens; 2720 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2721 if (RestoreOnFailure) { 2722 while (!Tokens.empty()) { 2723 getLexer().UnLex(Tokens.pop_back_val()); 2724 } 2725 } 2726 return true; 2727 } 2728 return false; 2729 } 2730 2731 Optional<StringRef> 2732 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2733 switch (RegKind) { 2734 case IS_VGPR: 2735 return StringRef(".amdgcn.next_free_vgpr"); 2736 case IS_SGPR: 2737 return StringRef(".amdgcn.next_free_sgpr"); 2738 default: 2739 return None; 2740 } 2741 } 2742 2743 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2744 auto SymbolName = getGprCountSymbolName(RegKind); 2745 assert(SymbolName && "initializing invalid register kind"); 2746 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2747 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2748 } 2749 2750 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2751 unsigned DwordRegIndex, 2752 unsigned RegWidth) { 2753 // Symbols are only defined for GCN targets 2754 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2755 return true; 2756 2757 auto SymbolName = getGprCountSymbolName(RegKind); 2758 if (!SymbolName) 2759 return true; 2760 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2761 2762 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2763 int64_t OldCount; 2764 2765 if (!Sym->isVariable()) 2766 return !Error(getLoc(), 2767 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2768 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2769 return !Error( 2770 getLoc(), 2771 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2772 2773 if (OldCount <= NewMax) 2774 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2775 2776 return true; 2777 } 2778 2779 std::unique_ptr<AMDGPUOperand> 2780 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2781 const auto &Tok = getToken(); 2782 SMLoc StartLoc = Tok.getLoc(); 2783 SMLoc EndLoc = Tok.getEndLoc(); 2784 RegisterKind RegKind; 2785 unsigned Reg, RegNum, RegWidth; 2786 2787 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2788 return nullptr; 2789 } 2790 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2791 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2792 return nullptr; 2793 } else 2794 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2795 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2796 } 2797 2798 OperandMatchResultTy 2799 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2800 // TODO: add syntactic sugar for 1/(2*PI) 2801 2802 assert(!isRegister()); 2803 assert(!isModifier()); 2804 2805 const auto& Tok = getToken(); 2806 const auto& NextTok = peekToken(); 2807 bool IsReal = Tok.is(AsmToken::Real); 2808 SMLoc S = getLoc(); 2809 bool Negate = false; 2810 2811 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2812 lex(); 2813 IsReal = true; 2814 Negate = true; 2815 } 2816 2817 if (IsReal) { 2818 // Floating-point expressions are not supported. 2819 // Can only allow floating-point literals with an 2820 // optional sign. 2821 2822 StringRef Num = getTokenStr(); 2823 lex(); 2824 2825 APFloat RealVal(APFloat::IEEEdouble()); 2826 auto roundMode = APFloat::rmNearestTiesToEven; 2827 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2828 return MatchOperand_ParseFail; 2829 } 2830 if (Negate) 2831 RealVal.changeSign(); 2832 2833 Operands.push_back( 2834 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2835 AMDGPUOperand::ImmTyNone, true)); 2836 2837 return MatchOperand_Success; 2838 2839 } else { 2840 int64_t IntVal; 2841 const MCExpr *Expr; 2842 SMLoc S = getLoc(); 2843 2844 if (HasSP3AbsModifier) { 2845 // This is a workaround for handling expressions 2846 // as arguments of SP3 'abs' modifier, for example: 2847 // |1.0| 2848 // |-1| 2849 // |1+x| 2850 // This syntax is not compatible with syntax of standard 2851 // MC expressions (due to the trailing '|'). 
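      // Parse only a primary expression here; a full parseExpression() call
      // would treat the closing '|' as a binary operator and try to consume it.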
2852 SMLoc EndLoc; 2853 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2854 return MatchOperand_ParseFail; 2855 } else { 2856 if (Parser.parseExpression(Expr)) 2857 return MatchOperand_ParseFail; 2858 } 2859 2860 if (Expr->evaluateAsAbsolute(IntVal)) { 2861 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2862 } else { 2863 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2864 } 2865 2866 return MatchOperand_Success; 2867 } 2868 2869 return MatchOperand_NoMatch; 2870 } 2871 2872 OperandMatchResultTy 2873 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2874 if (!isRegister()) 2875 return MatchOperand_NoMatch; 2876 2877 if (auto R = parseRegister()) { 2878 assert(R->isReg()); 2879 Operands.push_back(std::move(R)); 2880 return MatchOperand_Success; 2881 } 2882 return MatchOperand_ParseFail; 2883 } 2884 2885 OperandMatchResultTy 2886 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2887 auto res = parseReg(Operands); 2888 if (res != MatchOperand_NoMatch) { 2889 return res; 2890 } else if (isModifier()) { 2891 return MatchOperand_NoMatch; 2892 } else { 2893 return parseImm(Operands, HasSP3AbsMod); 2894 } 2895 } 2896 2897 bool 2898 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2899 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2900 const auto &str = Token.getString(); 2901 return str == "abs" || str == "neg" || str == "sext"; 2902 } 2903 return false; 2904 } 2905 2906 bool 2907 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2908 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2909 } 2910 2911 bool 2912 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2913 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2914 } 2915 2916 bool 2917 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2918 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2919 } 2920 2921 // Check if this is an operand modifier or an opcode modifier 2922 // which may look like an expression but it is not. We should 2923 // avoid parsing these modifiers as expressions. Currently 2924 // recognized sequences are: 2925 // |...| 2926 // abs(...) 2927 // neg(...) 2928 // sext(...) 2929 // -reg 2930 // -|...| 2931 // -abs(...) 2932 // name:... 2933 // Note that simple opcode modifiers like 'gds' may be parsed as 2934 // expressions; this is a special case. See getExpressionAsToken. 2935 // 2936 bool 2937 AMDGPUAsmParser::isModifier() { 2938 2939 AsmToken Tok = getToken(); 2940 AsmToken NextToken[2]; 2941 peekTokens(NextToken); 2942 2943 return isOperandModifier(Tok, NextToken[0]) || 2944 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2945 isOpcodeModifierWithVal(Tok, NextToken[0]); 2946 } 2947 2948 // Check if the current token is an SP3 'neg' modifier. 2949 // Currently this modifier is allowed in the following context: 2950 // 2951 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2952 // 2. Before an 'abs' modifier: -abs(...) 2953 // 3. Before an SP3 'abs' modifier: -|...| 2954 // 2955 // In all other cases "-" is handled as a part 2956 // of an expression that follows the sign. 
2957 // 2958 // Note: When "-" is followed by an integer literal, 2959 // this is interpreted as integer negation rather 2960 // than a floating-point NEG modifier applied to N. 2961 // Besides being counter-intuitive, such use of a floating-point 2962 // NEG modifier would have resulted in different meanings 2963 // of integer literals used with VOP1/2/C and VOP3, 2964 // for example: 2965 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2966 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2967 // Negative fp literals with a preceding "-" are 2968 // handled likewise for uniformity. 2969 // 2970 bool 2971 AMDGPUAsmParser::parseSP3NegModifier() { 2972 2973 AsmToken NextToken[2]; 2974 peekTokens(NextToken); 2975 2976 if (isToken(AsmToken::Minus) && 2977 (isRegister(NextToken[0], NextToken[1]) || 2978 NextToken[0].is(AsmToken::Pipe) || 2979 isId(NextToken[0], "abs"))) { 2980 lex(); 2981 return true; 2982 } 2983 2984 return false; 2985 } 2986 2987 OperandMatchResultTy 2988 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2989 bool AllowImm) { 2990 bool Neg, SP3Neg; 2991 bool Abs, SP3Abs; 2992 SMLoc Loc; 2993 2994 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2995 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2996 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2997 return MatchOperand_ParseFail; 2998 } 2999 3000 SP3Neg = parseSP3NegModifier(); 3001 3002 Loc = getLoc(); 3003 Neg = trySkipId("neg"); 3004 if (Neg && SP3Neg) { 3005 Error(Loc, "expected register or immediate"); 3006 return MatchOperand_ParseFail; 3007 } 3008 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3009 return MatchOperand_ParseFail; 3010 3011 Abs = trySkipId("abs"); 3012 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3013 return MatchOperand_ParseFail; 3014 3015 Loc = getLoc(); 3016 SP3Abs = trySkipToken(AsmToken::Pipe); 3017 if (Abs && SP3Abs) { 3018 Error(Loc, "expected register or immediate"); 3019 return MatchOperand_ParseFail; 3020 } 3021 3022 OperandMatchResultTy Res; 3023 if (AllowImm) { 3024 Res = parseRegOrImm(Operands, SP3Abs); 3025 } else { 3026 Res = parseReg(Operands); 3027 } 3028 if (Res != MatchOperand_Success) { 3029 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3030 } 3031 3032 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3033 return MatchOperand_ParseFail; 3034 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3035 return MatchOperand_ParseFail; 3036 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3037 return MatchOperand_ParseFail; 3038 3039 AMDGPUOperand::Modifiers Mods; 3040 Mods.Abs = Abs || SP3Abs; 3041 Mods.Neg = Neg || SP3Neg; 3042 3043 if (Mods.hasFPModifiers()) { 3044 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3045 if (Op.isExpr()) { 3046 Error(Op.getStartLoc(), "expected an absolute expression"); 3047 return MatchOperand_ParseFail; 3048 } 3049 Op.setModifiers(Mods); 3050 } 3051 return MatchOperand_Success; 3052 } 3053 3054 OperandMatchResultTy 3055 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3056 bool AllowImm) { 3057 bool Sext = trySkipId("sext"); 3058 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3059 return MatchOperand_ParseFail; 3060 3061 OperandMatchResultTy Res; 3062 if (AllowImm) { 3063 Res = parseRegOrImm(Operands); 3064 } else { 3065 Res = parseReg(Operands); 3066 } 3067 if (Res != MatchOperand_Success) { 3068 return Sext? MatchOperand_ParseFail : Res; 3069 } 3070 3071 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3072 return MatchOperand_ParseFail; 3073 3074 AMDGPUOperand::Modifiers Mods; 3075 Mods.Sext = Sext; 3076 3077 if (Mods.hasIntModifiers()) { 3078 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3079 if (Op.isExpr()) { 3080 Error(Op.getStartLoc(), "expected an absolute expression"); 3081 return MatchOperand_ParseFail; 3082 } 3083 Op.setModifiers(Mods); 3084 } 3085 3086 return MatchOperand_Success; 3087 } 3088 3089 OperandMatchResultTy 3090 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3091 return parseRegOrImmWithFPInputMods(Operands, false); 3092 } 3093 3094 OperandMatchResultTy 3095 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3096 return parseRegOrImmWithIntInputMods(Operands, false); 3097 } 3098 3099 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3100 auto Loc = getLoc(); 3101 if (trySkipId("off")) { 3102 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3103 AMDGPUOperand::ImmTyOff, false)); 3104 return MatchOperand_Success; 3105 } 3106 3107 if (!isRegister()) 3108 return MatchOperand_NoMatch; 3109 3110 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3111 if (Reg) { 3112 Operands.push_back(std::move(Reg)); 3113 return MatchOperand_Success; 3114 } 3115 3116 return MatchOperand_ParseFail; 3117 3118 } 3119 3120 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3121 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3122 3123 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3124 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3125 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3126 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3127 return Match_InvalidOperand; 3128 3129 if ((TSFlags & SIInstrFlags::VOP3) && 3130 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3131 getForcedEncodingSize() != 64) 3132 return Match_PreferE32; 3133 3134 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3135 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3136 // v_mac_f32/16 allow only dst_sel == DWORD; 3137 auto OpNum = 3138 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3139 const auto &Op = Inst.getOperand(OpNum); 3140 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3141 return Match_InvalidOperand; 3142 } 3143 } 3144 3145 return Match_Success; 3146 } 3147 3148 static ArrayRef<unsigned> getAllVariants() { 3149 static const unsigned Variants[] = { 3150 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3151 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3152 }; 3153 3154 return makeArrayRef(Variants); 3155 } 3156 3157 // What asm variants we should check 3158 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3159 if (getForcedEncodingSize() == 32) { 3160 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3161 return makeArrayRef(Variants); 3162 } 3163 3164 if (isForcedVOP3()) { 3165 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3166 return makeArrayRef(Variants); 3167 } 3168 3169 if (isForcedSDWA()) { 3170 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3171 AMDGPUAsmVariants::SDWA9}; 3172 return makeArrayRef(Variants); 3173 } 3174 3175 if (isForcedDPP()) { 3176 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3177 return makeArrayRef(Variants); 3178 } 3179 3180 return getAllVariants(); 3181 } 3182 3183 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3184 if (getForcedEncodingSize() == 32) 3185 return "e32"; 3186 3187 if (isForcedVOP3()) 3188 return "e64"; 3189 3190 if (isForcedSDWA()) 3191 return "sdwa"; 3192 3193 if (isForcedDPP()) 3194 return "dpp"; 3195 3196 return ""; 3197 } 3198 3199 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3200 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3201 const unsigned Num = Desc.getNumImplicitUses(); 3202 for (unsigned i = 0; i < Num; ++i) { 3203 unsigned Reg = Desc.ImplicitUses[i]; 3204 switch (Reg) { 3205 case AMDGPU::FLAT_SCR: 3206 case AMDGPU::VCC: 3207 case AMDGPU::VCC_LO: 3208 case AMDGPU::VCC_HI: 3209 case AMDGPU::M0: 3210 return Reg; 3211 default: 3212 break; 3213 } 3214 } 3215 return AMDGPU::NoRegister; 3216 } 3217 3218 // NB: This code is correct only when used to check constant 3219 // bus limitations because GFX7 support no f16 inline constants. 3220 // Note that there are no cases when a GFX7 opcode violates 3221 // constant bus limitations due to the use of an f16 constant. 
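// Returns true if the immediate operand at OpIdx fits into an inline constant
// of the operand's expected size; used by the constant bus checks below.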
3222 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3223 unsigned OpIdx) const { 3224 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3225 3226 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3227 return false; 3228 } 3229 3230 const MCOperand &MO = Inst.getOperand(OpIdx); 3231 3232 int64_t Val = MO.getImm(); 3233 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3234 3235 switch (OpSize) { // expected operand size 3236 case 8: 3237 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3238 case 4: 3239 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3240 case 2: { 3241 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3242 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3243 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3244 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3245 return AMDGPU::isInlinableIntLiteral(Val); 3246 3247 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3248 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3249 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3250 return AMDGPU::isInlinableIntLiteralV216(Val); 3251 3252 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3253 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3254 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3255 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3256 3257 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3258 } 3259 default: 3260 llvm_unreachable("invalid operand size"); 3261 } 3262 } 3263 3264 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3265 if (!isGFX10Plus()) 3266 return 1; 3267 3268 switch (Opcode) { 3269 // 64-bit shift instructions can use only one scalar value input 3270 case AMDGPU::V_LSHLREV_B64_e64: 3271 case AMDGPU::V_LSHLREV_B64_gfx10: 3272 case AMDGPU::V_LSHRREV_B64_e64: 3273 case AMDGPU::V_LSHRREV_B64_gfx10: 3274 case AMDGPU::V_ASHRREV_I64_e64: 3275 case AMDGPU::V_ASHRREV_I64_gfx10: 3276 case AMDGPU::V_LSHL_B64_e64: 3277 case AMDGPU::V_LSHR_B64_e64: 3278 case AMDGPU::V_ASHR_I64_e64: 3279 return 1; 3280 default: 3281 return 2; 3282 } 3283 } 3284 3285 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3286 const MCOperand &MO = Inst.getOperand(OpIdx); 3287 if (MO.isImm()) { 3288 return !isInlineConstant(Inst, OpIdx); 3289 } else if (MO.isReg()) { 3290 auto Reg = MO.getReg(); 3291 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3292 auto PReg = mc2PseudoReg(Reg); 3293 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3294 } else { 3295 return true; 3296 } 3297 } 3298 3299 bool 3300 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3301 const OperandVector &Operands) { 3302 const unsigned Opcode = Inst.getOpcode(); 3303 const MCInstrDesc &Desc = MII.get(Opcode); 3304 unsigned LastSGPR = AMDGPU::NoRegister; 3305 unsigned ConstantBusUseCount = 0; 3306 unsigned NumLiterals = 0; 3307 unsigned LiteralSize; 3308 3309 if (Desc.TSFlags & 3310 (SIInstrFlags::VOPC | 3311 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3312 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3313 SIInstrFlags::SDWA)) { 3314 // Check special imm operands (used by madmk, etc) 3315 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3316 ++NumLiterals; 3317 LiteralSize = 4; 3318 } 3319 3320 SmallDenseSet<unsigned> SGPRsUsed; 3321 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3322 if (SGPRUsed != AMDGPU::NoRegister) { 3323 SGPRsUsed.insert(SGPRUsed); 3324 ++ConstantBusUseCount; 3325 } 3326 3327 
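    // Count distinct SGPRs and literals read by the src operands against the
    // constant bus limit. For example, on targets where the limit is 1,
    // 'v_add_f32_e64 v0, s0, s1' reads two different SGPRs and is rejected,
    // while reading the same SGPR twice counts as a single use.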
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3328 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3329 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3330 3331 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3332 3333 for (int OpIdx : OpIndices) { 3334 if (OpIdx == -1) break; 3335 3336 const MCOperand &MO = Inst.getOperand(OpIdx); 3337 if (usesConstantBus(Inst, OpIdx)) { 3338 if (MO.isReg()) { 3339 LastSGPR = mc2PseudoReg(MO.getReg()); 3340 // Pairs of registers with a partial intersections like these 3341 // s0, s[0:1] 3342 // flat_scratch_lo, flat_scratch 3343 // flat_scratch_lo, flat_scratch_hi 3344 // are theoretically valid but they are disabled anyway. 3345 // Note that this code mimics SIInstrInfo::verifyInstruction 3346 if (!SGPRsUsed.count(LastSGPR)) { 3347 SGPRsUsed.insert(LastSGPR); 3348 ++ConstantBusUseCount; 3349 } 3350 } else { // Expression or a literal 3351 3352 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3353 continue; // special operand like VINTERP attr_chan 3354 3355 // An instruction may use only one literal. 3356 // This has been validated on the previous step. 3357 // See validateVOPLiteral. 3358 // This literal may be used as more than one operand. 3359 // If all these operands are of the same size, 3360 // this literal counts as one scalar value. 3361 // Otherwise it counts as 2 scalar values. 3362 // See "GFX10 Shader Programming", section 3.6.2.3. 3363 3364 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3365 if (Size < 4) Size = 4; 3366 3367 if (NumLiterals == 0) { 3368 NumLiterals = 1; 3369 LiteralSize = Size; 3370 } else if (LiteralSize != Size) { 3371 NumLiterals = 2; 3372 } 3373 } 3374 } 3375 } 3376 } 3377 ConstantBusUseCount += NumLiterals; 3378 3379 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3380 return true; 3381 3382 SMLoc LitLoc = getLitLoc(Operands); 3383 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3384 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3385 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3386 return false; 3387 } 3388 3389 bool 3390 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3391 const OperandVector &Operands) { 3392 const unsigned Opcode = Inst.getOpcode(); 3393 const MCInstrDesc &Desc = MII.get(Opcode); 3394 3395 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3396 if (DstIdx == -1 || 3397 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3398 return true; 3399 } 3400 3401 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3402 3403 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3404 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3405 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3406 3407 assert(DstIdx != -1); 3408 const MCOperand &Dst = Inst.getOperand(DstIdx); 3409 assert(Dst.isReg()); 3410 3411 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3412 3413 for (int SrcIdx : SrcIndices) { 3414 if (SrcIdx == -1) break; 3415 const MCOperand &Src = Inst.getOperand(SrcIdx); 3416 if (Src.isReg()) { 3417 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3418 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3419 Error(getRegLoc(SrcReg, Operands), 3420 "destination must be different than all sources"); 3421 return false; 3422 } 3423 } 3424 } 3425 3426 return true; 3427 } 3428 3429 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3430 3431 const unsigned Opc = Inst.getOpcode(); 3432 const MCInstrDesc &Desc = MII.get(Opc); 3433 3434 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3435 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3436 assert(ClampIdx != -1); 3437 return Inst.getOperand(ClampIdx).getImm() == 0; 3438 } 3439 3440 return true; 3441 } 3442 3443 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3444 3445 const unsigned Opc = Inst.getOpcode(); 3446 const MCInstrDesc &Desc = MII.get(Opc); 3447 3448 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3449 return true; 3450 3451 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3452 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3453 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3454 3455 assert(VDataIdx != -1); 3456 3457 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3458 return true; 3459 3460 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3461 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3462 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3463 if (DMask == 0) 3464 DMask = 1; 3465 3466 unsigned DataSize = 3467 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3468 if (hasPackedD16()) { 3469 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3470 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3471 DataSize = (DataSize + 1) / 2; 3472 } 3473 3474 return (VDataSize / 4) == DataSize + TFESize; 3475 } 3476 3477 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3478 const unsigned Opc = Inst.getOpcode(); 3479 const MCInstrDesc &Desc = MII.get(Opc); 3480 3481 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3482 return true; 3483 3484 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3485 3486 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3487 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3488 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3489 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3490 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3491 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3492 3493 assert(VAddr0Idx != -1); 3494 assert(SrsrcIdx != -1); 3495 assert(SrsrcIdx > VAddr0Idx); 3496 3497 if (DimIdx == -1) 3498 return true; // intersect_ray 3499 3500 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3501 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3502 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3503 unsigned ActualAddrSize = 3504 IsNSA ? SrsrcIdx - VAddr0Idx 3505 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3506 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3507 3508 unsigned ExpectedAddrSize = 3509 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3510 3511 if (!IsNSA) { 3512 if (ExpectedAddrSize > 8) 3513 ExpectedAddrSize = 16; 3514 3515 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3516 // This provides backward compatibility for assembly created 3517 // before 160b/192b/224b types were directly supported. 3518 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3519 return true; 3520 } 3521 3522 return ActualAddrSize == ExpectedAddrSize; 3523 } 3524 3525 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3526 3527 const unsigned Opc = Inst.getOpcode(); 3528 const MCInstrDesc &Desc = MII.get(Opc); 3529 3530 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3531 return true; 3532 if (!Desc.mayLoad() || !Desc.mayStore()) 3533 return true; // Not atomic 3534 3535 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3536 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3537 3538 // This is an incomplete check because image_atomic_cmpswap 3539 // may only use 0x3 and 0xf while other atomic operations 3540 // may use 0x1 and 0x3. However these limitations are 3541 // verified when we check that dmask matches dst size. 3542 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3543 } 3544 3545 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3546 3547 const unsigned Opc = Inst.getOpcode(); 3548 const MCInstrDesc &Desc = MII.get(Opc); 3549 3550 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3551 return true; 3552 3553 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3554 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3555 3556 // GATHER4 instructions use dmask in a different fashion compared to 3557 // other MIMG instructions. The only useful DMASK values are 3558 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3559 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3560 // this. 3561 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3562 } 3563 3564 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3565 const unsigned Opc = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opc); 3567 3568 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3569 return true; 3570 3571 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3572 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3573 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3574 3575 if (!BaseOpcode->MSAA) 3576 return true; 3577 3578 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3579 assert(DimIdx != -1); 3580 3581 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3582 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3583 3584 return DimInfo->MSAA; 3585 } 3586 3587 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3588 { 3589 switch (Opcode) { 3590 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3591 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3592 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3593 return true; 3594 default: 3595 return false; 3596 } 3597 } 3598 3599 // movrels* opcodes should only allow VGPRS as src0. 3600 // This is specified in .td description for vop1/vop3, 3601 // but sdwa is handled differently. See isSDWAOperand. 3602 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3603 const OperandVector &Operands) { 3604 3605 const unsigned Opc = Inst.getOpcode(); 3606 const MCInstrDesc &Desc = MII.get(Opc); 3607 3608 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3609 return true; 3610 3611 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3612 assert(Src0Idx != -1); 3613 3614 SMLoc ErrLoc; 3615 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3616 if (Src0.isReg()) { 3617 auto Reg = mc2PseudoReg(Src0.getReg()); 3618 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3619 if (!isSGPR(Reg, TRI)) 3620 return true; 3621 ErrLoc = getRegLoc(Reg, Operands); 3622 } else { 3623 ErrLoc = getConstLoc(Operands); 3624 } 3625 3626 Error(ErrLoc, "source operand must be a VGPR"); 3627 return false; 3628 } 3629 3630 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3631 const OperandVector &Operands) { 3632 3633 const unsigned Opc = Inst.getOpcode(); 3634 3635 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3636 return true; 3637 3638 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3639 assert(Src0Idx != -1); 3640 3641 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3642 if (!Src0.isReg()) 3643 return true; 3644 3645 auto Reg = mc2PseudoReg(Src0.getReg()); 3646 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3647 if (isSGPR(Reg, TRI)) { 3648 Error(getRegLoc(Reg, Operands), 3649 "source operand must be either a VGPR or an inline constant"); 3650 return false; 3651 } 3652 3653 return true; 3654 } 3655 3656 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3657 const OperandVector &Operands) { 3658 const unsigned Opc = Inst.getOpcode(); 3659 const MCInstrDesc &Desc = MII.get(Opc); 3660 3661 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3662 return true; 3663 3664 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3665 if (Src2Idx == -1) 3666 return true; 3667 3668 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3669 if (!Src2.isReg()) 3670 return true; 3671 3672 MCRegister Src2Reg = Src2.getReg(); 3673 MCRegister DstReg = Inst.getOperand(0).getReg(); 3674 if 
(Src2Reg == DstReg) 3675 return true; 3676 3677 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3678 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3679 return true; 3680 3681 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3682 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3683 "source 2 operand must not partially overlap with dst"); 3684 return false; 3685 } 3686 3687 return true; 3688 } 3689 3690 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3691 switch (Inst.getOpcode()) { 3692 default: 3693 return true; 3694 case V_DIV_SCALE_F32_gfx6_gfx7: 3695 case V_DIV_SCALE_F32_vi: 3696 case V_DIV_SCALE_F32_gfx10: 3697 case V_DIV_SCALE_F64_gfx6_gfx7: 3698 case V_DIV_SCALE_F64_vi: 3699 case V_DIV_SCALE_F64_gfx10: 3700 break; 3701 } 3702 3703 // TODO: Check that src0 = src1 or src2. 3704 3705 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3706 AMDGPU::OpName::src2_modifiers, 3707 AMDGPU::OpName::src2_modifiers}) { 3708 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3709 .getImm() & 3710 SISrcMods::ABS) { 3711 return false; 3712 } 3713 } 3714 3715 return true; 3716 } 3717 3718 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3719 3720 const unsigned Opc = Inst.getOpcode(); 3721 const MCInstrDesc &Desc = MII.get(Opc); 3722 3723 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3724 return true; 3725 3726 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3727 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3728 if (isCI() || isSI()) 3729 return false; 3730 } 3731 3732 return true; 3733 } 3734 3735 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3736 const unsigned Opc = Inst.getOpcode(); 3737 const MCInstrDesc &Desc = MII.get(Opc); 3738 3739 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3740 return true; 3741 3742 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3743 if (DimIdx < 0) 3744 return true; 3745 3746 long Imm = Inst.getOperand(DimIdx).getImm(); 3747 if (Imm < 0 || Imm >= 8) 3748 return false; 3749 3750 return true; 3751 } 3752 3753 static bool IsRevOpcode(const unsigned Opcode) 3754 { 3755 switch (Opcode) { 3756 case AMDGPU::V_SUBREV_F32_e32: 3757 case AMDGPU::V_SUBREV_F32_e64: 3758 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3759 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3760 case AMDGPU::V_SUBREV_F32_e32_vi: 3761 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3762 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3763 case AMDGPU::V_SUBREV_F32_e64_vi: 3764 3765 case AMDGPU::V_SUBREV_CO_U32_e32: 3766 case AMDGPU::V_SUBREV_CO_U32_e64: 3767 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3768 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3769 3770 case AMDGPU::V_SUBBREV_U32_e32: 3771 case AMDGPU::V_SUBBREV_U32_e64: 3772 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3773 case AMDGPU::V_SUBBREV_U32_e32_vi: 3774 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3775 case AMDGPU::V_SUBBREV_U32_e64_vi: 3776 3777 case AMDGPU::V_SUBREV_U32_e32: 3778 case AMDGPU::V_SUBREV_U32_e64: 3779 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3780 case AMDGPU::V_SUBREV_U32_e32_vi: 3781 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3782 case AMDGPU::V_SUBREV_U32_e64_vi: 3783 3784 case AMDGPU::V_SUBREV_F16_e32: 3785 case AMDGPU::V_SUBREV_F16_e64: 3786 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3787 case AMDGPU::V_SUBREV_F16_e32_vi: 3788 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3789 case AMDGPU::V_SUBREV_F16_e64_vi: 3790 3791 case AMDGPU::V_SUBREV_U16_e32: 3792 case AMDGPU::V_SUBREV_U16_e64: 3793 case AMDGPU::V_SUBREV_U16_e32_vi: 3794 case 
AMDGPU::V_SUBREV_U16_e64_vi: 3795 3796 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3797 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3798 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3799 3800 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3801 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3802 3803 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3804 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3805 3806 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3807 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3808 3809 case AMDGPU::V_LSHRREV_B32_e32: 3810 case AMDGPU::V_LSHRREV_B32_e64: 3811 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3812 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3813 case AMDGPU::V_LSHRREV_B32_e32_vi: 3814 case AMDGPU::V_LSHRREV_B32_e64_vi: 3815 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3816 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3817 3818 case AMDGPU::V_ASHRREV_I32_e32: 3819 case AMDGPU::V_ASHRREV_I32_e64: 3820 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3821 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3822 case AMDGPU::V_ASHRREV_I32_e32_vi: 3823 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3824 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3825 case AMDGPU::V_ASHRREV_I32_e64_vi: 3826 3827 case AMDGPU::V_LSHLREV_B32_e32: 3828 case AMDGPU::V_LSHLREV_B32_e64: 3829 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3830 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3831 case AMDGPU::V_LSHLREV_B32_e32_vi: 3832 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3833 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3834 case AMDGPU::V_LSHLREV_B32_e64_vi: 3835 3836 case AMDGPU::V_LSHLREV_B16_e32: 3837 case AMDGPU::V_LSHLREV_B16_e64: 3838 case AMDGPU::V_LSHLREV_B16_e32_vi: 3839 case AMDGPU::V_LSHLREV_B16_e64_vi: 3840 case AMDGPU::V_LSHLREV_B16_gfx10: 3841 3842 case AMDGPU::V_LSHRREV_B16_e32: 3843 case AMDGPU::V_LSHRREV_B16_e64: 3844 case AMDGPU::V_LSHRREV_B16_e32_vi: 3845 case AMDGPU::V_LSHRREV_B16_e64_vi: 3846 case AMDGPU::V_LSHRREV_B16_gfx10: 3847 3848 case AMDGPU::V_ASHRREV_I16_e32: 3849 case AMDGPU::V_ASHRREV_I16_e64: 3850 case AMDGPU::V_ASHRREV_I16_e32_vi: 3851 case AMDGPU::V_ASHRREV_I16_e64_vi: 3852 case AMDGPU::V_ASHRREV_I16_gfx10: 3853 3854 case AMDGPU::V_LSHLREV_B64_e64: 3855 case AMDGPU::V_LSHLREV_B64_gfx10: 3856 case AMDGPU::V_LSHLREV_B64_vi: 3857 3858 case AMDGPU::V_LSHRREV_B64_e64: 3859 case AMDGPU::V_LSHRREV_B64_gfx10: 3860 case AMDGPU::V_LSHRREV_B64_vi: 3861 3862 case AMDGPU::V_ASHRREV_I64_e64: 3863 case AMDGPU::V_ASHRREV_I64_gfx10: 3864 case AMDGPU::V_ASHRREV_I64_vi: 3865 3866 case AMDGPU::V_PK_LSHLREV_B16: 3867 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3868 case AMDGPU::V_PK_LSHLREV_B16_vi: 3869 3870 case AMDGPU::V_PK_LSHRREV_B16: 3871 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3872 case AMDGPU::V_PK_LSHRREV_B16_vi: 3873 case AMDGPU::V_PK_ASHRREV_I16: 3874 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3875 case AMDGPU::V_PK_ASHRREV_I16_vi: 3876 return true; 3877 default: 3878 return false; 3879 } 3880 } 3881 3882 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3883 3884 using namespace SIInstrFlags; 3885 const unsigned Opcode = Inst.getOpcode(); 3886 const MCInstrDesc &Desc = MII.get(Opcode); 3887 3888 // lds_direct register is defined so that it can be used 3889 // with 9-bit operands only. Ignore encodings which do not accept these. 
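// The scan below covers src0..src2; for example, an instruction that names
// lds_direct as its second or third source is rejected with
// "lds_direct may be used as src0 only".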
3890 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3891 if ((Desc.TSFlags & Enc) == 0) 3892 return None; 3893 3894 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3895 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3896 if (SrcIdx == -1) 3897 break; 3898 const auto &Src = Inst.getOperand(SrcIdx); 3899 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3900 3901 if (isGFX90A()) 3902 return StringRef("lds_direct is not supported on this GPU"); 3903 3904 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3905 return StringRef("lds_direct cannot be used with this instruction"); 3906 3907 if (SrcName != OpName::src0) 3908 return StringRef("lds_direct may be used as src0 only"); 3909 } 3910 } 3911 3912 return None; 3913 } 3914 3915 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3916 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3917 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3918 if (Op.isFlatOffset()) 3919 return Op.getStartLoc(); 3920 } 3921 return getLoc(); 3922 } 3923 3924 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3925 const OperandVector &Operands) { 3926 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3927 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3928 return true; 3929 3930 auto Opcode = Inst.getOpcode(); 3931 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3932 assert(OpNum != -1); 3933 3934 const auto &Op = Inst.getOperand(OpNum); 3935 if (!hasFlatOffsets() && Op.getImm() != 0) { 3936 Error(getFlatOffsetLoc(Operands), 3937 "flat offset modifier is not supported on this GPU"); 3938 return false; 3939 } 3940 3941 // For FLAT segment the offset must be positive; 3942 // MSB is ignored and forced to zero. 3943 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3944 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3945 if (!isIntN(OffsetSize, Op.getImm())) { 3946 Error(getFlatOffsetLoc(Operands), 3947 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3948 return false; 3949 } 3950 } else { 3951 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3952 if (!isUIntN(OffsetSize, Op.getImm())) { 3953 Error(getFlatOffsetLoc(Operands), 3954 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3955 return false; 3956 } 3957 } 3958 3959 return true; 3960 } 3961 3962 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3963 // Start with second operand because SMEM Offset cannot be dst or src0. 
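// Note that Operands[0] is the mnemonic token, so starting at index 2 skips
// the mnemonic and the first real operand.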
3964 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3965 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3966 if (Op.isSMEMOffset()) 3967 return Op.getStartLoc(); 3968 } 3969 return getLoc(); 3970 } 3971 3972 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3973 const OperandVector &Operands) { 3974 if (isCI() || isSI()) 3975 return true; 3976 3977 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3978 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3979 return true; 3980 3981 auto Opcode = Inst.getOpcode(); 3982 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3983 if (OpNum == -1) 3984 return true; 3985 3986 const auto &Op = Inst.getOperand(OpNum); 3987 if (!Op.isImm()) 3988 return true; 3989 3990 uint64_t Offset = Op.getImm(); 3991 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3992 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3993 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3994 return true; 3995 3996 Error(getSMEMOffsetLoc(Operands), 3997 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3998 "expected a 21-bit signed offset"); 3999 4000 return false; 4001 } 4002 4003 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4004 unsigned Opcode = Inst.getOpcode(); 4005 const MCInstrDesc &Desc = MII.get(Opcode); 4006 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4007 return true; 4008 4009 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4010 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4011 4012 const int OpIndices[] = { Src0Idx, Src1Idx }; 4013 4014 unsigned NumExprs = 0; 4015 unsigned NumLiterals = 0; 4016 uint32_t LiteralValue; 4017 4018 for (int OpIdx : OpIndices) { 4019 if (OpIdx == -1) break; 4020 4021 const MCOperand &MO = Inst.getOperand(OpIdx); 4022 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4023 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4024 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4025 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4026 if (NumLiterals == 0 || LiteralValue != Value) { 4027 LiteralValue = Value; 4028 ++NumLiterals; 4029 } 4030 } else if (MO.isExpr()) { 4031 ++NumExprs; 4032 } 4033 } 4034 } 4035 4036 return NumLiterals + NumExprs <= 1; 4037 } 4038 4039 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4040 const unsigned Opc = Inst.getOpcode(); 4041 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4042 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4043 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4044 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4045 4046 if (OpSel & ~3) 4047 return false; 4048 } 4049 return true; 4050 } 4051 4052 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4053 const OperandVector &Operands) { 4054 const unsigned Opc = Inst.getOpcode(); 4055 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4056 if (DppCtrlIdx < 0) 4057 return true; 4058 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4059 4060 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4061 // DPP64 is supported for row_newbcast only. 
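// A 64-bit DPP operation is recognized by src0 having a sub1 subregister,
// i.e. being at least a 64-bit register pair.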
4062 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4063 if (Src0Idx >= 0 && 4064 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4065 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4066 Error(S, "64 bit dpp only supports row_newbcast"); 4067 return false; 4068 } 4069 } 4070 4071 return true; 4072 } 4073 4074 // Check if VCC register matches wavefront size 4075 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4076 auto FB = getFeatureBits(); 4077 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4078 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4079 } 4080 4081 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4082 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4083 const OperandVector &Operands) { 4084 unsigned Opcode = Inst.getOpcode(); 4085 const MCInstrDesc &Desc = MII.get(Opcode); 4086 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4087 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4088 ImmIdx == -1) 4089 return true; 4090 4091 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4092 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4093 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4094 4095 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4096 4097 unsigned NumExprs = 0; 4098 unsigned NumLiterals = 0; 4099 uint32_t LiteralValue; 4100 4101 for (int OpIdx : OpIndices) { 4102 if (OpIdx == -1) 4103 continue; 4104 4105 const MCOperand &MO = Inst.getOperand(OpIdx); 4106 if (!MO.isImm() && !MO.isExpr()) 4107 continue; 4108 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4109 continue; 4110 4111 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4112 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4113 Error(getConstLoc(Operands), 4114 "inline constants are not allowed for this operand"); 4115 return false; 4116 } 4117 4118 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4119 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4120 if (NumLiterals == 0 || LiteralValue != Value) { 4121 LiteralValue = Value; 4122 ++NumLiterals; 4123 } 4124 } else if (MO.isExpr()) { 4125 ++NumExprs; 4126 } 4127 } 4128 NumLiterals += NumExprs; 4129 4130 if (!NumLiterals) 4131 return true; 4132 4133 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4134 Error(getLitLoc(Operands), "literal operands are not supported"); 4135 return false; 4136 } 4137 4138 if (NumLiterals > 1) { 4139 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4140 return false; 4141 } 4142 4143 return true; 4144 } 4145 4146 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4147 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4148 const MCRegisterInfo *MRI) { 4149 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4150 if (OpIdx < 0) 4151 return -1; 4152 4153 const MCOperand &Op = Inst.getOperand(OpIdx); 4154 if (!Op.isReg()) 4155 return -1; 4156 4157 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4158 auto Reg = Sub ? Sub : Op.getReg(); 4159 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4160 return AGPR32.contains(Reg) ? 
1 : 0; 4161 } 4162 4163 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4164 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4165 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4166 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4167 SIInstrFlags::DS)) == 0) 4168 return true; 4169 4170 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4171 : AMDGPU::OpName::vdata; 4172 4173 const MCRegisterInfo *MRI = getMRI(); 4174 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4175 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4176 4177 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4178 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4179 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4180 return false; 4181 } 4182 4183 auto FB = getFeatureBits(); 4184 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4185 if (DataAreg < 0 || DstAreg < 0) 4186 return true; 4187 return DstAreg == DataAreg; 4188 } 4189 4190 return DstAreg < 1 && DataAreg < 1; 4191 } 4192 4193 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4194 auto FB = getFeatureBits(); 4195 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4196 return true; 4197 4198 const MCRegisterInfo *MRI = getMRI(); 4199 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4200 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4201 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4202 const MCOperand &Op = Inst.getOperand(I); 4203 if (!Op.isReg()) 4204 continue; 4205 4206 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4207 if (!Sub) 4208 continue; 4209 4210 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4211 return false; 4212 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4213 return false; 4214 } 4215 4216 return true; 4217 } 4218 4219 // gfx90a has an undocumented limitation: 4220 // DS_GWS opcodes must use even aligned registers. 4221 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4222 const OperandVector &Operands) { 4223 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4224 return true; 4225 4226 int Opc = Inst.getOpcode(); 4227 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4228 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4229 return true; 4230 4231 const MCRegisterInfo *MRI = getMRI(); 4232 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4233 int Data0Pos = 4234 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4235 assert(Data0Pos != -1); 4236 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4237 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4238 if (RegIdx & 1) { 4239 SMLoc RegLoc = getRegLoc(Reg, Operands); 4240 Error(RegLoc, "vgpr must be even aligned"); 4241 return false; 4242 } 4243 4244 return true; 4245 } 4246 4247 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4248 const OperandVector &Operands, 4249 const SMLoc &IDLoc) { 4250 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4251 AMDGPU::OpName::cpol); 4252 if (CPolPos == -1) 4253 return true; 4254 4255 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4256 4257 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4258 if ((TSFlags & (SIInstrFlags::SMRD)) && 4259 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4260 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4261 return false; 4262 } 4263 4264 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4265 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4266 StringRef CStr(S.getPointer()); 4267 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4268 Error(S, "scc is not supported on this GPU"); 4269 return false; 4270 } 4271 4272 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4273 return true; 4274 4275 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4276 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4277 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4278 : "instruction must use glc"); 4279 return false; 4280 } 4281 } else { 4282 if (CPol & CPol::GLC) { 4283 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4284 StringRef CStr(S.getPointer()); 4285 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4286 Error(S, isGFX940() ? "instruction must not use sc0" 4287 : "instruction must not use glc"); 4288 return false; 4289 } 4290 } 4291 4292 return true; 4293 } 4294 4295 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4296 const SMLoc &IDLoc, 4297 const OperandVector &Operands) { 4298 if (auto ErrMsg = validateLdsDirect(Inst)) { 4299 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4300 return false; 4301 } 4302 if (!validateSOPLiteral(Inst)) { 4303 Error(getLitLoc(Operands), 4304 "only one literal operand is allowed"); 4305 return false; 4306 } 4307 if (!validateVOPLiteral(Inst, Operands)) { 4308 return false; 4309 } 4310 if (!validateConstantBusLimitations(Inst, Operands)) { 4311 return false; 4312 } 4313 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4314 return false; 4315 } 4316 if (!validateIntClampSupported(Inst)) { 4317 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4318 "integer clamping is not supported on this GPU"); 4319 return false; 4320 } 4321 if (!validateOpSel(Inst)) { 4322 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4323 "invalid op_sel operand"); 4324 return false; 4325 } 4326 if (!validateDPP(Inst, Operands)) { 4327 return false; 4328 } 4329 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
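// Only MIMG instructions carry an explicit d16 operand; it is validated here.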
4330 if (!validateMIMGD16(Inst)) { 4331 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4332 "d16 modifier is not supported on this GPU"); 4333 return false; 4334 } 4335 if (!validateMIMGDim(Inst)) { 4336 Error(IDLoc, "dim modifier is required on this GPU"); 4337 return false; 4338 } 4339 if (!validateMIMGMSAA(Inst)) { 4340 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4341 "invalid dim; must be MSAA type"); 4342 return false; 4343 } 4344 if (!validateMIMGDataSize(Inst)) { 4345 Error(IDLoc, 4346 "image data size does not match dmask and tfe"); 4347 return false; 4348 } 4349 if (!validateMIMGAddrSize(Inst)) { 4350 Error(IDLoc, 4351 "image address size does not match dim and a16"); 4352 return false; 4353 } 4354 if (!validateMIMGAtomicDMask(Inst)) { 4355 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4356 "invalid atomic image dmask"); 4357 return false; 4358 } 4359 if (!validateMIMGGatherDMask(Inst)) { 4360 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4361 "invalid image_gather dmask: only one bit must be set"); 4362 return false; 4363 } 4364 if (!validateMovrels(Inst, Operands)) { 4365 return false; 4366 } 4367 if (!validateFlatOffset(Inst, Operands)) { 4368 return false; 4369 } 4370 if (!validateSMEMOffset(Inst, Operands)) { 4371 return false; 4372 } 4373 if (!validateMAIAccWrite(Inst, Operands)) { 4374 return false; 4375 } 4376 if (!validateMFMA(Inst, Operands)) { 4377 return false; 4378 } 4379 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4380 return false; 4381 } 4382 4383 if (!validateAGPRLdSt(Inst)) { 4384 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4385 ? "invalid register class: data and dst should be all VGPR or AGPR" 4386 : "invalid register class: agpr loads and stores not supported on this GPU" 4387 ); 4388 return false; 4389 } 4390 if (!validateVGPRAlign(Inst)) { 4391 Error(IDLoc, 4392 "invalid register class: vgpr tuples must be 64 bit aligned"); 4393 return false; 4394 } 4395 if (!validateGWS(Inst, Operands)) { 4396 return false; 4397 } 4398 4399 if (!validateDivScale(Inst)) { 4400 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4401 return false; 4402 } 4403 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4404 return false; 4405 } 4406 4407 return true; 4408 } 4409 4410 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4411 const FeatureBitset &FBS, 4412 unsigned VariantID = 0); 4413 4414 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4415 const FeatureBitset &AvailableFeatures, 4416 unsigned VariantID); 4417 4418 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4419 const FeatureBitset &FBS) { 4420 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4421 } 4422 4423 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4424 const FeatureBitset &FBS, 4425 ArrayRef<unsigned> Variants) { 4426 for (auto Variant : Variants) { 4427 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4428 return true; 4429 } 4430 4431 return false; 4432 } 4433 4434 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4435 const SMLoc &IDLoc) { 4436 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4437 4438 // Check if requested instruction variant is supported. 4439 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4440 return false; 4441 4442 // This instruction is not supported. 4443 // Clear any other pending errors because they are no longer relevant. 4444 getParser().clearPendingErrors(); 4445 4446 // Requested instruction variant is not supported. 
4447 // Check if any other variants are supported. 4448 StringRef VariantName = getMatchedVariantName(); 4449 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4450 return Error(IDLoc, 4451 Twine(VariantName, 4452 " variant of this instruction is not supported")); 4453 } 4454 4455 // Finally check if this instruction is supported on any other GPU. 4456 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4457 return Error(IDLoc, "instruction not supported on this GPU"); 4458 } 4459 4460 // Instruction not supported on any GPU. Probably a typo. 4461 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4462 return Error(IDLoc, "invalid instruction" + Suggestion); 4463 } 4464 4465 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4466 OperandVector &Operands, 4467 MCStreamer &Out, 4468 uint64_t &ErrorInfo, 4469 bool MatchingInlineAsm) { 4470 MCInst Inst; 4471 unsigned Result = Match_Success; 4472 for (auto Variant : getMatchedVariants()) { 4473 uint64_t EI; 4474 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4475 Variant); 4476 // We order match statuses from least to most specific. We use most specific 4477 // status as resulting 4478 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4479 if ((R == Match_Success) || 4480 (R == Match_PreferE32) || 4481 (R == Match_MissingFeature && Result != Match_PreferE32) || 4482 (R == Match_InvalidOperand && Result != Match_MissingFeature 4483 && Result != Match_PreferE32) || 4484 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4485 && Result != Match_MissingFeature 4486 && Result != Match_PreferE32)) { 4487 Result = R; 4488 ErrorInfo = EI; 4489 } 4490 if (R == Match_Success) 4491 break; 4492 } 4493 4494 if (Result == Match_Success) { 4495 if (!validateInstruction(Inst, IDLoc, Operands)) { 4496 return true; 4497 } 4498 Inst.setLoc(IDLoc); 4499 Out.emitInstruction(Inst, getSTI()); 4500 return false; 4501 } 4502 4503 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4504 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4505 return true; 4506 } 4507 4508 switch (Result) { 4509 default: break; 4510 case Match_MissingFeature: 4511 // It has been verified that the specified instruction 4512 // mnemonic is valid. A match was found but it requires 4513 // features which are not supported on this GPU. 
4514 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4515 4516 case Match_InvalidOperand: { 4517 SMLoc ErrorLoc = IDLoc; 4518 if (ErrorInfo != ~0ULL) { 4519 if (ErrorInfo >= Operands.size()) { 4520 return Error(IDLoc, "too few operands for instruction"); 4521 } 4522 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4523 if (ErrorLoc == SMLoc()) 4524 ErrorLoc = IDLoc; 4525 } 4526 return Error(ErrorLoc, "invalid operand for instruction"); 4527 } 4528 4529 case Match_PreferE32: 4530 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4531 "should be encoded as e32"); 4532 case Match_MnemonicFail: 4533 llvm_unreachable("Invalid instructions should have been handled already"); 4534 } 4535 llvm_unreachable("Implement any new match types added!"); 4536 } 4537 4538 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4539 int64_t Tmp = -1; 4540 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4541 return true; 4542 } 4543 if (getParser().parseAbsoluteExpression(Tmp)) { 4544 return true; 4545 } 4546 Ret = static_cast<uint32_t>(Tmp); 4547 return false; 4548 } 4549 4550 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4551 uint32_t &Minor) { 4552 if (ParseAsAbsoluteExpression(Major)) 4553 return TokError("invalid major version"); 4554 4555 if (!trySkipToken(AsmToken::Comma)) 4556 return TokError("minor version number required, comma expected"); 4557 4558 if (ParseAsAbsoluteExpression(Minor)) 4559 return TokError("invalid minor version"); 4560 4561 return false; 4562 } 4563 4564 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4565 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4566 return TokError("directive only supported for amdgcn architecture"); 4567 4568 std::string TargetIDDirective; 4569 SMLoc TargetStart = getTok().getLoc(); 4570 if (getParser().parseEscapedString(TargetIDDirective)) 4571 return true; 4572 4573 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4574 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4575 return getParser().Error(TargetRange.Start, 4576 (Twine(".amdgcn_target directive's target id ") + 4577 Twine(TargetIDDirective) + 4578 Twine(" does not match the specified target id ") + 4579 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4580 4581 return false; 4582 } 4583 4584 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4585 return Error(Range.Start, "value out of range", Range); 4586 } 4587 4588 bool AMDGPUAsmParser::calculateGPRBlocks( 4589 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4590 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4591 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4592 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4593 // TODO(scott.linder): These calculations are duplicated from 4594 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
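// The resulting block counts are the granulated values programmed into
// COMPUTE_PGM_RSRC1 (GRANULATED_WORKITEM_VGPR_COUNT and
// GRANULATED_WAVEFRONT_SGPR_COUNT), not raw register counts.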
4595 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4596 4597 unsigned NumVGPRs = NextFreeVGPR; 4598 unsigned NumSGPRs = NextFreeSGPR; 4599 4600 if (Version.Major >= 10) 4601 NumSGPRs = 0; 4602 else { 4603 unsigned MaxAddressableNumSGPRs = 4604 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4605 4606 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4607 NumSGPRs > MaxAddressableNumSGPRs) 4608 return OutOfRangeError(SGPRRange); 4609 4610 NumSGPRs += 4611 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4612 4613 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4614 NumSGPRs > MaxAddressableNumSGPRs) 4615 return OutOfRangeError(SGPRRange); 4616 4617 if (Features.test(FeatureSGPRInitBug)) 4618 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4619 } 4620 4621 VGPRBlocks = 4622 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4623 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4624 4625 return false; 4626 } 4627 4628 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4629 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4630 return TokError("directive only supported for amdgcn architecture"); 4631 4632 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4633 return TokError("directive only supported for amdhsa OS"); 4634 4635 StringRef KernelName; 4636 if (getParser().parseIdentifier(KernelName)) 4637 return true; 4638 4639 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4640 4641 StringSet<> Seen; 4642 4643 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4644 4645 SMRange VGPRRange; 4646 uint64_t NextFreeVGPR = 0; 4647 uint64_t AccumOffset = 0; 4648 uint64_t SharedVGPRCount = 0; 4649 SMRange SGPRRange; 4650 uint64_t NextFreeSGPR = 0; 4651 4652 // Count the number of user SGPRs implied from the enabled feature bits. 4653 unsigned ImpliedUserSGPRCount = 0; 4654 4655 // Track if the asm explicitly contains the directive for the user SGPR 4656 // count. 
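// If both are given, the explicit count must be at least as large as the
// count implied by the enabled user SGPR features; this is checked once all
// directives have been parsed.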
4657 Optional<unsigned> ExplicitUserSGPRCount; 4658 bool ReserveVCC = true; 4659 bool ReserveFlatScr = true; 4660 Optional<bool> EnableWavefrontSize32; 4661 4662 while (true) { 4663 while (trySkipToken(AsmToken::EndOfStatement)); 4664 4665 StringRef ID; 4666 SMRange IDRange = getTok().getLocRange(); 4667 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4668 return true; 4669 4670 if (ID == ".end_amdhsa_kernel") 4671 break; 4672 4673 if (Seen.find(ID) != Seen.end()) 4674 return TokError(".amdhsa_ directives cannot be repeated"); 4675 Seen.insert(ID); 4676 4677 SMLoc ValStart = getLoc(); 4678 int64_t IVal; 4679 if (getParser().parseAbsoluteExpression(IVal)) 4680 return true; 4681 SMLoc ValEnd = getLoc(); 4682 SMRange ValRange = SMRange(ValStart, ValEnd); 4683 4684 if (IVal < 0) 4685 return OutOfRangeError(ValRange); 4686 4687 uint64_t Val = IVal; 4688 4689 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4690 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4691 return OutOfRangeError(RANGE); \ 4692 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4693 4694 if (ID == ".amdhsa_group_segment_fixed_size") { 4695 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4696 return OutOfRangeError(ValRange); 4697 KD.group_segment_fixed_size = Val; 4698 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4699 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4700 return OutOfRangeError(ValRange); 4701 KD.private_segment_fixed_size = Val; 4702 } else if (ID == ".amdhsa_kernarg_size") { 4703 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4704 return OutOfRangeError(ValRange); 4705 KD.kernarg_size = Val; 4706 } else if (ID == ".amdhsa_user_sgpr_count") { 4707 ExplicitUserSGPRCount = Val; 4708 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4709 if (hasArchitectedFlatScratch()) 4710 return Error(IDRange.Start, 4711 "directive is not supported with architected flat scratch", 4712 IDRange); 4713 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4714 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4715 Val, ValRange); 4716 if (Val) 4717 ImpliedUserSGPRCount += 4; 4718 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4719 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4720 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4721 ValRange); 4722 if (Val) 4723 ImpliedUserSGPRCount += 2; 4724 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4725 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4726 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4727 ValRange); 4728 if (Val) 4729 ImpliedUserSGPRCount += 2; 4730 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4731 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4732 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4733 Val, ValRange); 4734 if (Val) 4735 ImpliedUserSGPRCount += 2; 4736 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4737 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4738 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4739 ValRange); 4740 if (Val) 4741 ImpliedUserSGPRCount += 2; 4742 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4743 if (hasArchitectedFlatScratch()) 4744 return Error(IDRange.Start, 4745 "directive is not supported with architected flat scratch", 4746 IDRange); 4747 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4748 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4749 ValRange); 4750 if (Val) 4751 ImpliedUserSGPRCount += 2; 4752 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4753 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4754 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4755 Val, ValRange); 4756 if (Val) 4757 ImpliedUserSGPRCount += 1; 4758 } else if (ID == ".amdhsa_wavefront_size32") { 4759 if (IVersion.Major < 10) 4760 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4761 EnableWavefrontSize32 = Val; 4762 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4763 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4764 Val, ValRange); 4765 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4766 if (hasArchitectedFlatScratch()) 4767 return Error(IDRange.Start, 4768 "directive is not supported with architected flat scratch", 4769 IDRange); 4770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4771 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4772 } else if (ID == ".amdhsa_enable_private_segment") { 4773 if (!hasArchitectedFlatScratch()) 4774 return Error( 4775 IDRange.Start, 4776 "directive is not supported without architected flat scratch", 4777 IDRange); 4778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4779 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4780 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4781 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4782 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4783 ValRange); 4784 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4786 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4787 ValRange); 4788 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4790 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4791 ValRange); 4792 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4793 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4794 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4795 ValRange); 4796 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4797 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4798 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4799 ValRange); 4800 } else if (ID == ".amdhsa_next_free_vgpr") { 4801 VGPRRange = ValRange; 4802 NextFreeVGPR = Val; 4803 } else if (ID == ".amdhsa_next_free_sgpr") { 4804 SGPRRange = ValRange; 4805 NextFreeSGPR = Val; 4806 } else if (ID == ".amdhsa_accum_offset") { 4807 if (!isGFX90A()) 4808 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4809 AccumOffset = Val; 4810 } else if (ID == ".amdhsa_reserve_vcc") { 4811 if (!isUInt<1>(Val)) 4812 return OutOfRangeError(ValRange); 4813 ReserveVCC = Val; 4814 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4815 if (IVersion.Major < 7) 4816 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4817 if (hasArchitectedFlatScratch()) 4818 return Error(IDRange.Start, 4819 "directive is not supported with architected flat scratch", 4820 IDRange); 4821 if (!isUInt<1>(Val)) 4822 return OutOfRangeError(ValRange); 4823 ReserveFlatScr = Val; 4824 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4825 if (IVersion.Major < 8) 4826 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4827 if (!isUInt<1>(Val)) 4828 return OutOfRangeError(ValRange); 4829 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4830 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4831 IDRange); 4832 } else if (ID == ".amdhsa_float_round_mode_32") { 4833 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4834 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4835 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4836 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4837 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4838 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4839 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4840 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4841 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4842 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4843 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4844 ValRange); 4845 } else if (ID == ".amdhsa_dx10_clamp") { 4846 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4847 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4848 } else if (ID == ".amdhsa_ieee_mode") { 4849 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4850 Val, ValRange); 4851 } else if (ID == ".amdhsa_fp16_overflow") { 4852 if (IVersion.Major < 9) 4853 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4854 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4855 ValRange); 4856 } else if (ID == ".amdhsa_tg_split") { 4857 if (!isGFX90A()) 4858 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4859 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4860 ValRange); 4861 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4862 if (IVersion.Major < 10) 4863 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4865 ValRange); 4866 } else if (ID == ".amdhsa_memory_ordered") { 4867 if (IVersion.Major < 10) 4868 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4869 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4870 ValRange); 4871 } else if (ID == ".amdhsa_forward_progress") { 4872 if (IVersion.Major < 10) 4873 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4874 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4875 ValRange); 4876 } else if (ID == ".amdhsa_shared_vgpr_count") { 4877 if (IVersion.Major < 10) 4878 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4879 SharedVGPRCount = Val; 4880 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4881 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4882 ValRange); 4883 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4884 PARSE_BITS_ENTRY( 4885 KD.compute_pgm_rsrc2, 4886 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4887 ValRange); 4888 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4889 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4890 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4891 Val, ValRange); 4892 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4893 PARSE_BITS_ENTRY( 4894 KD.compute_pgm_rsrc2, 4895 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4896 ValRange); 4897 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4898 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4899 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4900 Val, ValRange); 4901 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4902 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4903 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4904 Val, ValRange); 4905 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4906 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4907 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4908 Val, ValRange); 4909 } else if (ID == ".amdhsa_exception_int_div_zero") { 4910 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4911 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4912 Val, ValRange); 4913 } else { 4914 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4915 } 4916 4917 #undef PARSE_BITS_ENTRY 4918 } 4919 4920 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4921 return TokError(".amdhsa_next_free_vgpr directive is required"); 4922 4923 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4924 return TokError(".amdhsa_next_free_sgpr directive is required"); 4925 4926 unsigned VGPRBlocks; 4927 unsigned SGPRBlocks; 4928 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4929 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4930 EnableWavefrontSize32, NextFreeVGPR, 4931 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4932 SGPRBlocks)) 4933 return true; 4934 4935 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4936 VGPRBlocks)) 4937 return OutOfRangeError(VGPRRange); 4938 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4939 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4940 4941 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4942 SGPRBlocks)) 4943 return OutOfRangeError(SGPRRange); 4944 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4945 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4946 SGPRBlocks); 4947 4948 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4949 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4950 "enabled user SGPRs"); 4951 4952 unsigned UserSGPRCount = 4953 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4954 4955 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4956 return TokError("too many user SGPRs enabled"); 4957 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4958 UserSGPRCount); 4959 4960 if (isGFX90A()) { 4961 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4962 return TokError(".amdhsa_accum_offset directive is required"); 4963 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4964 return TokError("accum_offset should be in range [4..256] in " 4965 "increments of 4"); 4966 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4967 return TokError("accum_offset exceeds total VGPR allocation"); 4968 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4969 (AccumOffset / 4 - 1)); 4970 } 4971 4972 if (IVersion.Major == 10) { 4973 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 4974 if (SharedVGPRCount && EnableWavefrontSize32) { 4975 return TokError("shared_vgpr_count directive not valid on " 4976 "wavefront size 32"); 4977 } 4978 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 4979 return TokError("shared_vgpr_count*2 + " 4980 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 4981 "exceed 63\n"); 4982 } 4983 } 4984 4985 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4986 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4987 ReserveFlatScr); 4988 return false; 4989 } 4990 4991 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4992 uint32_t Major; 4993 uint32_t Minor; 4994 4995 if (ParseDirectiveMajorMinor(Major, Minor)) 4996 return true; 4997 4998 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4999 return false; 5000 } 5001 5002 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5003 uint32_t Major; 5004 uint32_t Minor; 5005 uint32_t Stepping; 5006 StringRef VendorName; 5007 StringRef ArchName; 5008 5009 // If this directive has no 
arguments, then use the ISA version for the 5010 // targeted GPU. 5011 if (isToken(AsmToken::EndOfStatement)) { 5012 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5013 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5014 ISA.Stepping, 5015 "AMD", "AMDGPU"); 5016 return false; 5017 } 5018 5019 if (ParseDirectiveMajorMinor(Major, Minor)) 5020 return true; 5021 5022 if (!trySkipToken(AsmToken::Comma)) 5023 return TokError("stepping version number required, comma expected"); 5024 5025 if (ParseAsAbsoluteExpression(Stepping)) 5026 return TokError("invalid stepping version"); 5027 5028 if (!trySkipToken(AsmToken::Comma)) 5029 return TokError("vendor name required, comma expected"); 5030 5031 if (!parseString(VendorName, "invalid vendor name")) 5032 return true; 5033 5034 if (!trySkipToken(AsmToken::Comma)) 5035 return TokError("arch name required, comma expected"); 5036 5037 if (!parseString(ArchName, "invalid arch name")) 5038 return true; 5039 5040 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5041 VendorName, ArchName); 5042 return false; 5043 } 5044 5045 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5046 amd_kernel_code_t &Header) { 5047 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5048 // assembly for backwards compatibility. 5049 if (ID == "max_scratch_backing_memory_byte_size") { 5050 Parser.eatToEndOfStatement(); 5051 return false; 5052 } 5053 5054 SmallString<40> ErrStr; 5055 raw_svector_ostream Err(ErrStr); 5056 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5057 return TokError(Err.str()); 5058 } 5059 Lex(); 5060 5061 if (ID == "enable_wavefront_size32") { 5062 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5063 if (!isGFX10Plus()) 5064 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5065 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5066 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5067 } else { 5068 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5069 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5070 } 5071 } 5072 5073 if (ID == "wavefront_size") { 5074 if (Header.wavefront_size == 5) { 5075 if (!isGFX10Plus()) 5076 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5077 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5078 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5079 } else if (Header.wavefront_size == 6) { 5080 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5081 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5082 } 5083 } 5084 5085 if (ID == "enable_wgp_mode") { 5086 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5087 !isGFX10Plus()) 5088 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5089 } 5090 5091 if (ID == "enable_mem_ordered") { 5092 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5093 !isGFX10Plus()) 5094 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5095 } 5096 5097 if (ID == "enable_fwd_progress") { 5098 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5099 !isGFX10Plus()) 5100 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5101 } 5102 5103 return false; 5104 } 5105 5106 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5107 amd_kernel_code_t Header; 5108 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5109 5110 while (true) { 
5111 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5112 // will set the current token to EndOfStatement. 5113 while(trySkipToken(AsmToken::EndOfStatement)); 5114 5115 StringRef ID; 5116 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5117 return true; 5118 5119 if (ID == ".end_amd_kernel_code_t") 5120 break; 5121 5122 if (ParseAMDKernelCodeTValue(ID, Header)) 5123 return true; 5124 } 5125 5126 getTargetStreamer().EmitAMDKernelCodeT(Header); 5127 5128 return false; 5129 } 5130 5131 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5132 StringRef KernelName; 5133 if (!parseId(KernelName, "expected symbol name")) 5134 return true; 5135 5136 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5137 ELF::STT_AMDGPU_HSA_KERNEL); 5138 5139 KernelScope.initialize(getContext()); 5140 return false; 5141 } 5142 5143 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5144 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5145 return Error(getLoc(), 5146 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5147 "architectures"); 5148 } 5149 5150 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5151 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5152 return Error(getParser().getTok().getLoc(), "target id must match options"); 5153 5154 getTargetStreamer().EmitISAVersion(); 5155 Lex(); 5156 5157 return false; 5158 } 5159 5160 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5161 const char *AssemblerDirectiveBegin; 5162 const char *AssemblerDirectiveEnd; 5163 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5164 isHsaAbiVersion3AndAbove(&getSTI()) 5165 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5166 HSAMD::V3::AssemblerDirectiveEnd) 5167 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5168 HSAMD::AssemblerDirectiveEnd); 5169 5170 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5171 return Error(getLoc(), 5172 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5173 "not available on non-amdhsa OSes")).str()); 5174 } 5175 5176 std::string HSAMetadataString; 5177 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5178 HSAMetadataString)) 5179 return true; 5180 5181 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5182 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5183 return Error(getLoc(), "invalid HSA metadata"); 5184 } else { 5185 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5186 return Error(getLoc(), "invalid HSA metadata"); 5187 } 5188 5189 return false; 5190 } 5191 5192 /// Common code to parse out a block of text (typically YAML) between start and 5193 /// end directives. 
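/// Used by the HSA metadata and MsgPack PAL metadata directives, which collect
/// everything up to their matching end directive into a single string.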
5194 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5195 const char *AssemblerDirectiveEnd, 5196 std::string &CollectString) { 5197 5198 raw_string_ostream CollectStream(CollectString); 5199 5200 getLexer().setSkipSpace(false); 5201 5202 bool FoundEnd = false; 5203 while (!isToken(AsmToken::Eof)) { 5204 while (isToken(AsmToken::Space)) { 5205 CollectStream << getTokenStr(); 5206 Lex(); 5207 } 5208 5209 if (trySkipId(AssemblerDirectiveEnd)) { 5210 FoundEnd = true; 5211 break; 5212 } 5213 5214 CollectStream << Parser.parseStringToEndOfStatement() 5215 << getContext().getAsmInfo()->getSeparatorString(); 5216 5217 Parser.eatToEndOfStatement(); 5218 } 5219 5220 getLexer().setSkipSpace(true); 5221 5222 if (isToken(AsmToken::Eof) && !FoundEnd) { 5223 return TokError(Twine("expected directive ") + 5224 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5225 } 5226 5227 CollectStream.flush(); 5228 return false; 5229 } 5230 5231 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5232 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5233 std::string String; 5234 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5235 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5236 return true; 5237 5238 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5239 if (!PALMetadata->setFromString(String)) 5240 return Error(getLoc(), "invalid PAL metadata"); 5241 return false; 5242 } 5243 5244 /// Parse the assembler directive for old linear-format PAL metadata. 5245 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5246 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5247 return Error(getLoc(), 5248 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5249 "not available on non-amdpal OSes")).str()); 5250 } 5251 5252 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5253 PALMetadata->setLegacy(); 5254 for (;;) { 5255 uint32_t Key, Value; 5256 if (ParseAsAbsoluteExpression(Key)) { 5257 return TokError(Twine("invalid value in ") + 5258 Twine(PALMD::AssemblerDirective)); 5259 } 5260 if (!trySkipToken(AsmToken::Comma)) { 5261 return TokError(Twine("expected an even number of values in ") + 5262 Twine(PALMD::AssemblerDirective)); 5263 } 5264 if (ParseAsAbsoluteExpression(Value)) { 5265 return TokError(Twine("invalid value in ") + 5266 Twine(PALMD::AssemblerDirective)); 5267 } 5268 PALMetadata->setRegister(Key, Value); 5269 if (!trySkipToken(AsmToken::Comma)) 5270 break; 5271 } 5272 return false; 5273 } 5274 5275 /// ParseDirectiveAMDGPULDS 5276 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5277 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5278 if (getParser().checkForValidSection()) 5279 return true; 5280 5281 StringRef Name; 5282 SMLoc NameLoc = getLoc(); 5283 if (getParser().parseIdentifier(Name)) 5284 return TokError("expected identifier in directive"); 5285 5286 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5287 if (parseToken(AsmToken::Comma, "expected ','")) 5288 return true; 5289 5290 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5291 5292 int64_t Size; 5293 SMLoc SizeLoc = getLoc(); 5294 if (getParser().parseAbsoluteExpression(Size)) 5295 return true; 5296 if (Size < 0) 5297 return Error(SizeLoc, "size must be non-negative"); 5298 if (Size > LocalMemorySize) 5299 return Error(SizeLoc, "size is too large"); 5300 5301 int64_t Alignment = 4; 5302 if (trySkipToken(AsmToken::Comma)) { 5303 SMLoc AlignLoc = getLoc(); 5304 if 
(getParser().parseAbsoluteExpression(Alignment)) 5305 return true; 5306 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5307 return Error(AlignLoc, "alignment must be a power of two"); 5308 5309 // Alignment larger than the size of LDS is possible in theory, as long 5310 // as the linker manages to place to symbol at address 0, but we do want 5311 // to make sure the alignment fits nicely into a 32-bit integer. 5312 if (Alignment >= 1u << 31) 5313 return Error(AlignLoc, "alignment is too large"); 5314 } 5315 5316 if (parseToken(AsmToken::EndOfStatement, 5317 "unexpected token in '.amdgpu_lds' directive")) 5318 return true; 5319 5320 Symbol->redefineIfPossible(); 5321 if (!Symbol->isUndefined()) 5322 return Error(NameLoc, "invalid symbol redefinition"); 5323 5324 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5325 return false; 5326 } 5327 5328 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5329 StringRef IDVal = DirectiveID.getString(); 5330 5331 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5332 if (IDVal == ".amdhsa_kernel") 5333 return ParseDirectiveAMDHSAKernel(); 5334 5335 // TODO: Restructure/combine with PAL metadata directive. 5336 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5337 return ParseDirectiveHSAMetadata(); 5338 } else { 5339 if (IDVal == ".hsa_code_object_version") 5340 return ParseDirectiveHSACodeObjectVersion(); 5341 5342 if (IDVal == ".hsa_code_object_isa") 5343 return ParseDirectiveHSACodeObjectISA(); 5344 5345 if (IDVal == ".amd_kernel_code_t") 5346 return ParseDirectiveAMDKernelCodeT(); 5347 5348 if (IDVal == ".amdgpu_hsa_kernel") 5349 return ParseDirectiveAMDGPUHsaKernel(); 5350 5351 if (IDVal == ".amd_amdgpu_isa") 5352 return ParseDirectiveISAVersion(); 5353 5354 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5355 return ParseDirectiveHSAMetadata(); 5356 } 5357 5358 if (IDVal == ".amdgcn_target") 5359 return ParseDirectiveAMDGCNTarget(); 5360 5361 if (IDVal == ".amdgpu_lds") 5362 return ParseDirectiveAMDGPULDS(); 5363 5364 if (IDVal == PALMD::AssemblerDirectiveBegin) 5365 return ParseDirectivePALMetadataBegin(); 5366 5367 if (IDVal == PALMD::AssemblerDirective) 5368 return ParseDirectivePALMetadata(); 5369 5370 return true; 5371 } 5372 5373 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5374 unsigned RegNo) { 5375 5376 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5377 return isGFX9Plus(); 5378 5379 // GFX10 has 2 more SGPRs 104 and 105. 5380 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5381 return hasSGPR104_SGPR105(); 5382 5383 switch (RegNo) { 5384 case AMDGPU::SRC_SHARED_BASE: 5385 case AMDGPU::SRC_SHARED_LIMIT: 5386 case AMDGPU::SRC_PRIVATE_BASE: 5387 case AMDGPU::SRC_PRIVATE_LIMIT: 5388 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5389 return isGFX9Plus(); 5390 case AMDGPU::TBA: 5391 case AMDGPU::TBA_LO: 5392 case AMDGPU::TBA_HI: 5393 case AMDGPU::TMA: 5394 case AMDGPU::TMA_LO: 5395 case AMDGPU::TMA_HI: 5396 return !isGFX9Plus(); 5397 case AMDGPU::XNACK_MASK: 5398 case AMDGPU::XNACK_MASK_LO: 5399 case AMDGPU::XNACK_MASK_HI: 5400 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5401 case AMDGPU::SGPR_NULL: 5402 return isGFX10Plus(); 5403 default: 5404 break; 5405 } 5406 5407 if (isCI()) 5408 return true; 5409 5410 if (isSI() || isGFX10Plus()) { 5411 // No flat_scr on SI. 5412 // On GFX10 flat scratch is not a valid register operand and can only be 5413 // accessed with s_setreg/s_getreg. 
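// Illustrative: an assembly reference to flat_scratch (or flat_scratch_lo,
// flat_scratch_hi) is therefore rejected for these subtargets by the switch
// below.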
5414 switch (RegNo) { 5415 case AMDGPU::FLAT_SCR: 5416 case AMDGPU::FLAT_SCR_LO: 5417 case AMDGPU::FLAT_SCR_HI: 5418 return false; 5419 default: 5420 return true; 5421 } 5422 } 5423 5424 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5425 // SI/CI have. 5426 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5427 return hasSGPR102_SGPR103(); 5428 5429 return true; 5430 } 5431 5432 OperandMatchResultTy 5433 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5434 OperandMode Mode) { 5435 // Try to parse with a custom parser 5436 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5437 5438 // If we successfully parsed the operand or if there as an error parsing, 5439 // we are done. 5440 // 5441 // If we are parsing after we reach EndOfStatement then this means we 5442 // are appending default values to the Operands list. This is only done 5443 // by custom parser, so we shouldn't continue on to the generic parsing. 5444 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5445 isToken(AsmToken::EndOfStatement)) 5446 return ResTy; 5447 5448 SMLoc RBraceLoc; 5449 SMLoc LBraceLoc = getLoc(); 5450 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5451 unsigned Prefix = Operands.size(); 5452 5453 for (;;) { 5454 auto Loc = getLoc(); 5455 ResTy = parseReg(Operands); 5456 if (ResTy == MatchOperand_NoMatch) 5457 Error(Loc, "expected a register"); 5458 if (ResTy != MatchOperand_Success) 5459 return MatchOperand_ParseFail; 5460 5461 RBraceLoc = getLoc(); 5462 if (trySkipToken(AsmToken::RBrac)) 5463 break; 5464 5465 if (!skipToken(AsmToken::Comma, 5466 "expected a comma or a closing square bracket")) { 5467 return MatchOperand_ParseFail; 5468 } 5469 } 5470 5471 if (Operands.size() - Prefix > 1) { 5472 Operands.insert(Operands.begin() + Prefix, 5473 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5474 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5475 } 5476 5477 return MatchOperand_Success; 5478 } 5479 5480 return parseRegOrImm(Operands); 5481 } 5482 5483 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5484 // Clear any forced encodings from the previous instruction. 
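// For example (illustrative), "v_add_f32_e64" forces the 64-bit VOP3
// encoding and is returned stripped to "v_add_f32"; the "_e32", "_dpp" and
// "_sdwa" suffixes below are handled the same way.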
5485 setForcedEncodingSize(0); 5486 setForcedDPP(false); 5487 setForcedSDWA(false); 5488 5489 if (Name.endswith("_e64")) { 5490 setForcedEncodingSize(64); 5491 return Name.substr(0, Name.size() - 4); 5492 } else if (Name.endswith("_e32")) { 5493 setForcedEncodingSize(32); 5494 return Name.substr(0, Name.size() - 4); 5495 } else if (Name.endswith("_dpp")) { 5496 setForcedDPP(true); 5497 return Name.substr(0, Name.size() - 4); 5498 } else if (Name.endswith("_sdwa")) { 5499 setForcedSDWA(true); 5500 return Name.substr(0, Name.size() - 5); 5501 } 5502 return Name; 5503 } 5504 5505 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5506 StringRef Name, 5507 SMLoc NameLoc, OperandVector &Operands) { 5508 // Add the instruction mnemonic 5509 Name = parseMnemonicSuffix(Name); 5510 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5511 5512 bool IsMIMG = Name.startswith("image_"); 5513 5514 while (!trySkipToken(AsmToken::EndOfStatement)) { 5515 OperandMode Mode = OperandMode_Default; 5516 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5517 Mode = OperandMode_NSA; 5518 CPolSeen = 0; 5519 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5520 5521 if (Res != MatchOperand_Success) { 5522 checkUnsupportedInstruction(Name, NameLoc); 5523 if (!Parser.hasPendingError()) { 5524 // FIXME: use real operand location rather than the current location. 5525 StringRef Msg = 5526 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5527 "not a valid operand."; 5528 Error(getLoc(), Msg); 5529 } 5530 while (!trySkipToken(AsmToken::EndOfStatement)) { 5531 lex(); 5532 } 5533 return true; 5534 } 5535 5536 // Eat the comma or space if there is one. 5537 trySkipToken(AsmToken::Comma); 5538 } 5539 5540 return false; 5541 } 5542 5543 //===----------------------------------------------------------------------===// 5544 // Utility functions 5545 //===----------------------------------------------------------------------===// 5546 5547 OperandMatchResultTy 5548 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5549 5550 if (!trySkipId(Prefix, AsmToken::Colon)) 5551 return MatchOperand_NoMatch; 5552 5553 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5554 } 5555 5556 OperandMatchResultTy 5557 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5558 AMDGPUOperand::ImmTy ImmTy, 5559 bool (*ConvertResult)(int64_t&)) { 5560 SMLoc S = getLoc(); 5561 int64_t Value = 0; 5562 5563 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5564 if (Res != MatchOperand_Success) 5565 return Res; 5566 5567 if (ConvertResult && !ConvertResult(Value)) { 5568 Error(S, "invalid " + StringRef(Prefix) + " value."); 5569 } 5570 5571 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5572 return MatchOperand_Success; 5573 } 5574 5575 OperandMatchResultTy 5576 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5577 OperandVector &Operands, 5578 AMDGPUOperand::ImmTy ImmTy, 5579 bool (*ConvertResult)(int64_t&)) { 5580 SMLoc S = getLoc(); 5581 if (!trySkipId(Prefix, AsmToken::Colon)) 5582 return MatchOperand_NoMatch; 5583 5584 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5585 return MatchOperand_ParseFail; 5586 5587 unsigned Val = 0; 5588 const unsigned MaxSize = 4; 5589 5590 // FIXME: How to verify the number of elements matches the number of src 5591 // operands? 
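// Illustrative example: an operand such as op_sel:[0,1,1,0] is packed by
// the loop below into Val = 0b0110, i.e. element I contributes bit I.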
5592 for (int I = 0; ; ++I) { 5593 int64_t Op; 5594 SMLoc Loc = getLoc(); 5595 if (!parseExpr(Op)) 5596 return MatchOperand_ParseFail; 5597 5598 if (Op != 0 && Op != 1) { 5599 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5600 return MatchOperand_ParseFail; 5601 } 5602 5603 Val |= (Op << I); 5604 5605 if (trySkipToken(AsmToken::RBrac)) 5606 break; 5607 5608 if (I + 1 == MaxSize) { 5609 Error(getLoc(), "expected a closing square bracket"); 5610 return MatchOperand_ParseFail; 5611 } 5612 5613 if (!skipToken(AsmToken::Comma, "expected a comma")) 5614 return MatchOperand_ParseFail; 5615 } 5616 5617 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5618 return MatchOperand_Success; 5619 } 5620 5621 OperandMatchResultTy 5622 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5623 AMDGPUOperand::ImmTy ImmTy) { 5624 int64_t Bit; 5625 SMLoc S = getLoc(); 5626 5627 if (trySkipId(Name)) { 5628 Bit = 1; 5629 } else if (trySkipId("no", Name)) { 5630 Bit = 0; 5631 } else { 5632 return MatchOperand_NoMatch; 5633 } 5634 5635 if (Name == "r128" && !hasMIMG_R128()) { 5636 Error(S, "r128 modifier is not supported on this GPU"); 5637 return MatchOperand_ParseFail; 5638 } 5639 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5640 Error(S, "a16 modifier is not supported on this GPU"); 5641 return MatchOperand_ParseFail; 5642 } 5643 5644 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5645 ImmTy = AMDGPUOperand::ImmTyR128A16; 5646 5647 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5648 return MatchOperand_Success; 5649 } 5650 5651 OperandMatchResultTy 5652 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5653 unsigned CPolOn = 0; 5654 unsigned CPolOff = 0; 5655 SMLoc S = getLoc(); 5656 5657 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5658 if (isGFX940() && !Mnemo.startswith("s_")) { 5659 if (trySkipId("sc0")) 5660 CPolOn = AMDGPU::CPol::SC0; 5661 else if (trySkipId("nosc0")) 5662 CPolOff = AMDGPU::CPol::SC0; 5663 else if (trySkipId("nt")) 5664 CPolOn = AMDGPU::CPol::NT; 5665 else if (trySkipId("nont")) 5666 CPolOff = AMDGPU::CPol::NT; 5667 else if (trySkipId("sc1")) 5668 CPolOn = AMDGPU::CPol::SC1; 5669 else if (trySkipId("nosc1")) 5670 CPolOff = AMDGPU::CPol::SC1; 5671 else 5672 return MatchOperand_NoMatch; 5673 } 5674 else if (trySkipId("glc")) 5675 CPolOn = AMDGPU::CPol::GLC; 5676 else if (trySkipId("noglc")) 5677 CPolOff = AMDGPU::CPol::GLC; 5678 else if (trySkipId("slc")) 5679 CPolOn = AMDGPU::CPol::SLC; 5680 else if (trySkipId("noslc")) 5681 CPolOff = AMDGPU::CPol::SLC; 5682 else if (trySkipId("dlc")) 5683 CPolOn = AMDGPU::CPol::DLC; 5684 else if (trySkipId("nodlc")) 5685 CPolOff = AMDGPU::CPol::DLC; 5686 else if (trySkipId("scc")) 5687 CPolOn = AMDGPU::CPol::SCC; 5688 else if (trySkipId("noscc")) 5689 CPolOff = AMDGPU::CPol::SCC; 5690 else 5691 return MatchOperand_NoMatch; 5692 5693 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5694 Error(S, "dlc modifier is not supported on this GPU"); 5695 return MatchOperand_ParseFail; 5696 } 5697 5698 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5699 Error(S, "scc modifier is not supported on this GPU"); 5700 return MatchOperand_ParseFail; 5701 } 5702 5703 if (CPolSeen & (CPolOn | CPolOff)) { 5704 Error(S, "duplicate cache policy modifier"); 5705 return MatchOperand_ParseFail; 5706 } 5707 5708 CPolSeen |= (CPolOn | CPolOff); 5709 5710 for (unsigned I = 1; I != Operands.size(); ++I) { 5711 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5712 if (Op.isCPol()) { 5713 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5714 return MatchOperand_Success; 5715 } 5716 } 5717 5718 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5719 AMDGPUOperand::ImmTyCPol)); 5720 5721 return MatchOperand_Success; 5722 } 5723 5724 static void addOptionalImmOperand( 5725 MCInst& Inst, const OperandVector& Operands, 5726 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5727 AMDGPUOperand::ImmTy ImmT, 5728 int64_t Default = 0) { 5729 auto i = OptionalIdx.find(ImmT); 5730 if (i != OptionalIdx.end()) { 5731 unsigned Idx = i->second; 5732 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5733 } else { 5734 Inst.addOperand(MCOperand::createImm(Default)); 5735 } 5736 } 5737 5738 OperandMatchResultTy 5739 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5740 StringRef &Value, 5741 SMLoc &StringLoc) { 5742 if (!trySkipId(Prefix, AsmToken::Colon)) 5743 return MatchOperand_NoMatch; 5744 5745 StringLoc = getLoc(); 5746 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5747 : MatchOperand_ParseFail; 5748 } 5749 5750 //===----------------------------------------------------------------------===// 5751 // MTBUF format 5752 //===----------------------------------------------------------------------===// 5753 5754 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5755 int64_t MaxVal, 5756 int64_t &Fmt) { 5757 int64_t Val; 5758 SMLoc Loc = getLoc(); 5759 5760 auto Res = parseIntWithPrefix(Pref, Val); 5761 if (Res == MatchOperand_ParseFail) 5762 return false; 5763 if (Res == MatchOperand_NoMatch) 5764 return true; 5765 5766 if (Val < 0 || Val > MaxVal) { 5767 Error(Loc, Twine("out of range ", StringRef(Pref))); 5768 return false; 5769 } 5770 5771 Fmt = Val; 5772 return true; 5773 } 5774 5775 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5776 // values to live in a joint format operand in the MCInst encoding. 5777 OperandMatchResultTy 5778 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5779 using namespace llvm::AMDGPU::MTBUFFormat; 5780 5781 int64_t Dfmt = DFMT_UNDEF; 5782 int64_t Nfmt = NFMT_UNDEF; 5783 5784 // dfmt and nfmt can appear in either order, and each is optional. 5785 for (int I = 0; I < 2; ++I) { 5786 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5787 return MatchOperand_ParseFail; 5788 5789 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5790 return MatchOperand_ParseFail; 5791 } 5792 // Skip optional comma between dfmt/nfmt 5793 // but guard against 2 commas following each other. 5794 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5795 !peekToken().is(AsmToken::Comma)) { 5796 trySkipToken(AsmToken::Comma); 5797 } 5798 } 5799 5800 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5801 return MatchOperand_NoMatch; 5802 5803 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5804 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5805 5806 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5807 return MatchOperand_Success; 5808 } 5809 5810 OperandMatchResultTy 5811 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5812 using namespace llvm::AMDGPU::MTBUFFormat; 5813 5814 int64_t Fmt = UFMT_UNDEF; 5815 5816 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5817 return MatchOperand_ParseFail; 5818 5819 if (Fmt == UFMT_UNDEF) 5820 return MatchOperand_NoMatch; 5821 5822 Format = Fmt; 5823 return MatchOperand_Success; 5824 } 5825 5826 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5827 int64_t &Nfmt, 5828 StringRef FormatStr, 5829 SMLoc Loc) { 5830 using namespace llvm::AMDGPU::MTBUFFormat; 5831 int64_t Format; 5832 5833 Format = getDfmt(FormatStr); 5834 if (Format != DFMT_UNDEF) { 5835 Dfmt = Format; 5836 return true; 5837 } 5838 5839 Format = getNfmt(FormatStr, getSTI()); 5840 if (Format != NFMT_UNDEF) { 5841 Nfmt = Format; 5842 return true; 5843 } 5844 5845 Error(Loc, "unsupported format"); 5846 return false; 5847 } 5848 5849 OperandMatchResultTy 5850 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5851 SMLoc FormatLoc, 5852 int64_t &Format) { 5853 using namespace llvm::AMDGPU::MTBUFFormat; 5854 5855 int64_t Dfmt = DFMT_UNDEF; 5856 int64_t Nfmt = NFMT_UNDEF; 5857 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5858 return MatchOperand_ParseFail; 5859 5860 if (trySkipToken(AsmToken::Comma)) { 5861 StringRef Str; 5862 SMLoc Loc = getLoc(); 5863 if (!parseId(Str, "expected a format string") || 5864 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5865 return MatchOperand_ParseFail; 5866 } 5867 if (Dfmt == DFMT_UNDEF) { 5868 Error(Loc, "duplicate numeric format"); 5869 return MatchOperand_ParseFail; 5870 } else if (Nfmt == NFMT_UNDEF) { 5871 Error(Loc, "duplicate data format"); 5872 return MatchOperand_ParseFail; 5873 } 5874 } 5875 5876 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5877 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5878 5879 if (isGFX10Plus()) { 5880 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5881 if (Ufmt == UFMT_UNDEF) { 5882 Error(FormatLoc, "unsupported format"); 5883 return MatchOperand_ParseFail; 5884 } 5885 Format = Ufmt; 5886 } else { 5887 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5888 } 5889 5890 return MatchOperand_Success; 5891 } 5892 5893 OperandMatchResultTy 5894 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5895 SMLoc Loc, 5896 int64_t &Format) { 5897 using namespace llvm::AMDGPU::MTBUFFormat; 5898 5899 auto Id = getUnifiedFormat(FormatStr); 5900 if (Id == UFMT_UNDEF) 5901 return MatchOperand_NoMatch; 5902 5903 if (!isGFX10Plus()) { 5904 Error(Loc, "unified format is not supported on this GPU"); 5905 return MatchOperand_ParseFail; 5906 } 5907 5908 Format = Id; 5909 return MatchOperand_Success; 5910 } 5911 5912 OperandMatchResultTy 5913 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5914 using namespace llvm::AMDGPU::MTBUFFormat; 5915 SMLoc Loc = getLoc(); 5916 5917 if (!parseExpr(Format)) 5918 return MatchOperand_ParseFail; 5919 if (!isValidFormatEncoding(Format, getSTI())) { 5920 Error(Loc, "out of range format"); 5921 return MatchOperand_ParseFail; 5922 } 5923 5924 return MatchOperand_Success; 5925 } 5926 5927 OperandMatchResultTy 5928 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5929 using namespace llvm::AMDGPU::MTBUFFormat; 5930 5931 if (!trySkipId("format", AsmToken::Colon)) 5932 return MatchOperand_NoMatch; 5933 5934 if (trySkipToken(AsmToken::LBrac)) { 5935 StringRef FormatStr; 5936 SMLoc Loc = getLoc(); 5937 if (!parseId(FormatStr, "expected a format string")) 5938 return MatchOperand_ParseFail; 5939 5940 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5941 if (Res == MatchOperand_NoMatch) 5942 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5943 if (Res != MatchOperand_Success) 5944 return Res; 5945 5946 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5947 return MatchOperand_ParseFail; 5948 5949 return MatchOperand_Success; 5950 } 5951 5952 return parseNumericFormat(Format); 5953 } 5954 5955 OperandMatchResultTy 5956 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5957 using namespace llvm::AMDGPU::MTBUFFormat; 5958 5959 int64_t Format = getDefaultFormatEncoding(getSTI()); 5960 OperandMatchResultTy Res; 5961 SMLoc Loc = getLoc(); 5962 5963 // Parse legacy format syntax. 5964 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5965 if (Res == MatchOperand_ParseFail) 5966 return Res; 5967 5968 bool FormatFound = (Res == MatchOperand_Success); 5969 5970 Operands.push_back( 5971 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5972 5973 if (FormatFound) 5974 trySkipToken(AsmToken::Comma); 5975 5976 if (isToken(AsmToken::EndOfStatement)) { 5977 // We are expecting an soffset operand, 5978 // but let matcher handle the error. 5979 return MatchOperand_Success; 5980 } 5981 5982 // Parse soffset. 
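// Illustrative example: the symbolic form may also follow soffset, e.g.
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
// so if no format has been seen yet, look for one after soffset below.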
5983 Res = parseRegOrImm(Operands); 5984 if (Res != MatchOperand_Success) 5985 return Res; 5986 5987 trySkipToken(AsmToken::Comma); 5988 5989 if (!FormatFound) { 5990 Res = parseSymbolicOrNumericFormat(Format); 5991 if (Res == MatchOperand_ParseFail) 5992 return Res; 5993 if (Res == MatchOperand_Success) { 5994 auto Size = Operands.size(); 5995 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5996 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5997 Op.setImm(Format); 5998 } 5999 return MatchOperand_Success; 6000 } 6001 6002 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6003 Error(getLoc(), "duplicate format"); 6004 return MatchOperand_ParseFail; 6005 } 6006 return MatchOperand_Success; 6007 } 6008 6009 //===----------------------------------------------------------------------===// 6010 // ds 6011 //===----------------------------------------------------------------------===// 6012 6013 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6014 const OperandVector &Operands) { 6015 OptionalImmIndexMap OptionalIdx; 6016 6017 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6018 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6019 6020 // Add the register arguments 6021 if (Op.isReg()) { 6022 Op.addRegOperands(Inst, 1); 6023 continue; 6024 } 6025 6026 // Handle optional arguments 6027 OptionalIdx[Op.getImmTy()] = i; 6028 } 6029 6030 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6031 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6032 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6033 6034 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6035 } 6036 6037 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6038 bool IsGdsHardcoded) { 6039 OptionalImmIndexMap OptionalIdx; 6040 6041 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6042 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6043 6044 // Add the register arguments 6045 if (Op.isReg()) { 6046 Op.addRegOperands(Inst, 1); 6047 continue; 6048 } 6049 6050 if (Op.isToken() && Op.getToken() == "gds") { 6051 IsGdsHardcoded = true; 6052 continue; 6053 } 6054 6055 // Handle optional arguments 6056 OptionalIdx[Op.getImmTy()] = i; 6057 } 6058 6059 AMDGPUOperand::ImmTy OffsetType = 6060 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6061 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6062 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6063 AMDGPUOperand::ImmTyOffset; 6064 6065 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6066 6067 if (!IsGdsHardcoded) { 6068 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6069 } 6070 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6071 } 6072 6073 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6074 OptionalImmIndexMap OptionalIdx; 6075 6076 unsigned OperandIdx[4]; 6077 unsigned EnMask = 0; 6078 int SrcIdx = 0; 6079 6080 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6081 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6082 6083 // Add the register arguments 6084 if (Op.isReg()) { 6085 assert(SrcIdx < 4); 6086 OperandIdx[SrcIdx] = Inst.size(); 6087 Op.addRegOperands(Inst, 1); 6088 ++SrcIdx; 6089 continue; 6090 } 6091 6092 if (Op.isOff()) { 6093 assert(SrcIdx < 4); 6094 OperandIdx[SrcIdx] = Inst.size(); 6095 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6096 ++SrcIdx; 6097 continue; 6098 } 6099 6100 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6101 Op.addImmOperands(Inst, 1); 6102 continue; 6103 } 6104 6105 if (Op.isToken() && Op.getToken() == "done") 6106 continue; 6107 6108 // Handle optional arguments 6109 OptionalIdx[Op.getImmTy()] = i; 6110 } 6111 6112 assert(SrcIdx == 4); 6113 6114 bool Compr = false; 6115 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6116 Compr = true; 6117 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6118 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6119 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6120 } 6121 6122 for (auto i = 0; i < SrcIdx; ++i) { 6123 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6124 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6125 } 6126 } 6127 6128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6129 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6130 6131 Inst.addOperand(MCOperand::createImm(EnMask)); 6132 } 6133 6134 //===----------------------------------------------------------------------===// 6135 // s_waitcnt 6136 //===----------------------------------------------------------------------===// 6137 6138 static bool 6139 encodeCnt( 6140 const AMDGPU::IsaVersion ISA, 6141 int64_t &IntVal, 6142 int64_t CntVal, 6143 bool Saturate, 6144 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6145 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6146 { 6147 bool Failed = false; 6148 6149 IntVal = encode(ISA, IntVal, CntVal); 6150 if (CntVal != decode(ISA, IntVal)) { 6151 if (Saturate) { 6152 IntVal = encode(ISA, IntVal, -1); 6153 } else { 6154 Failed = true; 6155 } 6156 } 6157 return Failed; 6158 } 6159 6160 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6161 6162 SMLoc CntLoc = getLoc(); 6163 StringRef CntName = getTokenStr(); 6164 6165 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6166 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6167 return false; 6168 6169 int64_t CntVal; 6170 SMLoc ValLoc = getLoc(); 6171 if (!parseExpr(CntVal)) 6172 return false; 6173 6174 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6175 6176 bool Failed = true; 6177 bool Sat = CntName.endswith("_sat"); 6178 6179 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6180 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6181 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6182 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6183 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6184 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6185 } else { 6186 Error(CntLoc, "invalid counter name " + CntName); 6187 return false; 6188 } 6189 6190 if (Failed) { 6191 Error(ValLoc, "too large value for " + CntName); 6192 return false; 6193 } 6194 6195 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6196 return false; 6197 6198 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6199 if (isToken(AsmToken::EndOfStatement)) { 6200 Error(getLoc(), "expected a counter name"); 6201 return false; 6202 } 6203 } 6204 6205 return true; 6206 } 6207 6208 OperandMatchResultTy 6209 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6210 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6211 int64_t Waitcnt = getWaitcntBitMask(ISA); 6212 SMLoc S = getLoc(); 6213 6214 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6215 while (!isToken(AsmToken::EndOfStatement)) { 6216 if (!parseCnt(Waitcnt)) 6217 return MatchOperand_ParseFail; 6218 } 6219 } else { 6220 if (!parseExpr(Waitcnt)) 6221 return MatchOperand_ParseFail; 6222 } 6223 6224 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6225 return MatchOperand_Success; 6226 } 6227 6228 bool 6229 AMDGPUOperand::isSWaitCnt() const { 6230 return isImm(); 6231 } 6232 6233 //===----------------------------------------------------------------------===// 6234 // hwreg 6235 //===----------------------------------------------------------------------===// 6236 6237 bool 6238 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6239 OperandInfoTy &Offset, 6240 
OperandInfoTy &Width) { 6241 using namespace llvm::AMDGPU::Hwreg; 6242 6243 // The register may be specified by name or using a numeric code 6244 HwReg.Loc = getLoc(); 6245 if (isToken(AsmToken::Identifier) && 6246 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) { 6247 HwReg.IsSymbolic = true; 6248 lex(); // skip register name 6249 } else if (!parseExpr(HwReg.Id, "a register name")) { 6250 return false; 6251 } 6252 6253 if (trySkipToken(AsmToken::RParen)) 6254 return true; 6255 6256 // parse optional params 6257 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6258 return false; 6259 6260 Offset.Loc = getLoc(); 6261 if (!parseExpr(Offset.Id)) 6262 return false; 6263 6264 if (!skipToken(AsmToken::Comma, "expected a comma")) 6265 return false; 6266 6267 Width.Loc = getLoc(); 6268 return parseExpr(Width.Id) && 6269 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6270 } 6271 6272 bool 6273 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6274 const OperandInfoTy &Offset, 6275 const OperandInfoTy &Width) { 6276 6277 using namespace llvm::AMDGPU::Hwreg; 6278 6279 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6280 Error(HwReg.Loc, 6281 "specified hardware register is not supported on this GPU"); 6282 return false; 6283 } 6284 if (!isValidHwreg(HwReg.Id)) { 6285 Error(HwReg.Loc, 6286 "invalid code of hardware register: only 6-bit values are legal"); 6287 return false; 6288 } 6289 if (!isValidHwregOffset(Offset.Id)) { 6290 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6291 return false; 6292 } 6293 if (!isValidHwregWidth(Width.Id)) { 6294 Error(Width.Loc, 6295 "invalid bitfield width: only values from 1 to 32 are legal"); 6296 return false; 6297 } 6298 return true; 6299 } 6300 6301 OperandMatchResultTy 6302 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6303 using namespace llvm::AMDGPU::Hwreg; 6304 6305 int64_t ImmVal = 0; 6306 SMLoc Loc = getLoc(); 6307 6308 if (trySkipId("hwreg", AsmToken::LParen)) { 6309 OperandInfoTy HwReg(ID_UNKNOWN_); 6310 OperandInfoTy Offset(OFFSET_DEFAULT_); 6311 OperandInfoTy Width(WIDTH_DEFAULT_); 6312 if (parseHwregBody(HwReg, Offset, Width) && 6313 validateHwreg(HwReg, Offset, Width)) { 6314 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6315 } else { 6316 return MatchOperand_ParseFail; 6317 } 6318 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6319 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6320 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6321 return MatchOperand_ParseFail; 6322 } 6323 } else { 6324 return MatchOperand_ParseFail; 6325 } 6326 6327 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6328 return MatchOperand_Success; 6329 } 6330 6331 bool AMDGPUOperand::isHwreg() const { 6332 return isImmTy(ImmTyHwreg); 6333 } 6334 6335 //===----------------------------------------------------------------------===// 6336 // sendmsg 6337 //===----------------------------------------------------------------------===// 6338 6339 bool 6340 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6341 OperandInfoTy &Op, 6342 OperandInfoTy &Stream) { 6343 using namespace llvm::AMDGPU::SendMsg; 6344 6345 Msg.Loc = getLoc(); 6346 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6347 Msg.IsSymbolic = true; 6348 lex(); // skip message name 6349 } else if (!parseExpr(Msg.Id, "a message name")) { 6350 return false; 6351 } 6352 6353 if (trySkipToken(AsmToken::Comma)) { 6354 
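// An optional operation id follows the message id here, e.g. GS_OP_EMIT in
// sendmsg(MSG_GS, GS_OP_EMIT, 0) (illustrative); it may in turn be followed
// by an optional stream id.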
Op.IsDefined = true; 6355 Op.Loc = getLoc(); 6356 if (isToken(AsmToken::Identifier) && 6357 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6358 lex(); // skip operation name 6359 } else if (!parseExpr(Op.Id, "an operation name")) { 6360 return false; 6361 } 6362 6363 if (trySkipToken(AsmToken::Comma)) { 6364 Stream.IsDefined = true; 6365 Stream.Loc = getLoc(); 6366 if (!parseExpr(Stream.Id)) 6367 return false; 6368 } 6369 } 6370 6371 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6372 } 6373 6374 bool 6375 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6376 const OperandInfoTy &Op, 6377 const OperandInfoTy &Stream) { 6378 using namespace llvm::AMDGPU::SendMsg; 6379 6380 // Validation strictness depends on whether message is specified 6381 // in a symbolic or in a numeric form. In the latter case 6382 // only encoding possibility is checked. 6383 bool Strict = Msg.IsSymbolic; 6384 6385 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6386 Error(Msg.Loc, "invalid message id"); 6387 return false; 6388 } 6389 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6390 if (Op.IsDefined) { 6391 Error(Op.Loc, "message does not support operations"); 6392 } else { 6393 Error(Msg.Loc, "missing message operation"); 6394 } 6395 return false; 6396 } 6397 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6398 Error(Op.Loc, "invalid operation id"); 6399 return false; 6400 } 6401 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6402 Error(Stream.Loc, "message operation does not support streams"); 6403 return false; 6404 } 6405 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6406 Error(Stream.Loc, "invalid message stream id"); 6407 return false; 6408 } 6409 return true; 6410 } 6411 6412 OperandMatchResultTy 6413 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6414 using namespace llvm::AMDGPU::SendMsg; 6415 6416 int64_t ImmVal = 0; 6417 SMLoc Loc = getLoc(); 6418 6419 if (trySkipId("sendmsg", AsmToken::LParen)) { 6420 OperandInfoTy Msg(ID_UNKNOWN_); 6421 OperandInfoTy Op(OP_NONE_); 6422 OperandInfoTy Stream(STREAM_ID_NONE_); 6423 if (parseSendMsgBody(Msg, Op, Stream) && 6424 validateSendMsg(Msg, Op, Stream)) { 6425 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6426 } else { 6427 return MatchOperand_ParseFail; 6428 } 6429 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6430 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6431 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6432 return MatchOperand_ParseFail; 6433 } 6434 } else { 6435 return MatchOperand_ParseFail; 6436 } 6437 6438 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6439 return MatchOperand_Success; 6440 } 6441 6442 bool AMDGPUOperand::isSendMsg() const { 6443 return isImmTy(ImmTySendMsg); 6444 } 6445 6446 //===----------------------------------------------------------------------===// 6447 // v_interp 6448 //===----------------------------------------------------------------------===// 6449 6450 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6451 StringRef Str; 6452 SMLoc S = getLoc(); 6453 6454 if (!parseId(Str)) 6455 return MatchOperand_NoMatch; 6456 6457 int Slot = StringSwitch<int>(Str) 6458 .Case("p10", 0) 6459 .Case("p20", 1) 6460 .Case("p0", 2) 6461 .Default(-1); 6462 6463 if (Slot == -1) { 6464 Error(S, "invalid interpolation slot"); 6465 return MatchOperand_ParseFail; 6466 } 6467 6468 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6469 
AMDGPUOperand::ImmTyInterpSlot)); 6470 return MatchOperand_Success; 6471 } 6472 6473 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6474 StringRef Str; 6475 SMLoc S = getLoc(); 6476 6477 if (!parseId(Str)) 6478 return MatchOperand_NoMatch; 6479 6480 if (!Str.startswith("attr")) { 6481 Error(S, "invalid interpolation attribute"); 6482 return MatchOperand_ParseFail; 6483 } 6484 6485 StringRef Chan = Str.take_back(2); 6486 int AttrChan = StringSwitch<int>(Chan) 6487 .Case(".x", 0) 6488 .Case(".y", 1) 6489 .Case(".z", 2) 6490 .Case(".w", 3) 6491 .Default(-1); 6492 if (AttrChan == -1) { 6493 Error(S, "invalid or missing interpolation attribute channel"); 6494 return MatchOperand_ParseFail; 6495 } 6496 6497 Str = Str.drop_back(2).drop_front(4); 6498 6499 uint8_t Attr; 6500 if (Str.getAsInteger(10, Attr)) { 6501 Error(S, "invalid or missing interpolation attribute number"); 6502 return MatchOperand_ParseFail; 6503 } 6504 6505 if (Attr > 63) { 6506 Error(S, "out of bounds interpolation attribute number"); 6507 return MatchOperand_ParseFail; 6508 } 6509 6510 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6511 6512 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6513 AMDGPUOperand::ImmTyInterpAttr)); 6514 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6515 AMDGPUOperand::ImmTyAttrChan)); 6516 return MatchOperand_Success; 6517 } 6518 6519 //===----------------------------------------------------------------------===// 6520 // exp 6521 //===----------------------------------------------------------------------===// 6522 6523 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6524 using namespace llvm::AMDGPU::Exp; 6525 6526 StringRef Str; 6527 SMLoc S = getLoc(); 6528 6529 if (!parseId(Str)) 6530 return MatchOperand_NoMatch; 6531 6532 unsigned Id = getTgtId(Str); 6533 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6534 Error(S, (Id == ET_INVALID) ? 
6535 "invalid exp target" : 6536 "exp target is not supported on this GPU"); 6537 return MatchOperand_ParseFail; 6538 } 6539 6540 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6541 AMDGPUOperand::ImmTyExpTgt)); 6542 return MatchOperand_Success; 6543 } 6544 6545 //===----------------------------------------------------------------------===// 6546 // parser helpers 6547 //===----------------------------------------------------------------------===// 6548 6549 bool 6550 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6551 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6552 } 6553 6554 bool 6555 AMDGPUAsmParser::isId(const StringRef Id) const { 6556 return isId(getToken(), Id); 6557 } 6558 6559 bool 6560 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6561 return getTokenKind() == Kind; 6562 } 6563 6564 bool 6565 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6566 if (isId(Id)) { 6567 lex(); 6568 return true; 6569 } 6570 return false; 6571 } 6572 6573 bool 6574 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6575 if (isToken(AsmToken::Identifier)) { 6576 StringRef Tok = getTokenStr(); 6577 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6578 lex(); 6579 return true; 6580 } 6581 } 6582 return false; 6583 } 6584 6585 bool 6586 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6587 if (isId(Id) && peekToken().is(Kind)) { 6588 lex(); 6589 lex(); 6590 return true; 6591 } 6592 return false; 6593 } 6594 6595 bool 6596 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6597 if (isToken(Kind)) { 6598 lex(); 6599 return true; 6600 } 6601 return false; 6602 } 6603 6604 bool 6605 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6606 const StringRef ErrMsg) { 6607 if (!trySkipToken(Kind)) { 6608 Error(getLoc(), ErrMsg); 6609 return false; 6610 } 6611 return true; 6612 } 6613 6614 bool 6615 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6616 SMLoc S = getLoc(); 6617 6618 const MCExpr *Expr; 6619 if (Parser.parseExpression(Expr)) 6620 return false; 6621 6622 if (Expr->evaluateAsAbsolute(Imm)) 6623 return true; 6624 6625 if (Expected.empty()) { 6626 Error(S, "expected absolute expression"); 6627 } else { 6628 Error(S, Twine("expected ", Expected) + 6629 Twine(" or an absolute expression")); 6630 } 6631 return false; 6632 } 6633 6634 bool 6635 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6636 SMLoc S = getLoc(); 6637 6638 const MCExpr *Expr; 6639 if (Parser.parseExpression(Expr)) 6640 return false; 6641 6642 int64_t IntVal; 6643 if (Expr->evaluateAsAbsolute(IntVal)) { 6644 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6645 } else { 6646 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6647 } 6648 return true; 6649 } 6650 6651 bool 6652 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6653 if (isToken(AsmToken::String)) { 6654 Val = getToken().getStringContents(); 6655 lex(); 6656 return true; 6657 } else { 6658 Error(getLoc(), ErrMsg); 6659 return false; 6660 } 6661 } 6662 6663 bool 6664 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6665 if (isToken(AsmToken::Identifier)) { 6666 Val = getTokenStr(); 6667 lex(); 6668 return true; 6669 } else { 6670 if (!ErrMsg.empty()) 6671 Error(getLoc(), ErrMsg); 6672 return false; 6673 } 6674 } 6675 6676 AsmToken 6677 AMDGPUAsmParser::getToken() const { 6678 return Parser.getTok(); 6679 } 6680 6681 AsmToken 6682 
AMDGPUAsmParser::peekToken() { 6683 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6684 } 6685 6686 void 6687 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6688 auto TokCount = getLexer().peekTokens(Tokens); 6689 6690 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6691 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6692 } 6693 6694 AsmToken::TokenKind 6695 AMDGPUAsmParser::getTokenKind() const { 6696 return getLexer().getKind(); 6697 } 6698 6699 SMLoc 6700 AMDGPUAsmParser::getLoc() const { 6701 return getToken().getLoc(); 6702 } 6703 6704 StringRef 6705 AMDGPUAsmParser::getTokenStr() const { 6706 return getToken().getString(); 6707 } 6708 6709 void 6710 AMDGPUAsmParser::lex() { 6711 Parser.Lex(); 6712 } 6713 6714 SMLoc 6715 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6716 const OperandVector &Operands) const { 6717 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6718 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6719 if (Test(Op)) 6720 return Op.getStartLoc(); 6721 } 6722 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6723 } 6724 6725 SMLoc 6726 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6727 const OperandVector &Operands) const { 6728 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6729 return getOperandLoc(Test, Operands); 6730 } 6731 6732 SMLoc 6733 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6734 const OperandVector &Operands) const { 6735 auto Test = [=](const AMDGPUOperand& Op) { 6736 return Op.isRegKind() && Op.getReg() == Reg; 6737 }; 6738 return getOperandLoc(Test, Operands); 6739 } 6740 6741 SMLoc 6742 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6743 auto Test = [](const AMDGPUOperand& Op) { 6744 return Op.IsImmKindLiteral() || Op.isExpr(); 6745 }; 6746 return getOperandLoc(Test, Operands); 6747 } 6748 6749 SMLoc 6750 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6751 auto Test = [](const AMDGPUOperand& Op) { 6752 return Op.isImmKindConst(); 6753 }; 6754 return getOperandLoc(Test, Operands); 6755 } 6756 6757 //===----------------------------------------------------------------------===// 6758 // swizzle 6759 //===----------------------------------------------------------------------===// 6760 6761 LLVM_READNONE 6762 static unsigned 6763 encodeBitmaskPerm(const unsigned AndMask, 6764 const unsigned OrMask, 6765 const unsigned XorMask) { 6766 using namespace llvm::AMDGPU::Swizzle; 6767 6768 return BITMASK_PERM_ENC | 6769 (AndMask << BITMASK_AND_SHIFT) | 6770 (OrMask << BITMASK_OR_SHIFT) | 6771 (XorMask << BITMASK_XOR_SHIFT); 6772 } 6773 6774 bool 6775 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6776 const unsigned MinVal, 6777 const unsigned MaxVal, 6778 const StringRef ErrMsg, 6779 SMLoc &Loc) { 6780 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6781 return false; 6782 } 6783 Loc = getLoc(); 6784 if (!parseExpr(Op)) { 6785 return false; 6786 } 6787 if (Op < MinVal || Op > MaxVal) { 6788 Error(Loc, ErrMsg); 6789 return false; 6790 } 6791 6792 return true; 6793 } 6794 6795 bool 6796 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6797 const unsigned MinVal, 6798 const unsigned MaxVal, 6799 const StringRef ErrMsg) { 6800 SMLoc Loc; 6801 for (unsigned i = 0; i < OpNum; ++i) { 6802 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6803 return false; 6804 } 6805 6806 return true; 6807 } 6808 6809 bool 6810 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6811 using namespace llvm::AMDGPU::Swizzle; 6812 6813 int64_t Lane[LANE_NUM]; 6814 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6815 "expected a 2-bit lane id")) { 6816 Imm = QUAD_PERM_ENC; 6817 for (unsigned I = 0; I < LANE_NUM; ++I) { 6818 Imm |= Lane[I] << (LANE_SHIFT * I); 6819 } 6820 return true; 6821 } 6822 return false; 6823 } 6824 6825 bool 6826 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6827 using namespace llvm::AMDGPU::Swizzle; 6828 6829 SMLoc Loc; 6830 int64_t GroupSize; 6831 int64_t LaneIdx; 6832 6833 if (!parseSwizzleOperand(GroupSize, 6834 2, 32, 6835 "group size must be in the interval [2,32]", 6836 Loc)) { 6837 return false; 6838 } 6839 if (!isPowerOf2_64(GroupSize)) { 6840 Error(Loc, "group size must be a power of two"); 6841 return false; 6842 } 6843 if (parseSwizzleOperand(LaneIdx, 6844 0, GroupSize - 1, 6845 "lane id must be in the interval [0,group size - 1]", 6846 Loc)) { 6847 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6848 return true; 6849 } 6850 return false; 6851 } 6852 6853 bool 6854 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6855 using namespace llvm::AMDGPU::Swizzle; 6856 6857 SMLoc Loc; 6858 int64_t GroupSize; 6859 6860 if (!parseSwizzleOperand(GroupSize, 6861 2, 32, 6862 "group size must be in the interval [2,32]", 6863 Loc)) { 6864 return false; 6865 } 6866 if (!isPowerOf2_64(GroupSize)) { 6867 Error(Loc, "group size must be a power of two"); 6868 return false; 6869 } 6870 6871 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6872 return true; 6873 } 6874 6875 bool 6876 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6877 using namespace llvm::AMDGPU::Swizzle; 6878 6879 SMLoc Loc; 6880 int64_t GroupSize; 6881 6882 if (!parseSwizzleOperand(GroupSize, 6883 1, 16, 6884 "group size must be in the interval [1,16]", 6885 Loc)) { 6886 return false; 6887 } 6888 if (!isPowerOf2_64(GroupSize)) { 6889 Error(Loc, "group size must be a power of two"); 6890 return false; 6891 } 6892 6893 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6894 return true; 6895 } 6896 6897 bool 6898 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6899 using namespace llvm::AMDGPU::Swizzle; 6900 6901 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6902 return false; 6903 } 6904 6905 StringRef Ctl; 6906 SMLoc StrLoc = getLoc(); 6907 if (!parseString(Ctl)) { 6908 return false; 6909 } 6910 if (Ctl.size() != BITMASK_WIDTH) { 6911 Error(StrLoc, "expected a 5-character mask"); 6912 return false; 6913 } 6914 6915 unsigned AndMask = 0; 6916 unsigned OrMask = 0; 6917 unsigned XorMask = 0; 6918 6919 for (size_t i = 0; i < Ctl.size(); ++i) { 6920 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6921 switch(Ctl[i]) { 6922 default: 6923 Error(StrLoc, "invalid mask"); 6924 return false; 6925 case '0': 6926 break; 6927 case '1': 6928 OrMask |= Mask; 6929 break; 6930 case 'p': 6931 AndMask |= Mask; 6932 break; 6933 case 'i': 6934 AndMask |= Mask; 6935 XorMask |= Mask; 6936 break; 6937 } 6938 } 6939 6940 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6941 return true; 6942 } 6943 6944 bool 6945 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6946 6947 SMLoc OffsetLoc = getLoc(); 6948 6949 if (!parseExpr(Imm, "a swizzle macro")) { 6950 return false; 6951 } 6952 if (!isUInt<16>(Imm)) { 6953 Error(OffsetLoc, "expected a 16-bit offset"); 6954 return false; 6955 } 6956 return true; 6957 } 6958 6959 bool 6960 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6961 using namespace llvm::AMDGPU::Swizzle; 6962 6963 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6964 6965 SMLoc ModeLoc = getLoc(); 6966 bool Ok = false; 6967 6968 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6969 Ok = parseSwizzleQuadPerm(Imm); 6970 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6971 Ok = parseSwizzleBitmaskPerm(Imm); 6972 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6973 Ok = parseSwizzleBroadcast(Imm); 6974 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6975 Ok = parseSwizzleSwap(Imm); 6976 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6977 Ok = parseSwizzleReverse(Imm); 6978 } else { 6979 Error(ModeLoc, "expected a swizzle mode"); 6980 } 6981 6982 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6983 } 6984 6985 return false; 6986 } 6987 6988 OperandMatchResultTy 6989 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6990 SMLoc S = getLoc(); 6991 int64_t Imm = 0; 6992 6993 if (trySkipId("offset")) { 6994 6995 bool Ok = false; 6996 if (skipToken(AsmToken::Colon, "expected a colon")) { 6997 if (trySkipId("swizzle")) { 6998 Ok = parseSwizzleMacro(Imm); 6999 } else { 7000 Ok = parseSwizzleOffset(Imm); 7001 } 7002 } 7003 7004 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7005 7006 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7007 } else { 7008 // Swizzle "offset" operand is optional. 7009 // If it is omitted, try parsing other optional operands. 7010 return parseOptionalOpr(Operands); 7011 } 7012 } 7013 7014 bool 7015 AMDGPUOperand::isSwizzle() const { 7016 return isImmTy(ImmTySwizzle); 7017 } 7018 7019 //===----------------------------------------------------------------------===// 7020 // VGPR Index Mode 7021 //===----------------------------------------------------------------------===// 7022 7023 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7024 7025 using namespace llvm::AMDGPU::VGPRIndexMode; 7026 7027 if (trySkipToken(AsmToken::RParen)) { 7028 return OFF; 7029 } 7030 7031 int64_t Imm = 0; 7032 7033 while (true) { 7034 unsigned Mode = 0; 7035 SMLoc S = getLoc(); 7036 7037 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7038 if (trySkipId(IdSymbolic[ModeId])) { 7039 Mode = 1 << ModeId; 7040 break; 7041 } 7042 } 7043 7044 if (Mode == 0) { 7045 Error(S, (Imm == 0)? 
7046 "expected a VGPR index mode or a closing parenthesis" : 7047 "expected a VGPR index mode"); 7048 return UNDEF; 7049 } 7050 7051 if (Imm & Mode) { 7052 Error(S, "duplicate VGPR index mode"); 7053 return UNDEF; 7054 } 7055 Imm |= Mode; 7056 7057 if (trySkipToken(AsmToken::RParen)) 7058 break; 7059 if (!skipToken(AsmToken::Comma, 7060 "expected a comma or a closing parenthesis")) 7061 return UNDEF; 7062 } 7063 7064 return Imm; 7065 } 7066 7067 OperandMatchResultTy 7068 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7069 7070 using namespace llvm::AMDGPU::VGPRIndexMode; 7071 7072 int64_t Imm = 0; 7073 SMLoc S = getLoc(); 7074 7075 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7076 Imm = parseGPRIdxMacro(); 7077 if (Imm == UNDEF) 7078 return MatchOperand_ParseFail; 7079 } else { 7080 if (getParser().parseAbsoluteExpression(Imm)) 7081 return MatchOperand_ParseFail; 7082 if (Imm < 0 || !isUInt<4>(Imm)) { 7083 Error(S, "invalid immediate: only 4-bit values are legal"); 7084 return MatchOperand_ParseFail; 7085 } 7086 } 7087 7088 Operands.push_back( 7089 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7090 return MatchOperand_Success; 7091 } 7092 7093 bool AMDGPUOperand::isGPRIdxMode() const { 7094 return isImmTy(ImmTyGprIdxMode); 7095 } 7096 7097 //===----------------------------------------------------------------------===// 7098 // sopp branch targets 7099 //===----------------------------------------------------------------------===// 7100 7101 OperandMatchResultTy 7102 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7103 7104 // Make sure we are not parsing something 7105 // that looks like a label or an expression but is not. 7106 // This will improve error messages. 7107 if (isRegister() || isModifier()) 7108 return MatchOperand_NoMatch; 7109 7110 if (!parseExpr(Operands)) 7111 return MatchOperand_ParseFail; 7112 7113 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7114 assert(Opr.isImm() || Opr.isExpr()); 7115 SMLoc Loc = Opr.getStartLoc(); 7116 7117 // Currently we do not support arbitrary expressions as branch targets. 7118 // Only labels and absolute expressions are accepted. 
7119 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7120 Error(Loc, "expected an absolute expression or a label"); 7121 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7122 Error(Loc, "expected a 16-bit signed jump offset"); 7123 } 7124 7125 return MatchOperand_Success; 7126 } 7127 7128 //===----------------------------------------------------------------------===// 7129 // Boolean holding registers 7130 //===----------------------------------------------------------------------===// 7131 7132 OperandMatchResultTy 7133 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7134 return parseReg(Operands); 7135 } 7136 7137 //===----------------------------------------------------------------------===// 7138 // mubuf 7139 //===----------------------------------------------------------------------===// 7140 7141 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7142 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7143 } 7144 7145 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7146 const OperandVector &Operands, 7147 bool IsAtomic, 7148 bool IsLds) { 7149 bool IsLdsOpcode = IsLds; 7150 bool HasLdsModifier = false; 7151 OptionalImmIndexMap OptionalIdx; 7152 unsigned FirstOperandIdx = 1; 7153 bool IsAtomicReturn = false; 7154 7155 if (IsAtomic) { 7156 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7157 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7158 if (!Op.isCPol()) 7159 continue; 7160 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7161 break; 7162 } 7163 7164 if (!IsAtomicReturn) { 7165 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7166 if (NewOpc != -1) 7167 Inst.setOpcode(NewOpc); 7168 } 7169 7170 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7171 SIInstrFlags::IsAtomicRet; 7172 } 7173 7174 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7175 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7176 7177 // Add the register arguments 7178 if (Op.isReg()) { 7179 Op.addRegOperands(Inst, 1); 7180 // Insert a tied src for atomic return dst. 7181 // This cannot be postponed as subsequent calls to 7182 // addImmOperands rely on correct number of MC operands. 7183 if (IsAtomicReturn && i == FirstOperandIdx) 7184 Op.addRegOperands(Inst, 1); 7185 continue; 7186 } 7187 7188 // Handle the case where soffset is an immediate 7189 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7190 Op.addImmOperands(Inst, 1); 7191 continue; 7192 } 7193 7194 HasLdsModifier |= Op.isLDS(); 7195 7196 // Handle tokens like 'offen' which are sometimes hard-coded into the 7197 // asm string. There are no MCInst operands for these. 7198 if (Op.isToken()) { 7199 continue; 7200 } 7201 assert(Op.isImm()); 7202 7203 // Handle optional arguments 7204 OptionalIdx[Op.getImmTy()] = i; 7205 } 7206 7207 // This is a workaround for an llvm quirk which may result in an 7208 // incorrect instruction selection. Lds and non-lds versions of 7209 // MUBUF instructions are identical except that lds versions 7210 // have mandatory 'lds' modifier. However this modifier follows 7211 // optional modifiers and llvm asm matcher regards this 'lds' 7212 // modifier as an optional one. As a result, an lds version 7213 // of opcode may be selected even if it has no 'lds' modifier. 7214 if (IsLdsOpcode && !HasLdsModifier) { 7215 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7216 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7217 Inst.setOpcode(NoLdsOpcode); 7218 IsLdsOpcode = false; 7219 } 7220 } 7221 7222 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7223 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7224 7225 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7227 } 7228 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7229 } 7230 7231 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7232 OptionalImmIndexMap OptionalIdx; 7233 7234 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7235 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7236 7237 // Add the register arguments 7238 if (Op.isReg()) { 7239 Op.addRegOperands(Inst, 1); 7240 continue; 7241 } 7242 7243 // Handle the case where soffset is an immediate 7244 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7245 Op.addImmOperands(Inst, 1); 7246 continue; 7247 } 7248 7249 // Handle tokens like 'offen' which are sometimes hard-coded into the 7250 // asm string. There are no MCInst operands for these. 7251 if (Op.isToken()) { 7252 continue; 7253 } 7254 assert(Op.isImm()); 7255 7256 // Handle optional arguments 7257 OptionalIdx[Op.getImmTy()] = i; 7258 } 7259 7260 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7261 AMDGPUOperand::ImmTyOffset); 7262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7264 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7266 } 7267 7268 //===----------------------------------------------------------------------===// 7269 // mimg 7270 //===----------------------------------------------------------------------===// 7271 7272 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7273 bool IsAtomic) { 7274 unsigned I = 1; 7275 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7276 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7277 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7278 } 7279 7280 if (IsAtomic) { 7281 // Add src, same as dst 7282 assert(Desc.getNumDefs() == 1); 7283 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7284 } 7285 7286 OptionalImmIndexMap OptionalIdx; 7287 7288 for (unsigned E = Operands.size(); I != E; ++I) { 7289 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7290 7291 // Add the register arguments 7292 if (Op.isReg()) { 7293 Op.addRegOperands(Inst, 1); 7294 } else if (Op.isImmModifier()) { 7295 OptionalIdx[Op.getImmTy()] = I; 7296 } else if (!Op.isToken()) { 7297 llvm_unreachable("unexpected operand type"); 7298 } 7299 } 7300 7301 bool IsGFX10Plus = isGFX10Plus(); 7302 7303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7304 if (IsGFX10Plus) 7305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7306 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7309 if (IsGFX10Plus) 7310 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7311 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7312 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7313 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7314 if (!IsGFX10Plus) 7315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7317 } 7318 7319 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7320 cvtMIMG(Inst, Operands, true); 7321 } 7322 7323 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7324 OptionalImmIndexMap OptionalIdx; 7325 bool IsAtomicReturn = false; 7326 7327 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7328 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7329 if (!Op.isCPol()) 7330 continue; 7331 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7332 break; 7333 } 7334 7335 if (!IsAtomicReturn) { 7336 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7337 if (NewOpc != -1) 7338 Inst.setOpcode(NewOpc); 7339 } 7340 7341 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7342 SIInstrFlags::IsAtomicRet; 7343 7344 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7345 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7346 7347 // Add the register arguments 7348 if (Op.isReg()) { 7349 Op.addRegOperands(Inst, 1); 7350 if (IsAtomicReturn && i == 1) 7351 Op.addRegOperands(Inst, 1); 7352 continue; 7353 } 7354 7355 // Handle the case where soffset is an immediate 7356 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7357 Op.addImmOperands(Inst, 1); 7358 continue; 7359 } 7360 7361 // Handle tokens like 'offen' which are sometimes hard-coded into the 7362 // asm string. There are no MCInst operands for these. 7363 if (Op.isToken()) { 7364 continue; 7365 } 7366 assert(Op.isImm()); 7367 7368 // Handle optional arguments 7369 OptionalIdx[Op.getImmTy()] = i; 7370 } 7371 7372 if ((int)Inst.getNumOperands() <= 7373 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7376 } 7377 7378 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7379 const OperandVector &Operands) { 7380 for (unsigned I = 1; I < Operands.size(); ++I) { 7381 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7382 if (Operand.isReg()) 7383 Operand.addRegOperands(Inst, 1); 7384 } 7385 7386 Inst.addOperand(MCOperand::createImm(1)); // a16 7387 } 7388 7389 //===----------------------------------------------------------------------===// 7390 // smrd 7391 //===----------------------------------------------------------------------===// 7392 7393 bool AMDGPUOperand::isSMRDOffset8() const { 7394 return isImm() && isUInt<8>(getImm()); 7395 } 7396 7397 bool AMDGPUOperand::isSMEMOffset() const { 7398 return isImm(); // Offset range is checked later by validator. 7399 } 7400 7401 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7402 // 32-bit literals are only supported on CI and we only want to use them 7403 // when the offset is > 8-bits. 
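// For example, an offset of 0x1000 does not fit in 8 bits and would use the
// 32-bit literal form, while an offset of 0xff still fits the 8-bit encoding.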
7404 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7405 } 7406 7407 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7408 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7409 } 7410 7411 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7412 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7413 } 7414 7415 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7416 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7417 } 7418 7419 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7420 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7421 } 7422 7423 //===----------------------------------------------------------------------===// 7424 // vop3 7425 //===----------------------------------------------------------------------===// 7426 7427 static bool ConvertOmodMul(int64_t &Mul) { 7428 if (Mul != 1 && Mul != 2 && Mul != 4) 7429 return false; 7430 7431 Mul >>= 1; 7432 return true; 7433 } 7434 7435 static bool ConvertOmodDiv(int64_t &Div) { 7436 if (Div == 1) { 7437 Div = 0; 7438 return true; 7439 } 7440 7441 if (Div == 2) { 7442 Div = 3; 7443 return true; 7444 } 7445 7446 return false; 7447 } 7448 7449 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7450 // This is intentional and ensures compatibility with sp3. 7451 // See bug 35397 for details. 7452 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7453 if (BoundCtrl == 0 || BoundCtrl == 1) { 7454 BoundCtrl = 1; 7455 return true; 7456 } 7457 return false; 7458 } 7459 7460 // Note: the order in this table matches the order of operands in AsmString. 7461 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7462 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7463 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7464 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7465 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7466 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7467 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7468 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7469 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7470 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7471 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7472 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7473 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7474 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7475 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7476 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7477 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7478 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7479 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7480 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7481 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7482 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7483 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7484 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7485 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7486 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7487 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7488 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7489 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7490 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7491 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7492 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7493 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7494 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr}, 7495 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7496 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7497 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7498 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7499 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7500 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7501 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7502 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7503 }; 7504 7505 void AMDGPUAsmParser::onBeginOfFile() { 7506 if (!getParser().getStreamer().getTargetStreamer() || 7507 getSTI().getTargetTriple().getArch() == Triple::r600) 7508 return; 7509 7510 if (!getTargetStreamer().getTargetID()) 7511 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7512 7513 if (isHsaAbiVersion3AndAbove(&getSTI())) 7514 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7515 } 7516 7517 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7518 7519 OperandMatchResultTy res = parseOptionalOpr(Operands); 7520 7521 // This is a hack to enable hardcoded mandatory operands which follow 7522 // optional operands. 7523 // 7524 // The current design assumes that all operands after the first optional operand 7525 // are also optional. However, some instructions violate this rule (see e.g. 7526 // flat/global atomics, which have a hardcoded 'glc' operand). 7527 // 7528 // To work around this, we have to (implicitly) parse extra operands to make 7529 // sure the autogenerated parser for custom operands never hits a hardcoded 7530 // mandatory operand.
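// For example, in something like
//   flat_atomic_add v0, v[1:2], v2 offset:16 glc
// the hardcoded 'glc' token follows the optional 'offset' operand, so the
// bounded lookahead below keeps trying further optional operands (skipping a
// comma if present) until none matches or the end of statement is reached.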
7531 7532 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7533 if (res != MatchOperand_Success || 7534 isToken(AsmToken::EndOfStatement)) 7535 break; 7536 7537 trySkipToken(AsmToken::Comma); 7538 res = parseOptionalOpr(Operands); 7539 } 7540 7541 return res; 7542 } 7543 7544 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7545 OperandMatchResultTy res; 7546 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7547 // try to parse any optional operand here 7548 if (Op.IsBit) { 7549 res = parseNamedBit(Op.Name, Operands, Op.Type); 7550 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7551 res = parseOModOperand(Operands); 7552 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7553 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7554 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7555 res = parseSDWASel(Operands, Op.Name, Op.Type); 7556 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7557 res = parseSDWADstUnused(Operands); 7558 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7559 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7560 Op.Type == AMDGPUOperand::ImmTyNegLo || 7561 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7562 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7563 Op.ConvertResult); 7564 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7565 res = parseDim(Operands); 7566 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7567 res = parseCPol(Operands); 7568 } else { 7569 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7570 } 7571 if (res != MatchOperand_NoMatch) { 7572 return res; 7573 } 7574 } 7575 return MatchOperand_NoMatch; 7576 } 7577 7578 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7579 StringRef Name = getTokenStr(); 7580 if (Name == "mul") { 7581 return parseIntWithPrefix("mul", Operands, 7582 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7583 } 7584 7585 if (Name == "div") { 7586 return parseIntWithPrefix("div", Operands, 7587 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7588 } 7589 7590 return MatchOperand_NoMatch; 7591 } 7592 7593 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7594 cvtVOP3P(Inst, Operands); 7595 7596 int Opc = Inst.getOpcode(); 7597 7598 int SrcNum; 7599 const int Ops[] = { AMDGPU::OpName::src0, 7600 AMDGPU::OpName::src1, 7601 AMDGPU::OpName::src2 }; 7602 for (SrcNum = 0; 7603 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7604 ++SrcNum); 7605 assert(SrcNum > 0); 7606 7607 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7608 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7609 7610 if ((OpSel & (1 << SrcNum)) != 0) { 7611 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7612 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7613 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7614 } 7615 } 7616 7617 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7618 // 1. This operand is input modifiers 7619 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7620 // 2. This is not last operand 7621 && Desc.NumOperands > (OpNum + 1) 7622 // 3. Next operand is register class 7623 && Desc.OpInfo[OpNum + 1].RegClass != -1 7624 // 4. 
Next register is not tied to any other operand 7625 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7626 } 7627 7628 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7629 { 7630 OptionalImmIndexMap OptionalIdx; 7631 unsigned Opc = Inst.getOpcode(); 7632 7633 unsigned I = 1; 7634 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7635 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7636 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7637 } 7638 7639 for (unsigned E = Operands.size(); I != E; ++I) { 7640 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7641 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7642 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7643 } else if (Op.isInterpSlot() || 7644 Op.isInterpAttr() || 7645 Op.isAttrChan()) { 7646 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7647 } else if (Op.isImmModifier()) { 7648 OptionalIdx[Op.getImmTy()] = I; 7649 } else { 7650 llvm_unreachable("unhandled operand type"); 7651 } 7652 } 7653 7654 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7655 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7656 } 7657 7658 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7659 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7660 } 7661 7662 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7664 } 7665 } 7666 7667 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7668 OptionalImmIndexMap &OptionalIdx) { 7669 unsigned Opc = Inst.getOpcode(); 7670 7671 unsigned I = 1; 7672 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7673 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7674 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7675 } 7676 7677 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7678 // This instruction has src modifiers 7679 for (unsigned E = Operands.size(); I != E; ++I) { 7680 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7681 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7682 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7683 } else if (Op.isImmModifier()) { 7684 OptionalIdx[Op.getImmTy()] = I; 7685 } else if (Op.isRegOrImm()) { 7686 Op.addRegOrImmOperands(Inst, 1); 7687 } else { 7688 llvm_unreachable("unhandled operand type"); 7689 } 7690 } 7691 } else { 7692 // No src modifiers 7693 for (unsigned E = Operands.size(); I != E; ++I) { 7694 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7695 if (Op.isMod()) { 7696 OptionalIdx[Op.getImmTy()] = I; 7697 } else { 7698 Op.addRegOrImmOperands(Inst, 1); 7699 } 7700 } 7701 } 7702 7703 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7704 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7705 } 7706 7707 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7708 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7709 } 7710 7711 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7712 // it has src2 register operand that is tied to dst operand 7713 // we don't allow modifiers for this operand in assembler so src2_modifiers 7714 // should be 0. 
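// For example, for something like "v_mac_f32_e64 v0, v1, v2" the loop above
// builds {vdst, src0_modifiers, src0, src1_modifiers, src1} (plus clamp/omod
// if present); the block below then inserts src2_modifiers = 0 and a copy of
// the dst register as the tied src2 at the src2_modifiers position.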
7715 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7716 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7717 Opc == AMDGPU::V_MAC_F32_e64_vi || 7718 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7719 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7720 Opc == AMDGPU::V_MAC_F16_e64_vi || 7721 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7722 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7723 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7724 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7725 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7726 auto it = Inst.begin(); 7727 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7728 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7729 ++it; 7730 // Copy the operand to ensure it's not invalidated when Inst grows. 7731 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7732 } 7733 } 7734 7735 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7736 OptionalImmIndexMap OptionalIdx; 7737 cvtVOP3(Inst, Operands, OptionalIdx); 7738 } 7739 7740 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7741 OptionalImmIndexMap &OptIdx) { 7742 const int Opc = Inst.getOpcode(); 7743 const MCInstrDesc &Desc = MII.get(Opc); 7744 7745 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7746 7747 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7748 assert(!IsPacked); 7749 Inst.addOperand(Inst.getOperand(0)); 7750 } 7751 7752 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7753 // instruction, and then figure out where to actually put the modifiers 7754 7755 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7756 if (OpSelIdx != -1) { 7757 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7758 } 7759 7760 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7761 if (OpSelHiIdx != -1) { 7762 int DefaultVal = IsPacked ? 
-1 : 0; 7763 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7764 DefaultVal); 7765 } 7766 7767 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7768 if (NegLoIdx != -1) { 7769 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7770 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7771 } 7772 7773 const int Ops[] = { AMDGPU::OpName::src0, 7774 AMDGPU::OpName::src1, 7775 AMDGPU::OpName::src2 }; 7776 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7777 AMDGPU::OpName::src1_modifiers, 7778 AMDGPU::OpName::src2_modifiers }; 7779 7780 unsigned OpSel = 0; 7781 unsigned OpSelHi = 0; 7782 unsigned NegLo = 0; 7783 unsigned NegHi = 0; 7784 7785 if (OpSelIdx != -1) 7786 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7787 7788 if (OpSelHiIdx != -1) 7789 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7790 7791 if (NegLoIdx != -1) { 7792 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7793 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7794 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7795 } 7796 7797 for (int J = 0; J < 3; ++J) { 7798 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7799 if (OpIdx == -1) 7800 break; 7801 7802 uint32_t ModVal = 0; 7803 7804 if ((OpSel & (1 << J)) != 0) 7805 ModVal |= SISrcMods::OP_SEL_0; 7806 7807 if ((OpSelHi & (1 << J)) != 0) 7808 ModVal |= SISrcMods::OP_SEL_1; 7809 7810 if ((NegLo & (1 << J)) != 0) 7811 ModVal |= SISrcMods::NEG; 7812 7813 if ((NegHi & (1 << J)) != 0) 7814 ModVal |= SISrcMods::NEG_HI; 7815 7816 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7817 7818 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7819 } 7820 } 7821 7822 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7823 OptionalImmIndexMap OptIdx; 7824 cvtVOP3(Inst, Operands, OptIdx); 7825 cvtVOP3P(Inst, Operands, OptIdx); 7826 } 7827 7828 //===----------------------------------------------------------------------===// 7829 // dpp 7830 //===----------------------------------------------------------------------===// 7831 7832 bool AMDGPUOperand::isDPP8() const { 7833 return isImmTy(ImmTyDPP8); 7834 } 7835 7836 bool AMDGPUOperand::isDPPCtrl() const { 7837 using namespace AMDGPU::DPP; 7838 7839 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7840 if (result) { 7841 int64_t Imm = getImm(); 7842 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7843 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7844 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7845 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7846 (Imm == DppCtrl::WAVE_SHL1) || 7847 (Imm == DppCtrl::WAVE_ROL1) || 7848 (Imm == DppCtrl::WAVE_SHR1) || 7849 (Imm == DppCtrl::WAVE_ROR1) || 7850 (Imm == DppCtrl::ROW_MIRROR) || 7851 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7852 (Imm == DppCtrl::BCAST15) || 7853 (Imm == DppCtrl::BCAST31) || 7854 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7855 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7856 } 7857 return false; 7858 } 7859 7860 //===----------------------------------------------------------------------===// 7861 // mAI 7862 //===----------------------------------------------------------------------===// 7863 7864 bool AMDGPUOperand::isBLGP() const { 7865 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7866 } 7867 7868 bool 
AMDGPUOperand::isCBSZ() const { 7869 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7870 } 7871 7872 bool AMDGPUOperand::isABID() const { 7873 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7874 } 7875 7876 bool AMDGPUOperand::isS16Imm() const { 7877 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7878 } 7879 7880 bool AMDGPUOperand::isU16Imm() const { 7881 return isImm() && isUInt<16>(getImm()); 7882 } 7883 7884 //===----------------------------------------------------------------------===// 7885 // dim 7886 //===----------------------------------------------------------------------===// 7887 7888 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7889 // We want to allow "dim:1D" etc., 7890 // but the initial 1 is tokenized as an integer. 7891 std::string Token; 7892 if (isToken(AsmToken::Integer)) { 7893 SMLoc Loc = getToken().getEndLoc(); 7894 Token = std::string(getTokenStr()); 7895 lex(); 7896 if (getLoc() != Loc) 7897 return false; 7898 } 7899 7900 StringRef Suffix; 7901 if (!parseId(Suffix)) 7902 return false; 7903 Token += Suffix; 7904 7905 StringRef DimId = Token; 7906 if (DimId.startswith("SQ_RSRC_IMG_")) 7907 DimId = DimId.drop_front(12); 7908 7909 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7910 if (!DimInfo) 7911 return false; 7912 7913 Encoding = DimInfo->Encoding; 7914 return true; 7915 } 7916 7917 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7918 if (!isGFX10Plus()) 7919 return MatchOperand_NoMatch; 7920 7921 SMLoc S = getLoc(); 7922 7923 if (!trySkipId("dim", AsmToken::Colon)) 7924 return MatchOperand_NoMatch; 7925 7926 unsigned Encoding; 7927 SMLoc Loc = getLoc(); 7928 if (!parseDimId(Encoding)) { 7929 Error(Loc, "invalid dim value"); 7930 return MatchOperand_ParseFail; 7931 } 7932 7933 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7934 AMDGPUOperand::ImmTyDim)); 7935 return MatchOperand_Success; 7936 } 7937 7938 //===----------------------------------------------------------------------===// 7939 // dpp 7940 //===----------------------------------------------------------------------===// 7941 7942 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7943 SMLoc S = getLoc(); 7944 7945 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7946 return MatchOperand_NoMatch; 7947 7948 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7949 7950 int64_t Sels[8]; 7951 7952 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7953 return MatchOperand_ParseFail; 7954 7955 for (size_t i = 0; i < 8; ++i) { 7956 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7957 return MatchOperand_ParseFail; 7958 7959 SMLoc Loc = getLoc(); 7960 if (getParser().parseAbsoluteExpression(Sels[i])) 7961 return MatchOperand_ParseFail; 7962 if (0 > Sels[i] || 7 < Sels[i]) { 7963 Error(Loc, "expected a 3-bit value"); 7964 return MatchOperand_ParseFail; 7965 } 7966 } 7967 7968 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7969 return MatchOperand_ParseFail; 7970 7971 unsigned DPP8 = 0; 7972 for (size_t i = 0; i < 8; ++i) 7973 DPP8 |= (Sels[i] << (i * 3)); 7974 7975 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7976 return MatchOperand_Success; 7977 } 7978 7979 bool 7980 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7981 const OperandVector &Operands) { 7982 if (Ctrl == "row_newbcast") 7983 return isGFX90A(); 7984 7985 if (Ctrl == "row_share" || 7986 Ctrl 
== "row_xmask") 7987 return isGFX10Plus(); 7988 7989 if (Ctrl == "wave_shl" || 7990 Ctrl == "wave_shr" || 7991 Ctrl == "wave_rol" || 7992 Ctrl == "wave_ror" || 7993 Ctrl == "row_bcast") 7994 return isVI() || isGFX9(); 7995 7996 return Ctrl == "row_mirror" || 7997 Ctrl == "row_half_mirror" || 7998 Ctrl == "quad_perm" || 7999 Ctrl == "row_shl" || 8000 Ctrl == "row_shr" || 8001 Ctrl == "row_ror"; 8002 } 8003 8004 int64_t 8005 AMDGPUAsmParser::parseDPPCtrlPerm() { 8006 // quad_perm:[%d,%d,%d,%d] 8007 8008 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8009 return -1; 8010 8011 int64_t Val = 0; 8012 for (int i = 0; i < 4; ++i) { 8013 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8014 return -1; 8015 8016 int64_t Temp; 8017 SMLoc Loc = getLoc(); 8018 if (getParser().parseAbsoluteExpression(Temp)) 8019 return -1; 8020 if (Temp < 0 || Temp > 3) { 8021 Error(Loc, "expected a 2-bit value"); 8022 return -1; 8023 } 8024 8025 Val += (Temp << i * 2); 8026 } 8027 8028 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8029 return -1; 8030 8031 return Val; 8032 } 8033 8034 int64_t 8035 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8036 using namespace AMDGPU::DPP; 8037 8038 // sel:%d 8039 8040 int64_t Val; 8041 SMLoc Loc = getLoc(); 8042 8043 if (getParser().parseAbsoluteExpression(Val)) 8044 return -1; 8045 8046 struct DppCtrlCheck { 8047 int64_t Ctrl; 8048 int Lo; 8049 int Hi; 8050 }; 8051 8052 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8053 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8054 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8055 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8056 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8057 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8058 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8059 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8060 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8061 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8062 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8063 .Default({-1, 0, 0}); 8064 8065 bool Valid; 8066 if (Check.Ctrl == -1) { 8067 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8068 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8069 } else { 8070 Valid = Check.Lo <= Val && Val <= Check.Hi; 8071 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8072 } 8073 8074 if (!Valid) { 8075 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8076 return -1; 8077 } 8078 8079 return Val; 8080 } 8081 8082 OperandMatchResultTy 8083 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8084 using namespace AMDGPU::DPP; 8085 8086 if (!isToken(AsmToken::Identifier) || 8087 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8088 return MatchOperand_NoMatch; 8089 8090 SMLoc S = getLoc(); 8091 int64_t Val = -1; 8092 StringRef Ctrl; 8093 8094 parseId(Ctrl); 8095 8096 if (Ctrl == "row_mirror") { 8097 Val = DppCtrl::ROW_MIRROR; 8098 } else if (Ctrl == "row_half_mirror") { 8099 Val = DppCtrl::ROW_HALF_MIRROR; 8100 } else { 8101 if (skipToken(AsmToken::Colon, "expected a colon")) { 8102 if (Ctrl == "quad_perm") { 8103 Val = parseDPPCtrlPerm(); 8104 } else { 8105 Val = parseDPPCtrlSel(Ctrl); 8106 } 8107 } 8108 } 8109 8110 if (Val == -1) 8111 return MatchOperand_ParseFail; 8112 8113 Operands.push_back( 8114 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8115 return MatchOperand_Success; 8116 } 8117 8118 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8119 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8120 } 8121 8122 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8123 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8124 } 8125 8126 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8127 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8128 } 8129 8130 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8131 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8132 } 8133 8134 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8135 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8136 } 8137 8138 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8139 OptionalImmIndexMap OptionalIdx; 8140 8141 unsigned Opc = Inst.getOpcode(); 8142 bool HasModifiers = 8143 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8144 unsigned I = 1; 8145 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8146 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8147 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8148 } 8149 8150 int Fi = 0; 8151 for (unsigned E = Operands.size(); I != E; ++I) { 8152 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8153 MCOI::TIED_TO); 8154 if (TiedTo != -1) { 8155 assert((unsigned)TiedTo < Inst.getNumOperands()); 8156 // handle tied old or src2 for MAC instructions 8157 Inst.addOperand(Inst.getOperand(TiedTo)); 8158 } 8159 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8160 // Add the register arguments 8161 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8162 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8163 // Skip it. 
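// For example, in something like "v_add_u32_dpp v0, vcc, v1, v2
// quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf" the 'vcc' is only an
// assembler token and gets no explicit MCInst operand here.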
8164 continue; 8165 } 8166 8167 if (IsDPP8) { 8168 if (Op.isDPP8()) { 8169 Op.addImmOperands(Inst, 1); 8170 } else if (HasModifiers && 8171 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8172 Op.addRegWithFPInputModsOperands(Inst, 2); 8173 } else if (Op.isFI()) { 8174 Fi = Op.getImm(); 8175 } else if (Op.isReg()) { 8176 Op.addRegOperands(Inst, 1); 8177 } else { 8178 llvm_unreachable("Invalid operand type"); 8179 } 8180 } else { 8181 if (HasModifiers && 8182 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8183 Op.addRegWithFPInputModsOperands(Inst, 2); 8184 } else if (Op.isReg()) { 8185 Op.addRegOperands(Inst, 1); 8186 } else if (Op.isDPPCtrl()) { 8187 Op.addImmOperands(Inst, 1); 8188 } else if (Op.isImm()) { 8189 // Handle optional arguments 8190 OptionalIdx[Op.getImmTy()] = I; 8191 } else { 8192 llvm_unreachable("Invalid operand type"); 8193 } 8194 } 8195 } 8196 8197 if (IsDPP8) { 8198 using namespace llvm::AMDGPU::DPP; 8199 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8200 } else { 8201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8203 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8204 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8206 } 8207 } 8208 } 8209 8210 //===----------------------------------------------------------------------===// 8211 // sdwa 8212 //===----------------------------------------------------------------------===// 8213 8214 OperandMatchResultTy 8215 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8216 AMDGPUOperand::ImmTy Type) { 8217 using namespace llvm::AMDGPU::SDWA; 8218 8219 SMLoc S = getLoc(); 8220 StringRef Value; 8221 OperandMatchResultTy res; 8222 8223 SMLoc StringLoc; 8224 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8225 if (res != MatchOperand_Success) { 8226 return res; 8227 } 8228 8229 int64_t Int; 8230 Int = StringSwitch<int64_t>(Value) 8231 .Case("BYTE_0", SdwaSel::BYTE_0) 8232 .Case("BYTE_1", SdwaSel::BYTE_1) 8233 .Case("BYTE_2", SdwaSel::BYTE_2) 8234 .Case("BYTE_3", SdwaSel::BYTE_3) 8235 .Case("WORD_0", SdwaSel::WORD_0) 8236 .Case("WORD_1", SdwaSel::WORD_1) 8237 .Case("DWORD", SdwaSel::DWORD) 8238 .Default(0xffffffff); 8239 8240 if (Int == 0xffffffff) { 8241 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8242 return MatchOperand_ParseFail; 8243 } 8244 8245 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8246 return MatchOperand_Success; 8247 } 8248 8249 OperandMatchResultTy 8250 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8251 using namespace llvm::AMDGPU::SDWA; 8252 8253 SMLoc S = getLoc(); 8254 StringRef Value; 8255 OperandMatchResultTy res; 8256 8257 SMLoc StringLoc; 8258 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8259 if (res != MatchOperand_Success) { 8260 return res; 8261 } 8262 8263 int64_t Int; 8264 Int = StringSwitch<int64_t>(Value) 8265 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8266 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8267 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8268 .Default(0xffffffff); 8269 8270 if (Int == 0xffffffff) { 8271 Error(StringLoc, "invalid dst_unused value"); 8272 return MatchOperand_ParseFail; 8273 } 8274 8275 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8276 return MatchOperand_Success; 8277 } 8278 8279 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8280 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8281 } 8282 8283 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8284 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8285 } 8286 8287 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8288 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8289 } 8290 8291 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8292 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8293 } 8294 8295 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8296 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8297 } 8298 8299 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8300 uint64_t BasicInstType, 8301 bool SkipDstVcc, 8302 bool SkipSrcVcc) { 8303 using namespace llvm::AMDGPU::SDWA; 8304 8305 OptionalImmIndexMap OptionalIdx; 8306 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8307 bool SkippedVcc = false; 8308 8309 unsigned I = 1; 8310 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8311 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8312 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8313 } 8314 8315 for (unsigned E = Operands.size(); I != E; ++I) { 8316 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8317 if (SkipVcc && !SkippedVcc && Op.isReg() && 8318 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8319 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8320 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8321 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8322 // Skip VCC only if we didn't skip it on previous iteration. 8323 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
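// For example, in "v_add_i32_sdwa v1, vcc, v2, v3 ..." the 'vcc' dst is seen
// while Inst holds only vdst (1 operand); in
// "v_addc_u32_sdwa v1, vcc, v2, v3, vcc ..." the trailing 'vcc' src is seen
// once Inst holds vdst plus two {modifiers, register} source pairs (5
// operands).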
8324 if (BasicInstType == SIInstrFlags::VOP2 && 8325 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8326 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8327 SkippedVcc = true; 8328 continue; 8329 } else if (BasicInstType == SIInstrFlags::VOPC && 8330 Inst.getNumOperands() == 0) { 8331 SkippedVcc = true; 8332 continue; 8333 } 8334 } 8335 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8336 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8337 } else if (Op.isImm()) { 8338 // Handle optional arguments 8339 OptionalIdx[Op.getImmTy()] = I; 8340 } else { 8341 llvm_unreachable("Invalid operand type"); 8342 } 8343 SkippedVcc = false; 8344 } 8345 8346 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8347 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8348 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8349 // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments 8350 switch (BasicInstType) { 8351 case SIInstrFlags::VOP1: 8352 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8353 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8355 } 8356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8357 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8359 break; 8360 8361 case SIInstrFlags::VOP2: 8362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8363 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8364 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8365 } 8366 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8368 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8369 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8370 break; 8371 8372 case SIInstrFlags::VOPC: 8373 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8376 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8377 break; 8378 8379 default: 8380 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 8381 } 8382 } 8383 8384 // special case v_mac_{f16, f32}: 8385 // it has src2 register operand that is tied to dst operand 8386 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8387 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8388 auto it = Inst.begin(); 8389 std::advance( 8390 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8391 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8392 } 8393 } 8394 8395 //===----------------------------------------------------------------------===// 8396 // mAI 8397 //===----------------------------------------------------------------------===// 8398 8399 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8400 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8401 } 8402 8403 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8404 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8405 } 8406 8407 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8408 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8409 } 8410 8411 /// Force static initialization. 8412 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8413 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8414 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8415 } 8416 8417 #define GET_REGISTER_MATCHER 8418 #define GET_MATCHER_IMPLEMENTATION 8419 #define GET_MNEMONIC_SPELL_CHECKER 8420 #define GET_MNEMONIC_CHECKER 8421 #include "AMDGPUGenAsmMatcher.inc" 8422 8423 // This function should be defined after auto-generated include so that we have 8424 // MatchClassKind enum defined 8425 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8426 unsigned Kind) { 8427 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8428 // But MatchInstructionImpl() expects to meet token and fails to validate 8429 // operand. This method checks if we are given immediate operand but expect to 8430 // get corresponding token. 8431 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8432 switch (Kind) { 8433 case MCK_addr64: 8434 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8435 case MCK_gds: 8436 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8437 case MCK_lds: 8438 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8439 case MCK_idxen: 8440 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8441 case MCK_offen: 8442 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8443 case MCK_SSrcB32: 8444 // When operands have expression values, they will return true for isToken, 8445 // because it is not possible to distinguish between a token and an 8446 // expression at parse time. MatchInstructionImpl() will always try to 8447 // match an operand as a token, when isToken returns true, and when the 8448 // name of the expression is not a valid token, the match will fail, 8449 // so we need to handle it here. 8450 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8451 case MCK_SSrcF32: 8452 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8453 case MCK_SoppBrTarget: 8454 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8455 case MCK_VReg32OrOff: 8456 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8457 case MCK_InterpSlot: 8458 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8459 case MCK_Attr: 8460 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8461 case MCK_AttrChan: 8462 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8463 case MCK_ImmSMEMOffset: 8464 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8465 case MCK_SReg_64: 8466 case MCK_SReg_64_XEXEC: 8467 // Null is defined as a 32-bit register but 8468 // it should also be enabled with 64-bit operands. 8469 // The following code enables it for SReg_64 operands 8470 // used as source and destination. Remaining source 8471 // operands are handled in isInlinableImm. 8472 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8473 default: 8474 return Match_InvalidOperand; 8475 } 8476 } 8477 8478 //===----------------------------------------------------------------------===// 8479 // endpgm 8480 //===----------------------------------------------------------------------===// 8481 8482 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8483 SMLoc S = getLoc(); 8484 int64_t Imm = 0; 8485 8486 if (!parseExpr(Imm)) { 8487 // The operand is optional, if not present default to 0 8488 Imm = 0; 8489 } 8490 8491 if (!isUInt<16>(Imm)) { 8492 Error(S, "expected a 16-bit value"); 8493 return MatchOperand_ParseFail; 8494 } 8495 8496 Operands.push_back( 8497 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8498 return MatchOperand_Success; 8499 } 8500 8501 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8502