//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
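    // Illustration (not from the original source): for a source operand
    // written as "-|v0|" the parser records Abs = Neg = true, and
    // getFPModifiersOperand() below yields SISrcMods::NEG | SISrcMods::ABS,
    // which is emitted as the corresponding src*_modifiers operand.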
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }
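  // Note (illustration): a bare word such as 'gds' may therefore reach us as
  // an MCSymbolRefExpr rather than a Token; isToken()/getToken() fall back to
  // the referenced symbol's name so the matcher can still recognize the
  // keyword.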
  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
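  // Informal summary of the source-operand predicate naming used below,
  // derived from the definitions that follow:
  //   SCSrc* - SGPR or inline constant        SSrc*  - SCSrc plus literal/expr
  //   VCSrc* - VGPR/SGPR or inline constant   VSrc*  - VCSrc plus literal/expr
  //   VISrc* - VGPR or inline constant        AISrc* - AGPR or inline constant
  // The B*/F*/V2* suffixes describe the expected operand width and type.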
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }
  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }
  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};
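// Example of KernelScopeInfo in action (illustration): once the parser has
// seen v7 inside a kernel, usesRegister(IS_VGPR, 7, 1) raises
// VgprIndexUnusedMin to 8 and the .kernel.vgpr_count symbol is updated to
// match.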
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
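      // For example (assuming a gfx1010 target, i.e. ISA version 10.1.0), the
      // HSA v3+ ABI path below defines .amdgcn.gfx_generation_number = 10,
      // .amdgcn.gfx_generation_minor = 1 and .amdgcn.gfx_generation_stepping
      // = 0; older ABIs get the equivalent .option.machine_version_* symbols.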
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
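// For instance (illustration), 1.0 converts to f16 exactly and 0.1 merely
// loses precision, so both are accepted above, while 1.0e10 overflows f16 and
// is rejected.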
1797 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1798 }
1799
1800 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1801
1802 // This is a hack to enable named inline values like
1803 // shared_base with both 32-bit and 64-bit operands.
1804 // Note that these values are defined as
1805 // 32-bit operands only.
1806 if (isInlineValue()) {
1807 return true;
1808 }
1809
1810 if (!isImmTy(ImmTyNone)) {
1811 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1812 return false;
1813 }
1814 // TODO: We should avoid using host float here. It would be better to
1815 // check the float bit values which is what a few other places do.
1816 // We've had bot failures before due to weird NaN support on mips hosts.
1817
1818 APInt Literal(64, Imm.Val);
1819
1820 if (Imm.IsFPImm) { // We got fp literal token
1821 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1822 return AMDGPU::isInlinableLiteral64(Imm.Val,
1823 AsmParser->hasInv2PiInlineImm());
1824 }
1825
1826 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1827 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1828 return false;
1829
1830 if (type.getScalarSizeInBits() == 16) {
1831 return isInlineableLiteralOp16(
1832 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1833 type, AsmParser->hasInv2PiInlineImm());
1834 }
1835
1836 // Check if single precision literal is inlinable
1837 return AMDGPU::isInlinableLiteral32(
1838 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1839 AsmParser->hasInv2PiInlineImm());
1840 }
1841
1842 // We got int literal token.
1843 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1844 return AMDGPU::isInlinableLiteral64(Imm.Val,
1845 AsmParser->hasInv2PiInlineImm());
1846 }
1847
1848 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1849 return false;
1850 }
1851
1852 if (type.getScalarSizeInBits() == 16) {
1853 return isInlineableLiteralOp16(
1854 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1855 type, AsmParser->hasInv2PiInlineImm());
1856 }
1857
1858 return AMDGPU::isInlinableLiteral32(
1859 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1860 AsmParser->hasInv2PiInlineImm());
1861 }
1862
1863 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1864 // Check that this immediate can be added as literal
1865 if (!isImmTy(ImmTyNone)) {
1866 return false;
1867 }
1868
1869 if (!Imm.IsFPImm) {
1870 // We got int literal token.
1871
1872 if (type == MVT::f64 && hasFPModifiers()) {
1873 // FP modifiers cannot be applied to int literals while preserving the same
1874 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1875 // ambiguity, disable these cases.
1876 return false;
1877 }
1878
1879 unsigned Size = type.getSizeInBits();
1880 if (Size == 64)
1881 Size = 32;
1882
1883 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1884 // types.
1885 return isSafeTruncation(Imm.Val, Size);
1886 }
1887
1888 // We got fp literal token
1889 if (type == MVT::f64) { // Expected 64-bit fp operand
1890 // The low 32 bits of the literal would be set to zeroes, but such literals are accepted.
1891 return true;
1892 }
1893
1894 if (type == MVT::i64) { // Expected 64-bit int operand
1895 // We don't allow fp literals in 64-bit integer instructions. It is
1896 // unclear how we should encode them.
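// For example, it would be ambiguous whether a token such as "1.5" used with
// a 64-bit integer operand should take the (truncated) f64 bit pattern or a
// converted integer value, so these literals are rejected.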
1897 return false;
1898 }
1899
1900 // We allow fp literals with f16x2 operands assuming that the specified
1901 // literal goes into the lower half and the upper half is zero. We also
1902 // require that the literal may be losslessly converted to f16.
1903 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1904 (type == MVT::v2i16)? MVT::i16 :
1905 (type == MVT::v2f32)? MVT::f32 : type;
1906
1907 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1908 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1909 }
1910
1911 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1912 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1913 }
1914
1915 bool AMDGPUOperand::isVRegWithInputMods() const {
1916 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1917 // GFX90A allows DPP on 64-bit operands.
1918 (isRegClass(AMDGPU::VReg_64RegClassID) &&
1919 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1920 }
1921
1922 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1923 if (AsmParser->isVI())
1924 return isVReg32();
1925 else if (AsmParser->isGFX9Plus())
1926 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1927 else
1928 return false;
1929 }
1930
1931 bool AMDGPUOperand::isSDWAFP16Operand() const {
1932 return isSDWAOperand(MVT::f16);
1933 }
1934
1935 bool AMDGPUOperand::isSDWAFP32Operand() const {
1936 return isSDWAOperand(MVT::f32);
1937 }
1938
1939 bool AMDGPUOperand::isSDWAInt16Operand() const {
1940 return isSDWAOperand(MVT::i16);
1941 }
1942
1943 bool AMDGPUOperand::isSDWAInt32Operand() const {
1944 return isSDWAOperand(MVT::i32);
1945 }
1946
1947 bool AMDGPUOperand::isBoolReg() const {
1948 auto FB = AsmParser->getFeatureBits();
1949 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1950 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1951 }
1952
1953 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1954 {
1955 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1956 assert(Size == 2 || Size == 4 || Size == 8);
1957
1958 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1959
1960 if (Imm.Mods.Abs) {
1961 Val &= ~FpSignMask;
1962 }
1963 if (Imm.Mods.Neg) {
1964 Val ^= FpSignMask;
1965 }
1966
1967 return Val;
1968 }
1969
1970 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1971 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1972 Inst.getNumOperands())) {
1973 addLiteralImmOperand(Inst, Imm.Val,
1974 ApplyModifiers &&
1975 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1976 } else {
1977 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1978 Inst.addOperand(MCOperand::createImm(Imm.Val));
1979 setImmKindNone();
1980 }
1981 }
1982
1983 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1984 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1985 auto OpNum = Inst.getNumOperands();
1986 // Check that this operand accepts literals
1987 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1988
1989 if (ApplyModifiers) {
1990 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1991 const unsigned Size = Imm.IsFPImm ?
sizeof(double) : getOperandSize(InstDesc, OpNum); 1992 Val = applyInputFPModifiers(Val, Size); 1993 } 1994 1995 APInt Literal(64, Val); 1996 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1997 1998 if (Imm.IsFPImm) { // We got fp literal token 1999 switch (OpTy) { 2000 case AMDGPU::OPERAND_REG_IMM_INT64: 2001 case AMDGPU::OPERAND_REG_IMM_FP64: 2002 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2003 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2004 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2005 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2006 AsmParser->hasInv2PiInlineImm())) { 2007 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2008 setImmKindConst(); 2009 return; 2010 } 2011 2012 // Non-inlineable 2013 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2014 // For fp operands we check if low 32 bits are zeros 2015 if (Literal.getLoBits(32) != 0) { 2016 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2017 "Can't encode literal as exact 64-bit floating-point operand. " 2018 "Low 32-bits will be set to zero"); 2019 } 2020 2021 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2022 setImmKindLiteral(); 2023 return; 2024 } 2025 2026 // We don't allow fp literals in 64-bit integer instructions. It is 2027 // unclear how we should encode them. This case should be checked earlier 2028 // in predicate methods (isLiteralImm()) 2029 llvm_unreachable("fp literal in 64-bit integer instruction."); 2030 2031 case AMDGPU::OPERAND_REG_IMM_INT32: 2032 case AMDGPU::OPERAND_REG_IMM_FP32: 2033 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2034 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2035 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2036 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2037 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2038 case AMDGPU::OPERAND_REG_IMM_INT16: 2039 case AMDGPU::OPERAND_REG_IMM_FP16: 2040 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2041 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2042 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2043 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2044 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2045 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2046 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2047 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2048 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2049 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2050 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2051 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2052 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2053 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2054 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2055 case AMDGPU::OPERAND_KIMM32: 2056 case AMDGPU::OPERAND_KIMM16: { 2057 bool lost; 2058 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2059 // Convert literal to single precision 2060 FPLiteral.convert(*getOpFltSemantics(OpTy), 2061 APFloat::rmNearestTiesToEven, &lost); 2062 // We allow precision lost but not overflow or underflow. This should be 2063 // checked earlier in isLiteralImm() 2064 2065 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2066 Inst.addOperand(MCOperand::createImm(ImmVal)); 2067 setImmKindLiteral(); 2068 return; 2069 } 2070 default: 2071 llvm_unreachable("invalid operand size"); 2072 } 2073 2074 return; 2075 } 2076 2077 // We got int literal token. 2078 // Only sign extend inline immediates. 
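// Summary of the cases below: a value that fits an inline constant is emitted
// as-is; anything else is truncated to the operand width (the low 32 bits for
// 32-bit and 64-bit operands, the low 16 bits for 16-bit operands) and emitted
// as a literal; KIMM operands always take the raw low bits.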
2079 switch (OpTy) { 2080 case AMDGPU::OPERAND_REG_IMM_INT32: 2081 case AMDGPU::OPERAND_REG_IMM_FP32: 2082 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2083 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2084 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2085 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2086 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2087 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2088 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2089 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2090 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2091 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2092 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2093 if (isSafeTruncation(Val, 32) && 2094 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2095 AsmParser->hasInv2PiInlineImm())) { 2096 Inst.addOperand(MCOperand::createImm(Val)); 2097 setImmKindConst(); 2098 return; 2099 } 2100 2101 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2102 setImmKindLiteral(); 2103 return; 2104 2105 case AMDGPU::OPERAND_REG_IMM_INT64: 2106 case AMDGPU::OPERAND_REG_IMM_FP64: 2107 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2108 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2109 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2110 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2111 Inst.addOperand(MCOperand::createImm(Val)); 2112 setImmKindConst(); 2113 return; 2114 } 2115 2116 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2117 setImmKindLiteral(); 2118 return; 2119 2120 case AMDGPU::OPERAND_REG_IMM_INT16: 2121 case AMDGPU::OPERAND_REG_IMM_FP16: 2122 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2123 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2124 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2126 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2127 if (isSafeTruncation(Val, 16) && 2128 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2129 AsmParser->hasInv2PiInlineImm())) { 2130 Inst.addOperand(MCOperand::createImm(Val)); 2131 setImmKindConst(); 2132 return; 2133 } 2134 2135 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2136 setImmKindLiteral(); 2137 return; 2138 2139 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2140 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2141 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2142 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2143 assert(isSafeTruncation(Val, 16)); 2144 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2145 AsmParser->hasInv2PiInlineImm())); 2146 2147 Inst.addOperand(MCOperand::createImm(Val)); 2148 return; 2149 } 2150 case AMDGPU::OPERAND_KIMM32: 2151 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2152 setImmKindNone(); 2153 return; 2154 case AMDGPU::OPERAND_KIMM16: 2155 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2156 setImmKindNone(); 2157 return; 2158 default: 2159 llvm_unreachable("invalid operand size"); 2160 } 2161 } 2162 2163 template <unsigned Bitwidth> 2164 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2165 APInt Literal(64, Imm.Val); 2166 setImmKindNone(); 2167 2168 if (!Imm.IsFPImm) { 2169 // We got int literal token. 
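// An integer token is simply truncated to the KIMM bitwidth, e.g. only the
// low 16 bits are emitted for a 16-bit KIMM operand.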
2170 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2171 return; 2172 } 2173 2174 bool Lost; 2175 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2176 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2177 APFloat::rmNearestTiesToEven, &Lost); 2178 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2179 } 2180 2181 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2182 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2183 } 2184 2185 static bool isInlineValue(unsigned Reg) { 2186 switch (Reg) { 2187 case AMDGPU::SRC_SHARED_BASE: 2188 case AMDGPU::SRC_SHARED_LIMIT: 2189 case AMDGPU::SRC_PRIVATE_BASE: 2190 case AMDGPU::SRC_PRIVATE_LIMIT: 2191 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2192 return true; 2193 case AMDGPU::SRC_VCCZ: 2194 case AMDGPU::SRC_EXECZ: 2195 case AMDGPU::SRC_SCC: 2196 return true; 2197 case AMDGPU::SGPR_NULL: 2198 return true; 2199 default: 2200 return false; 2201 } 2202 } 2203 2204 bool AMDGPUOperand::isInlineValue() const { 2205 return isRegKind() && ::isInlineValue(getReg()); 2206 } 2207 2208 //===----------------------------------------------------------------------===// 2209 // AsmParser 2210 //===----------------------------------------------------------------------===// 2211 2212 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2213 if (Is == IS_VGPR) { 2214 switch (RegWidth) { 2215 default: return -1; 2216 case 1: return AMDGPU::VGPR_32RegClassID; 2217 case 2: return AMDGPU::VReg_64RegClassID; 2218 case 3: return AMDGPU::VReg_96RegClassID; 2219 case 4: return AMDGPU::VReg_128RegClassID; 2220 case 5: return AMDGPU::VReg_160RegClassID; 2221 case 6: return AMDGPU::VReg_192RegClassID; 2222 case 7: return AMDGPU::VReg_224RegClassID; 2223 case 8: return AMDGPU::VReg_256RegClassID; 2224 case 16: return AMDGPU::VReg_512RegClassID; 2225 case 32: return AMDGPU::VReg_1024RegClassID; 2226 } 2227 } else if (Is == IS_TTMP) { 2228 switch (RegWidth) { 2229 default: return -1; 2230 case 1: return AMDGPU::TTMP_32RegClassID; 2231 case 2: return AMDGPU::TTMP_64RegClassID; 2232 case 4: return AMDGPU::TTMP_128RegClassID; 2233 case 8: return AMDGPU::TTMP_256RegClassID; 2234 case 16: return AMDGPU::TTMP_512RegClassID; 2235 } 2236 } else if (Is == IS_SGPR) { 2237 switch (RegWidth) { 2238 default: return -1; 2239 case 1: return AMDGPU::SGPR_32RegClassID; 2240 case 2: return AMDGPU::SGPR_64RegClassID; 2241 case 3: return AMDGPU::SGPR_96RegClassID; 2242 case 4: return AMDGPU::SGPR_128RegClassID; 2243 case 5: return AMDGPU::SGPR_160RegClassID; 2244 case 6: return AMDGPU::SGPR_192RegClassID; 2245 case 7: return AMDGPU::SGPR_224RegClassID; 2246 case 8: return AMDGPU::SGPR_256RegClassID; 2247 case 16: return AMDGPU::SGPR_512RegClassID; 2248 } 2249 } else if (Is == IS_AGPR) { 2250 switch (RegWidth) { 2251 default: return -1; 2252 case 1: return AMDGPU::AGPR_32RegClassID; 2253 case 2: return AMDGPU::AReg_64RegClassID; 2254 case 3: return AMDGPU::AReg_96RegClassID; 2255 case 4: return AMDGPU::AReg_128RegClassID; 2256 case 5: return AMDGPU::AReg_160RegClassID; 2257 case 6: return AMDGPU::AReg_192RegClassID; 2258 case 7: return AMDGPU::AReg_224RegClassID; 2259 case 8: return AMDGPU::AReg_256RegClassID; 2260 case 16: return AMDGPU::AReg_512RegClassID; 2261 case 32: return AMDGPU::AReg_1024RegClassID; 2262 } 2263 } 2264 return -1; 2265 } 2266 2267 static unsigned getSpecialRegForName(StringRef RegName) { 2268 return StringSwitch<unsigned>(RegName) 2269 .Case("exec", 
AMDGPU::EXEC) 2270 .Case("vcc", AMDGPU::VCC) 2271 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2272 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2273 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2274 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2275 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2276 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2277 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2278 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2279 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2280 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2281 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2282 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2283 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2284 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2285 .Case("m0", AMDGPU::M0) 2286 .Case("vccz", AMDGPU::SRC_VCCZ) 2287 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2288 .Case("execz", AMDGPU::SRC_EXECZ) 2289 .Case("src_execz", AMDGPU::SRC_EXECZ) 2290 .Case("scc", AMDGPU::SRC_SCC) 2291 .Case("src_scc", AMDGPU::SRC_SCC) 2292 .Case("tba", AMDGPU::TBA) 2293 .Case("tma", AMDGPU::TMA) 2294 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2295 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2296 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2297 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2298 .Case("vcc_lo", AMDGPU::VCC_LO) 2299 .Case("vcc_hi", AMDGPU::VCC_HI) 2300 .Case("exec_lo", AMDGPU::EXEC_LO) 2301 .Case("exec_hi", AMDGPU::EXEC_HI) 2302 .Case("tma_lo", AMDGPU::TMA_LO) 2303 .Case("tma_hi", AMDGPU::TMA_HI) 2304 .Case("tba_lo", AMDGPU::TBA_LO) 2305 .Case("tba_hi", AMDGPU::TBA_HI) 2306 .Case("pc", AMDGPU::PC_REG) 2307 .Case("null", AMDGPU::SGPR_NULL) 2308 .Default(AMDGPU::NoRegister); 2309 } 2310 2311 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2312 SMLoc &EndLoc, bool RestoreOnFailure) { 2313 auto R = parseRegister(); 2314 if (!R) return true; 2315 assert(R->isReg()); 2316 RegNo = R->getReg(); 2317 StartLoc = R->getStartLoc(); 2318 EndLoc = R->getEndLoc(); 2319 return false; 2320 } 2321 2322 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2323 SMLoc &EndLoc) { 2324 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2325 } 2326 2327 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2328 SMLoc &StartLoc, 2329 SMLoc &EndLoc) { 2330 bool Result = 2331 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2332 bool PendingErrors = getParser().hasPendingError(); 2333 getParser().clearPendingErrors(); 2334 if (PendingErrors) 2335 return MatchOperand_ParseFail; 2336 if (Result) 2337 return MatchOperand_NoMatch; 2338 return MatchOperand_Success; 2339 } 2340 2341 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2342 RegisterKind RegKind, unsigned Reg1, 2343 SMLoc Loc) { 2344 switch (RegKind) { 2345 case IS_SPECIAL: 2346 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2347 Reg = AMDGPU::EXEC; 2348 RegWidth = 2; 2349 return true; 2350 } 2351 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2352 Reg = AMDGPU::FLAT_SCR; 2353 RegWidth = 2; 2354 return true; 2355 } 2356 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2357 Reg = AMDGPU::XNACK_MASK; 2358 RegWidth = 2; 2359 return true; 2360 } 2361 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2362 Reg = AMDGPU::VCC; 2363 RegWidth = 2; 2364 return true; 2365 } 2366 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2367 Reg = 
AMDGPU::TBA; 2368 RegWidth = 2; 2369 return true; 2370 } 2371 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2372 Reg = AMDGPU::TMA; 2373 RegWidth = 2; 2374 return true; 2375 } 2376 Error(Loc, "register does not fit in the list"); 2377 return false; 2378 case IS_VGPR: 2379 case IS_SGPR: 2380 case IS_AGPR: 2381 case IS_TTMP: 2382 if (Reg1 != Reg + RegWidth) { 2383 Error(Loc, "registers in a list must have consecutive indices"); 2384 return false; 2385 } 2386 RegWidth++; 2387 return true; 2388 default: 2389 llvm_unreachable("unexpected register kind"); 2390 } 2391 } 2392 2393 struct RegInfo { 2394 StringLiteral Name; 2395 RegisterKind Kind; 2396 }; 2397 2398 static constexpr RegInfo RegularRegisters[] = { 2399 {{"v"}, IS_VGPR}, 2400 {{"s"}, IS_SGPR}, 2401 {{"ttmp"}, IS_TTMP}, 2402 {{"acc"}, IS_AGPR}, 2403 {{"a"}, IS_AGPR}, 2404 }; 2405 2406 static bool isRegularReg(RegisterKind Kind) { 2407 return Kind == IS_VGPR || 2408 Kind == IS_SGPR || 2409 Kind == IS_TTMP || 2410 Kind == IS_AGPR; 2411 } 2412 2413 static const RegInfo* getRegularRegInfo(StringRef Str) { 2414 for (const RegInfo &Reg : RegularRegisters) 2415 if (Str.startswith(Reg.Name)) 2416 return &Reg; 2417 return nullptr; 2418 } 2419 2420 static bool getRegNum(StringRef Str, unsigned& Num) { 2421 return !Str.getAsInteger(10, Num); 2422 } 2423 2424 bool 2425 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2426 const AsmToken &NextToken) const { 2427 2428 // A list of consecutive registers: [s0,s1,s2,s3] 2429 if (Token.is(AsmToken::LBrac)) 2430 return true; 2431 2432 if (!Token.is(AsmToken::Identifier)) 2433 return false; 2434 2435 // A single register like s0 or a range of registers like s[0:1] 2436 2437 StringRef Str = Token.getString(); 2438 const RegInfo *Reg = getRegularRegInfo(Str); 2439 if (Reg) { 2440 StringRef RegName = Reg->Name; 2441 StringRef RegSuffix = Str.substr(RegName.size()); 2442 if (!RegSuffix.empty()) { 2443 unsigned Num; 2444 // A single register with an index: rXX 2445 if (getRegNum(RegSuffix, Num)) 2446 return true; 2447 } else { 2448 // A range of registers: r[XX:YY]. 2449 if (NextToken.is(AsmToken::LBrac)) 2450 return true; 2451 } 2452 } 2453 2454 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2455 } 2456 2457 bool 2458 AMDGPUAsmParser::isRegister() 2459 { 2460 return isRegister(getToken(), peekToken()); 2461 } 2462 2463 unsigned 2464 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2465 unsigned RegNum, 2466 unsigned RegWidth, 2467 SMLoc Loc) { 2468 2469 assert(isRegularReg(RegKind)); 2470 2471 unsigned AlignSize = 1; 2472 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2473 // SGPR and TTMP registers must be aligned. 2474 // Max required alignment is 4 dwords. 
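// For example (illustrative): s[2:3] (width 2) must start at an even index
// and s[4:7] (width 4) at a multiple of 4, so s[1:2] and s[2:5] are rejected
// by the alignment check below.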
2475 AlignSize = std::min(RegWidth, 4u); 2476 } 2477 2478 if (RegNum % AlignSize != 0) { 2479 Error(Loc, "invalid register alignment"); 2480 return AMDGPU::NoRegister; 2481 } 2482 2483 unsigned RegIdx = RegNum / AlignSize; 2484 int RCID = getRegClass(RegKind, RegWidth); 2485 if (RCID == -1) { 2486 Error(Loc, "invalid or unsupported register size"); 2487 return AMDGPU::NoRegister; 2488 } 2489 2490 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2491 const MCRegisterClass RC = TRI->getRegClass(RCID); 2492 if (RegIdx >= RC.getNumRegs()) { 2493 Error(Loc, "register index is out of range"); 2494 return AMDGPU::NoRegister; 2495 } 2496 2497 return RC.getRegister(RegIdx); 2498 } 2499 2500 bool 2501 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2502 int64_t RegLo, RegHi; 2503 if (!skipToken(AsmToken::LBrac, "missing register index")) 2504 return false; 2505 2506 SMLoc FirstIdxLoc = getLoc(); 2507 SMLoc SecondIdxLoc; 2508 2509 if (!parseExpr(RegLo)) 2510 return false; 2511 2512 if (trySkipToken(AsmToken::Colon)) { 2513 SecondIdxLoc = getLoc(); 2514 if (!parseExpr(RegHi)) 2515 return false; 2516 } else { 2517 RegHi = RegLo; 2518 } 2519 2520 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2521 return false; 2522 2523 if (!isUInt<32>(RegLo)) { 2524 Error(FirstIdxLoc, "invalid register index"); 2525 return false; 2526 } 2527 2528 if (!isUInt<32>(RegHi)) { 2529 Error(SecondIdxLoc, "invalid register index"); 2530 return false; 2531 } 2532 2533 if (RegLo > RegHi) { 2534 Error(FirstIdxLoc, "first register index should not exceed second index"); 2535 return false; 2536 } 2537 2538 Num = static_cast<unsigned>(RegLo); 2539 Width = (RegHi - RegLo) + 1; 2540 return true; 2541 } 2542 2543 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2544 unsigned &RegNum, unsigned &RegWidth, 2545 SmallVectorImpl<AsmToken> &Tokens) { 2546 assert(isToken(AsmToken::Identifier)); 2547 unsigned Reg = getSpecialRegForName(getTokenStr()); 2548 if (Reg) { 2549 RegNum = 0; 2550 RegWidth = 1; 2551 RegKind = IS_SPECIAL; 2552 Tokens.push_back(getToken()); 2553 lex(); // skip register name 2554 } 2555 return Reg; 2556 } 2557 2558 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2559 unsigned &RegNum, unsigned &RegWidth, 2560 SmallVectorImpl<AsmToken> &Tokens) { 2561 assert(isToken(AsmToken::Identifier)); 2562 StringRef RegName = getTokenStr(); 2563 auto Loc = getLoc(); 2564 2565 const RegInfo *RI = getRegularRegInfo(RegName); 2566 if (!RI) { 2567 Error(Loc, "invalid register name"); 2568 return AMDGPU::NoRegister; 2569 } 2570 2571 Tokens.push_back(getToken()); 2572 lex(); // skip register name 2573 2574 RegKind = RI->Kind; 2575 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2576 if (!RegSuffix.empty()) { 2577 // Single 32-bit register: vXX. 2578 if (!getRegNum(RegSuffix, RegNum)) { 2579 Error(Loc, "invalid register index"); 2580 return AMDGPU::NoRegister; 2581 } 2582 RegWidth = 1; 2583 } else { 2584 // Range of registers: v[XX:YY]. ":YY" is optional. 
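// e.g. "v[0:3]" yields RegNum = 0 and RegWidth = 4, while "v[5]" (no colon)
// yields RegNum = 5 and RegWidth = 1.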
2585 if (!ParseRegRange(RegNum, RegWidth)) 2586 return AMDGPU::NoRegister; 2587 } 2588 2589 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2590 } 2591 2592 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2593 unsigned &RegWidth, 2594 SmallVectorImpl<AsmToken> &Tokens) { 2595 unsigned Reg = AMDGPU::NoRegister; 2596 auto ListLoc = getLoc(); 2597 2598 if (!skipToken(AsmToken::LBrac, 2599 "expected a register or a list of registers")) { 2600 return AMDGPU::NoRegister; 2601 } 2602 2603 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2604 2605 auto Loc = getLoc(); 2606 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2607 return AMDGPU::NoRegister; 2608 if (RegWidth != 1) { 2609 Error(Loc, "expected a single 32-bit register"); 2610 return AMDGPU::NoRegister; 2611 } 2612 2613 for (; trySkipToken(AsmToken::Comma); ) { 2614 RegisterKind NextRegKind; 2615 unsigned NextReg, NextRegNum, NextRegWidth; 2616 Loc = getLoc(); 2617 2618 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2619 NextRegNum, NextRegWidth, 2620 Tokens)) { 2621 return AMDGPU::NoRegister; 2622 } 2623 if (NextRegWidth != 1) { 2624 Error(Loc, "expected a single 32-bit register"); 2625 return AMDGPU::NoRegister; 2626 } 2627 if (NextRegKind != RegKind) { 2628 Error(Loc, "registers in a list must be of the same kind"); 2629 return AMDGPU::NoRegister; 2630 } 2631 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2632 return AMDGPU::NoRegister; 2633 } 2634 2635 if (!skipToken(AsmToken::RBrac, 2636 "expected a comma or a closing square bracket")) { 2637 return AMDGPU::NoRegister; 2638 } 2639 2640 if (isRegularReg(RegKind)) 2641 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2642 2643 return Reg; 2644 } 2645 2646 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2647 unsigned &RegNum, unsigned &RegWidth, 2648 SmallVectorImpl<AsmToken> &Tokens) { 2649 auto Loc = getLoc(); 2650 Reg = AMDGPU::NoRegister; 2651 2652 if (isToken(AsmToken::Identifier)) { 2653 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2654 if (Reg == AMDGPU::NoRegister) 2655 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2656 } else { 2657 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2658 } 2659 2660 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2661 if (Reg == AMDGPU::NoRegister) { 2662 assert(Parser.hasPendingError()); 2663 return false; 2664 } 2665 2666 if (!subtargetHasRegister(*TRI, Reg)) { 2667 if (Reg == AMDGPU::SGPR_NULL) { 2668 Error(Loc, "'null' operand is not supported on this GPU"); 2669 } else { 2670 Error(Loc, "register not available on this GPU"); 2671 } 2672 return false; 2673 } 2674 2675 return true; 2676 } 2677 2678 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2679 unsigned &RegNum, unsigned &RegWidth, 2680 bool RestoreOnFailure /*=false*/) { 2681 Reg = AMDGPU::NoRegister; 2682 2683 SmallVector<AsmToken, 1> Tokens; 2684 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2685 if (RestoreOnFailure) { 2686 while (!Tokens.empty()) { 2687 getLexer().UnLex(Tokens.pop_back_val()); 2688 } 2689 } 2690 return true; 2691 } 2692 return false; 2693 } 2694 2695 Optional<StringRef> 2696 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2697 switch (RegKind) { 2698 case IS_VGPR: 2699 return StringRef(".amdgcn.next_free_vgpr"); 2700 case IS_SGPR: 2701 return StringRef(".amdgcn.next_free_sgpr"); 2702 default: 2703 return None; 2704 } 2705 } 2706 2707 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2708 auto SymbolName = getGprCountSymbolName(RegKind); 2709 assert(SymbolName && "initializing invalid register kind"); 2710 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2711 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2712 } 2713 2714 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2715 unsigned DwordRegIndex, 2716 unsigned RegWidth) { 2717 // Symbols are only defined for GCN targets 2718 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2719 return true; 2720 2721 auto SymbolName = getGprCountSymbolName(RegKind); 2722 if (!SymbolName) 2723 return true; 2724 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2725 2726 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2727 int64_t OldCount; 2728 2729 if (!Sym->isVariable()) 2730 return !Error(getLoc(), 2731 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2732 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2733 return !Error( 2734 getLoc(), 2735 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2736 2737 if (OldCount <= NewMax) 2738 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2739 2740 return true; 2741 } 2742 2743 std::unique_ptr<AMDGPUOperand> 2744 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2745 const auto &Tok = getToken(); 2746 SMLoc StartLoc = Tok.getLoc(); 2747 SMLoc EndLoc = Tok.getEndLoc(); 2748 RegisterKind RegKind; 2749 unsigned Reg, RegNum, RegWidth; 2750 2751 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2752 return nullptr; 2753 } 2754 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2755 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2756 return nullptr; 2757 } else 2758 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2759 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2760 } 2761 2762 OperandMatchResultTy 2763 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2764 // TODO: add syntactic sugar for 1/(2*PI) 2765 2766 assert(!isRegister()); 2767 assert(!isModifier()); 2768 2769 const auto& Tok = getToken(); 2770 const auto& NextTok = peekToken(); 2771 bool IsReal = Tok.is(AsmToken::Real); 2772 SMLoc S = getLoc(); 2773 bool Negate = false; 2774 2775 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2776 lex(); 2777 IsReal = true; 2778 Negate = true; 2779 } 2780 2781 if (IsReal) { 2782 // Floating-point expressions are not supported. 2783 // Can only allow floating-point literals with an 2784 // optional sign. 2785 2786 StringRef Num = getTokenStr(); 2787 lex(); 2788 2789 APFloat RealVal(APFloat::IEEEdouble()); 2790 auto roundMode = APFloat::rmNearestTiesToEven; 2791 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2792 return MatchOperand_ParseFail; 2793 } 2794 if (Negate) 2795 RealVal.changeSign(); 2796 2797 Operands.push_back( 2798 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2799 AMDGPUOperand::ImmTyNone, true)); 2800 2801 return MatchOperand_Success; 2802 2803 } else { 2804 int64_t IntVal; 2805 const MCExpr *Expr; 2806 SMLoc S = getLoc(); 2807 2808 if (HasSP3AbsModifier) { 2809 // This is a workaround for handling expressions 2810 // as arguments of SP3 'abs' modifier, for example: 2811 // |1.0| 2812 // |-1| 2813 // |1+x| 2814 // This syntax is not compatible with syntax of standard 2815 // MC expressions (due to the trailing '|'). 
2816 SMLoc EndLoc; 2817 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2818 return MatchOperand_ParseFail; 2819 } else { 2820 if (Parser.parseExpression(Expr)) 2821 return MatchOperand_ParseFail; 2822 } 2823 2824 if (Expr->evaluateAsAbsolute(IntVal)) { 2825 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2826 } else { 2827 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2828 } 2829 2830 return MatchOperand_Success; 2831 } 2832 2833 return MatchOperand_NoMatch; 2834 } 2835 2836 OperandMatchResultTy 2837 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2838 if (!isRegister()) 2839 return MatchOperand_NoMatch; 2840 2841 if (auto R = parseRegister()) { 2842 assert(R->isReg()); 2843 Operands.push_back(std::move(R)); 2844 return MatchOperand_Success; 2845 } 2846 return MatchOperand_ParseFail; 2847 } 2848 2849 OperandMatchResultTy 2850 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2851 auto res = parseReg(Operands); 2852 if (res != MatchOperand_NoMatch) { 2853 return res; 2854 } else if (isModifier()) { 2855 return MatchOperand_NoMatch; 2856 } else { 2857 return parseImm(Operands, HasSP3AbsMod); 2858 } 2859 } 2860 2861 bool 2862 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2863 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2864 const auto &str = Token.getString(); 2865 return str == "abs" || str == "neg" || str == "sext"; 2866 } 2867 return false; 2868 } 2869 2870 bool 2871 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2872 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2873 } 2874 2875 bool 2876 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2877 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2878 } 2879 2880 bool 2881 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2882 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2883 } 2884 2885 // Check if this is an operand modifier or an opcode modifier 2886 // which may look like an expression but it is not. We should 2887 // avoid parsing these modifiers as expressions. Currently 2888 // recognized sequences are: 2889 // |...| 2890 // abs(...) 2891 // neg(...) 2892 // sext(...) 2893 // -reg 2894 // -|...| 2895 // -abs(...) 2896 // name:... 2897 // Note that simple opcode modifiers like 'gds' may be parsed as 2898 // expressions; this is a special case. See getExpressionAsToken. 2899 // 2900 bool 2901 AMDGPUAsmParser::isModifier() { 2902 2903 AsmToken Tok = getToken(); 2904 AsmToken NextToken[2]; 2905 peekTokens(NextToken); 2906 2907 return isOperandModifier(Tok, NextToken[0]) || 2908 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2909 isOpcodeModifierWithVal(Tok, NextToken[0]); 2910 } 2911 2912 // Check if the current token is an SP3 'neg' modifier. 2913 // Currently this modifier is allowed in the following context: 2914 // 2915 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2916 // 2. Before an 'abs' modifier: -abs(...) 2917 // 3. Before an SP3 'abs' modifier: -|...| 2918 // 2919 // In all other cases "-" is handled as a part 2920 // of an expression that follows the sign. 
2921 //
2922 // Note: When "-" is followed by an integer literal,
2923 // this is interpreted as integer negation rather
2924 // than a floating-point NEG modifier applied to N.
2925 // Besides being counter-intuitive, such use of the floating-point
2926 // NEG modifier would have resulted in different meanings
2927 // of integer literals used with VOP1/2/C and VOP3,
2928 // for example:
2929 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2930 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2931 // Negative fp literals with a preceding "-" are
2932 // handled likewise for uniformity.
2933 //
2934 bool
2935 AMDGPUAsmParser::parseSP3NegModifier() {
2936
2937 AsmToken NextToken[2];
2938 peekTokens(NextToken);
2939
2940 if (isToken(AsmToken::Minus) &&
2941 (isRegister(NextToken[0], NextToken[1]) ||
2942 NextToken[0].is(AsmToken::Pipe) ||
2943 isId(NextToken[0], "abs"))) {
2944 lex();
2945 return true;
2946 }
2947
2948 return false;
2949 }
2950
2951 OperandMatchResultTy
2952 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2953 bool AllowImm) {
2954 bool Neg, SP3Neg;
2955 bool Abs, SP3Abs;
2956 SMLoc Loc;
2957
2958 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2959 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2960 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2961 return MatchOperand_ParseFail;
2962 }
2963
2964 SP3Neg = parseSP3NegModifier();
2965
2966 Loc = getLoc();
2967 Neg = trySkipId("neg");
2968 if (Neg && SP3Neg) {
2969 Error(Loc, "expected register or immediate");
2970 return MatchOperand_ParseFail;
2971 }
2972 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2973 return MatchOperand_ParseFail;
2974
2975 Abs = trySkipId("abs");
2976 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2977 return MatchOperand_ParseFail;
2978
2979 Loc = getLoc();
2980 SP3Abs = trySkipToken(AsmToken::Pipe);
2981 if (Abs && SP3Abs) {
2982 Error(Loc, "expected register or immediate");
2983 return MatchOperand_ParseFail;
2984 }
2985
2986 OperandMatchResultTy Res;
2987 if (AllowImm) {
2988 Res = parseRegOrImm(Operands, SP3Abs);
2989 } else {
2990 Res = parseReg(Operands);
2991 }
2992 if (Res != MatchOperand_Success) {
2993 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2994 } 2995 2996 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2997 return MatchOperand_ParseFail; 2998 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2999 return MatchOperand_ParseFail; 3000 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3001 return MatchOperand_ParseFail; 3002 3003 AMDGPUOperand::Modifiers Mods; 3004 Mods.Abs = Abs || SP3Abs; 3005 Mods.Neg = Neg || SP3Neg; 3006 3007 if (Mods.hasFPModifiers()) { 3008 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3009 if (Op.isExpr()) { 3010 Error(Op.getStartLoc(), "expected an absolute expression"); 3011 return MatchOperand_ParseFail; 3012 } 3013 Op.setModifiers(Mods); 3014 } 3015 return MatchOperand_Success; 3016 } 3017 3018 OperandMatchResultTy 3019 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3020 bool AllowImm) { 3021 bool Sext = trySkipId("sext"); 3022 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3023 return MatchOperand_ParseFail; 3024 3025 OperandMatchResultTy Res; 3026 if (AllowImm) { 3027 Res = parseRegOrImm(Operands); 3028 } else { 3029 Res = parseReg(Operands); 3030 } 3031 if (Res != MatchOperand_Success) { 3032 return Sext? MatchOperand_ParseFail : Res; 3033 } 3034 3035 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3036 return MatchOperand_ParseFail; 3037 3038 AMDGPUOperand::Modifiers Mods; 3039 Mods.Sext = Sext; 3040 3041 if (Mods.hasIntModifiers()) { 3042 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3043 if (Op.isExpr()) { 3044 Error(Op.getStartLoc(), "expected an absolute expression"); 3045 return MatchOperand_ParseFail; 3046 } 3047 Op.setModifiers(Mods); 3048 } 3049 3050 return MatchOperand_Success; 3051 } 3052 3053 OperandMatchResultTy 3054 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3055 return parseRegOrImmWithFPInputMods(Operands, false); 3056 } 3057 3058 OperandMatchResultTy 3059 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3060 return parseRegOrImmWithIntInputMods(Operands, false); 3061 } 3062 3063 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3064 auto Loc = getLoc(); 3065 if (trySkipId("off")) { 3066 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3067 AMDGPUOperand::ImmTyOff, false)); 3068 return MatchOperand_Success; 3069 } 3070 3071 if (!isRegister()) 3072 return MatchOperand_NoMatch; 3073 3074 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3075 if (Reg) { 3076 Operands.push_back(std::move(Reg)); 3077 return MatchOperand_Success; 3078 } 3079 3080 return MatchOperand_ParseFail; 3081 3082 } 3083 3084 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3085 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3086 3087 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3088 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3089 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3090 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3091 return Match_InvalidOperand; 3092 3093 if ((TSFlags & SIInstrFlags::VOP3) && 3094 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3095 getForcedEncodingSize() != 64) 3096 return Match_PreferE32; 3097 3098 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3099 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3100 // v_mac_f32/16 allow only dst_sel == DWORD; 3101 auto OpNum = 3102 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3103 const auto &Op = Inst.getOperand(OpNum);
3104 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3105 return Match_InvalidOperand;
3106 }
3107 }
3108
3109 return Match_Success;
3110 }
3111
3112 static ArrayRef<unsigned> getAllVariants() {
3113 static const unsigned Variants[] = {
3114 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3115 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3116 };
3117
3118 return makeArrayRef(Variants);
3119 }
3120
3121 // What asm variants we should check
3122 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3123 if (getForcedEncodingSize() == 32) {
3124 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3125 return makeArrayRef(Variants);
3126 }
3127
3128 if (isForcedVOP3()) {
3129 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3130 return makeArrayRef(Variants);
3131 }
3132
3133 if (isForcedSDWA()) {
3134 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3135 AMDGPUAsmVariants::SDWA9};
3136 return makeArrayRef(Variants);
3137 }
3138
3139 if (isForcedDPP()) {
3140 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3141 return makeArrayRef(Variants);
3142 }
3143
3144 return getAllVariants();
3145 }
3146
3147 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3148 if (getForcedEncodingSize() == 32)
3149 return "e32";
3150
3151 if (isForcedVOP3())
3152 return "e64";
3153
3154 if (isForcedSDWA())
3155 return "sdwa";
3156
3157 if (isForcedDPP())
3158 return "dpp";
3159
3160 return "";
3161 }
3162
3163 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3164 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3165 const unsigned Num = Desc.getNumImplicitUses();
3166 for (unsigned i = 0; i < Num; ++i) {
3167 unsigned Reg = Desc.ImplicitUses[i];
3168 switch (Reg) {
3169 case AMDGPU::FLAT_SCR:
3170 case AMDGPU::VCC:
3171 case AMDGPU::VCC_LO:
3172 case AMDGPU::VCC_HI:
3173 case AMDGPU::M0:
3174 return Reg;
3175 default:
3176 break;
3177 }
3178 }
3179 return AMDGPU::NoRegister;
3180 }
3181
3182 // NB: This code is correct only when used to check constant
3183 // bus limitations because GFX7 supports no f16 inline constants.
3184 // Note that there are no cases in which a GFX7 opcode violates
3185 // constant bus limitations due to the use of an f16 constant.
3186 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3187 unsigned OpIdx) const { 3188 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3189 3190 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3191 return false; 3192 } 3193 3194 const MCOperand &MO = Inst.getOperand(OpIdx); 3195 3196 int64_t Val = MO.getImm(); 3197 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3198 3199 switch (OpSize) { // expected operand size 3200 case 8: 3201 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3202 case 4: 3203 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3204 case 2: { 3205 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3206 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3207 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3208 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3209 return AMDGPU::isInlinableIntLiteral(Val); 3210 3211 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3212 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3213 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3214 return AMDGPU::isInlinableIntLiteralV216(Val); 3215 3216 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3217 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3218 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3219 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3220 3221 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3222 } 3223 default: 3224 llvm_unreachable("invalid operand size"); 3225 } 3226 } 3227 3228 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3229 if (!isGFX10Plus()) 3230 return 1; 3231 3232 switch (Opcode) { 3233 // 64-bit shift instructions can use only one scalar value input 3234 case AMDGPU::V_LSHLREV_B64_e64: 3235 case AMDGPU::V_LSHLREV_B64_gfx10: 3236 case AMDGPU::V_LSHRREV_B64_e64: 3237 case AMDGPU::V_LSHRREV_B64_gfx10: 3238 case AMDGPU::V_ASHRREV_I64_e64: 3239 case AMDGPU::V_ASHRREV_I64_gfx10: 3240 case AMDGPU::V_LSHL_B64_e64: 3241 case AMDGPU::V_LSHR_B64_e64: 3242 case AMDGPU::V_ASHR_I64_e64: 3243 return 1; 3244 default: 3245 return 2; 3246 } 3247 } 3248 3249 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3250 const MCOperand &MO = Inst.getOperand(OpIdx); 3251 if (MO.isImm()) { 3252 return !isInlineConstant(Inst, OpIdx); 3253 } else if (MO.isReg()) { 3254 auto Reg = MO.getReg(); 3255 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3256 auto PReg = mc2PseudoReg(Reg); 3257 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3258 } else { 3259 return true; 3260 } 3261 } 3262 3263 bool 3264 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3265 const OperandVector &Operands) { 3266 const unsigned Opcode = Inst.getOpcode(); 3267 const MCInstrDesc &Desc = MII.get(Opcode); 3268 unsigned LastSGPR = AMDGPU::NoRegister; 3269 unsigned ConstantBusUseCount = 0; 3270 unsigned NumLiterals = 0; 3271 unsigned LiteralSize; 3272 3273 if (Desc.TSFlags & 3274 (SIInstrFlags::VOPC | 3275 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3276 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3277 SIInstrFlags::SDWA)) { 3278 // Check special imm operands (used by madmk, etc) 3279 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3280 ++NumLiterals; 3281 LiteralSize = 4; 3282 } 3283 3284 SmallDenseSet<unsigned> SGPRsUsed; 3285 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3286 if (SGPRUsed != AMDGPU::NoRegister) { 3287 SGPRsUsed.insert(SGPRUsed); 3288 ++ConstantBusUseCount; 3289 } 3290 3291 
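// Scan src0..src2 below: each distinct SGPR and each literal in use counts
// against the constant bus limit (1 before GFX10 and usually 2 on GFX10+,
// see getConstantBusLimit).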
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3292 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3293 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3294
3295 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3296
3297 for (int OpIdx : OpIndices) {
3298 if (OpIdx == -1) break;
3299
3300 const MCOperand &MO = Inst.getOperand(OpIdx);
3301 if (usesConstantBus(Inst, OpIdx)) {
3302 if (MO.isReg()) {
3303 LastSGPR = mc2PseudoReg(MO.getReg());
3304 // Pairs of registers with a partial intersection like these
3305 // s0, s[0:1]
3306 // flat_scratch_lo, flat_scratch
3307 // flat_scratch_lo, flat_scratch_hi
3308 // are theoretically valid but they are disabled anyway.
3309 // Note that this code mimics SIInstrInfo::verifyInstruction
3310 if (!SGPRsUsed.count(LastSGPR)) {
3311 SGPRsUsed.insert(LastSGPR);
3312 ++ConstantBusUseCount;
3313 }
3314 } else { // Expression or a literal
3315
3316 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3317 continue; // special operand like VINTERP attr_chan
3318
3319 // An instruction may use only one literal.
3320 // This has been validated in a previous step.
3321 // See validateVOPLiteral.
3322 // This literal may be used as more than one operand.
3323 // If all these operands are of the same size,
3324 // this literal counts as one scalar value.
3325 // Otherwise it counts as 2 scalar values.
3326 // See "GFX10 Shader Programming", section 3.6.2.3.
3327
3328 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3329 if (Size < 4) Size = 4;
3330
3331 if (NumLiterals == 0) {
3332 NumLiterals = 1;
3333 LiteralSize = Size;
3334 } else if (LiteralSize != Size) {
3335 NumLiterals = 2;
3336 }
3337 }
3338 }
3339 }
3340 }
3341 ConstantBusUseCount += NumLiterals;
3342
3343 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3344 return true;
3345
3346 SMLoc LitLoc = getLitLoc(Operands);
3347 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3348 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3349 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3350 return false; 3351 } 3352 3353 bool 3354 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3355 const OperandVector &Operands) { 3356 const unsigned Opcode = Inst.getOpcode(); 3357 const MCInstrDesc &Desc = MII.get(Opcode); 3358 3359 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3360 if (DstIdx == -1 || 3361 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3362 return true; 3363 } 3364 3365 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3366 3367 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3368 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3369 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3370 3371 assert(DstIdx != -1); 3372 const MCOperand &Dst = Inst.getOperand(DstIdx); 3373 assert(Dst.isReg()); 3374 3375 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3376 3377 for (int SrcIdx : SrcIndices) { 3378 if (SrcIdx == -1) break; 3379 const MCOperand &Src = Inst.getOperand(SrcIdx); 3380 if (Src.isReg()) { 3381 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3382 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3383 Error(getRegLoc(SrcReg, Operands), 3384 "destination must be different than all sources"); 3385 return false; 3386 } 3387 } 3388 } 3389 3390 return true; 3391 } 3392 3393 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3394 3395 const unsigned Opc = Inst.getOpcode(); 3396 const MCInstrDesc &Desc = MII.get(Opc); 3397 3398 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3399 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3400 assert(ClampIdx != -1); 3401 return Inst.getOperand(ClampIdx).getImm() == 0; 3402 } 3403 3404 return true; 3405 } 3406 3407 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3408 3409 const unsigned Opc = Inst.getOpcode(); 3410 const MCInstrDesc &Desc = MII.get(Opc); 3411 3412 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3413 return true; 3414 3415 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3416 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3417 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3418 3419 assert(VDataIdx != -1); 3420 3421 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3422 return true; 3423 3424 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3425 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3426 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3427 if (DMask == 0) 3428 DMask = 1; 3429 3430 unsigned DataSize = 3431 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3432 if (hasPackedD16()) { 3433 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3434 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3435 DataSize = (DataSize + 1) / 2; 3436 } 3437 3438 return (VDataSize / 4) == DataSize + TFESize; 3439 } 3440 3441 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3442 const unsigned Opc = Inst.getOpcode(); 3443 const MCInstrDesc &Desc = MII.get(Opc); 3444 3445 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3446 return true; 3447 3448 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3449 3450 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3451 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3452 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3453 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3454 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3455 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3456 3457 assert(VAddr0Idx != -1); 3458 assert(SrsrcIdx != -1); 3459 assert(SrsrcIdx > VAddr0Idx); 3460 3461 if (DimIdx == -1) 3462 return true; // intersect_ray 3463 3464 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3465 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3466 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3467 unsigned ActualAddrSize = 3468 IsNSA ? SrsrcIdx - VAddr0Idx 3469 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3470 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3471 3472 unsigned ExpectedAddrSize = 3473 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3474 3475 if (!IsNSA) { 3476 if (ExpectedAddrSize > 8) 3477 ExpectedAddrSize = 16; 3478 3479 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3480 // This provides backward compatibility for assembly created 3481 // before 160b/192b/224b types were directly supported. 3482 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3483 return true; 3484 } 3485 3486 return ActualAddrSize == ExpectedAddrSize; 3487 } 3488 3489 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3490 3491 const unsigned Opc = Inst.getOpcode(); 3492 const MCInstrDesc &Desc = MII.get(Opc); 3493 3494 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3495 return true; 3496 if (!Desc.mayLoad() || !Desc.mayStore()) 3497 return true; // Not atomic 3498 3499 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3500 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3501 3502 // This is an incomplete check because image_atomic_cmpswap 3503 // may only use 0x3 and 0xf while other atomic operations 3504 // may use 0x1 and 0x3. However these limitations are 3505 // verified when we check that dmask matches dst size. 3506 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3507 } 3508 3509 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3510 3511 const unsigned Opc = Inst.getOpcode(); 3512 const MCInstrDesc &Desc = MII.get(Opc); 3513 3514 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3515 return true; 3516 3517 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3518 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3519 3520 // GATHER4 instructions use dmask in a different fashion compared to 3521 // other MIMG instructions. The only useful DMASK values are 3522 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3523 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3524 // this. 3525 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3526 } 3527 3528 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3529 const unsigned Opc = Inst.getOpcode(); 3530 const MCInstrDesc &Desc = MII.get(Opc); 3531 3532 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3533 return true; 3534 3535 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3536 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3537 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3538 3539 if (!BaseOpcode->MSAA) 3540 return true; 3541 3542 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3543 assert(DimIdx != -1); 3544 3545 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3546 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3547 3548 return DimInfo->MSAA; 3549 } 3550 3551 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3552 { 3553 switch (Opcode) { 3554 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3555 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3556 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3557 return true; 3558 default: 3559 return false; 3560 } 3561 } 3562 3563 // movrels* opcodes should only allow VGPRS as src0. 3564 // This is specified in .td description for vop1/vop3, 3565 // but sdwa is handled differently. See isSDWAOperand. 3566 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3567 const OperandVector &Operands) { 3568 3569 const unsigned Opc = Inst.getOpcode(); 3570 const MCInstrDesc &Desc = MII.get(Opc); 3571 3572 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3573 return true; 3574 3575 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3576 assert(Src0Idx != -1); 3577 3578 SMLoc ErrLoc; 3579 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3580 if (Src0.isReg()) { 3581 auto Reg = mc2PseudoReg(Src0.getReg()); 3582 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3583 if (!isSGPR(Reg, TRI)) 3584 return true; 3585 ErrLoc = getRegLoc(Reg, Operands); 3586 } else { 3587 ErrLoc = getConstLoc(Operands); 3588 } 3589 3590 Error(ErrLoc, "source operand must be a VGPR"); 3591 return false; 3592 } 3593 3594 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3595 const OperandVector &Operands) { 3596 3597 const unsigned Opc = Inst.getOpcode(); 3598 3599 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3600 return true; 3601 3602 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3603 assert(Src0Idx != -1); 3604 3605 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3606 if (!Src0.isReg()) 3607 return true; 3608 3609 auto Reg = mc2PseudoReg(Src0.getReg()); 3610 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3611 if (isSGPR(Reg, TRI)) { 3612 Error(getRegLoc(Reg, Operands), 3613 "source operand must be either a VGPR or an inline constant"); 3614 return false; 3615 } 3616 3617 return true; 3618 } 3619 3620 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3621 const OperandVector &Operands) { 3622 const unsigned Opc = Inst.getOpcode(); 3623 const MCInstrDesc &Desc = MII.get(Opc); 3624 3625 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3626 return true; 3627 3628 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3629 if (Src2Idx == -1) 3630 return true; 3631 3632 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3633 if (!Src2.isReg()) 3634 return true; 3635 3636 MCRegister Src2Reg = Src2.getReg(); 3637 MCRegister DstReg = Inst.getOperand(0).getReg(); 3638 if 
(Src2Reg == DstReg)
    return true;

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  default:
    return true;
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    break;
  }

  // TODO: Check that src0 = src1 or src2.

  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
            .getImm() &
        SISrcMods::ABS) {
      return false;
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case
AMDGPU::V_SUBREV_U16_e64_vi: 3759 3760 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3761 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3762 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3763 3764 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3765 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3766 3767 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3768 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3769 3770 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3771 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3772 3773 case AMDGPU::V_LSHRREV_B32_e32: 3774 case AMDGPU::V_LSHRREV_B32_e64: 3775 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3776 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3777 case AMDGPU::V_LSHRREV_B32_e32_vi: 3778 case AMDGPU::V_LSHRREV_B32_e64_vi: 3779 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3780 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3781 3782 case AMDGPU::V_ASHRREV_I32_e32: 3783 case AMDGPU::V_ASHRREV_I32_e64: 3784 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3785 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3786 case AMDGPU::V_ASHRREV_I32_e32_vi: 3787 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3788 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3789 case AMDGPU::V_ASHRREV_I32_e64_vi: 3790 3791 case AMDGPU::V_LSHLREV_B32_e32: 3792 case AMDGPU::V_LSHLREV_B32_e64: 3793 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3794 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3795 case AMDGPU::V_LSHLREV_B32_e32_vi: 3796 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3797 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3798 case AMDGPU::V_LSHLREV_B32_e64_vi: 3799 3800 case AMDGPU::V_LSHLREV_B16_e32: 3801 case AMDGPU::V_LSHLREV_B16_e64: 3802 case AMDGPU::V_LSHLREV_B16_e32_vi: 3803 case AMDGPU::V_LSHLREV_B16_e64_vi: 3804 case AMDGPU::V_LSHLREV_B16_gfx10: 3805 3806 case AMDGPU::V_LSHRREV_B16_e32: 3807 case AMDGPU::V_LSHRREV_B16_e64: 3808 case AMDGPU::V_LSHRREV_B16_e32_vi: 3809 case AMDGPU::V_LSHRREV_B16_e64_vi: 3810 case AMDGPU::V_LSHRREV_B16_gfx10: 3811 3812 case AMDGPU::V_ASHRREV_I16_e32: 3813 case AMDGPU::V_ASHRREV_I16_e64: 3814 case AMDGPU::V_ASHRREV_I16_e32_vi: 3815 case AMDGPU::V_ASHRREV_I16_e64_vi: 3816 case AMDGPU::V_ASHRREV_I16_gfx10: 3817 3818 case AMDGPU::V_LSHLREV_B64_e64: 3819 case AMDGPU::V_LSHLREV_B64_gfx10: 3820 case AMDGPU::V_LSHLREV_B64_vi: 3821 3822 case AMDGPU::V_LSHRREV_B64_e64: 3823 case AMDGPU::V_LSHRREV_B64_gfx10: 3824 case AMDGPU::V_LSHRREV_B64_vi: 3825 3826 case AMDGPU::V_ASHRREV_I64_e64: 3827 case AMDGPU::V_ASHRREV_I64_gfx10: 3828 case AMDGPU::V_ASHRREV_I64_vi: 3829 3830 case AMDGPU::V_PK_LSHLREV_B16: 3831 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3832 case AMDGPU::V_PK_LSHLREV_B16_vi: 3833 3834 case AMDGPU::V_PK_LSHRREV_B16: 3835 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3836 case AMDGPU::V_PK_LSHRREV_B16_vi: 3837 case AMDGPU::V_PK_ASHRREV_I16: 3838 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3839 case AMDGPU::V_PK_ASHRREV_I16_vi: 3840 return true; 3841 default: 3842 return false; 3843 } 3844 } 3845 3846 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3847 3848 using namespace SIInstrFlags; 3849 const unsigned Opcode = Inst.getOpcode(); 3850 const MCInstrDesc &Desc = MII.get(Opcode); 3851 3852 // lds_direct register is defined so that it can be used 3853 // with 9-bit operands only. Ignore encodings which do not accept these. 
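  // Illustrative summary of the checks below (an informal sketch, not taken
  // from the ISA documents): an operand spelled "lds_direct" is accepted as
  // src0 of a plain VOP1/VOP2/VOP3/VOP3P/VOPC encoding, but is rejected on
  // gfx90a, with SDWA variants or *rev opcodes, and in any source position
  // other than src0.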
3854 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3855 if ((Desc.TSFlags & Enc) == 0) 3856 return None; 3857 3858 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3859 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3860 if (SrcIdx == -1) 3861 break; 3862 const auto &Src = Inst.getOperand(SrcIdx); 3863 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3864 3865 if (isGFX90A()) 3866 return StringRef("lds_direct is not supported on this GPU"); 3867 3868 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3869 return StringRef("lds_direct cannot be used with this instruction"); 3870 3871 if (SrcName != OpName::src0) 3872 return StringRef("lds_direct may be used as src0 only"); 3873 } 3874 } 3875 3876 return None; 3877 } 3878 3879 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3880 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3881 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3882 if (Op.isFlatOffset()) 3883 return Op.getStartLoc(); 3884 } 3885 return getLoc(); 3886 } 3887 3888 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3889 const OperandVector &Operands) { 3890 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3891 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3892 return true; 3893 3894 auto Opcode = Inst.getOpcode(); 3895 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3896 assert(OpNum != -1); 3897 3898 const auto &Op = Inst.getOperand(OpNum); 3899 if (!hasFlatOffsets() && Op.getImm() != 0) { 3900 Error(getFlatOffsetLoc(Operands), 3901 "flat offset modifier is not supported on this GPU"); 3902 return false; 3903 } 3904 3905 // For FLAT segment the offset must be positive; 3906 // MSB is ignored and forced to zero. 3907 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3908 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3909 if (!isIntN(OffsetSize, Op.getImm())) { 3910 Error(getFlatOffsetLoc(Operands), 3911 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3912 return false; 3913 } 3914 } else { 3915 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3916 if (!isUIntN(OffsetSize, Op.getImm())) { 3917 Error(getFlatOffsetLoc(Operands), 3918 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3919 return false; 3920 } 3921 } 3922 3923 return true; 3924 } 3925 3926 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3927 // Start with second operand because SMEM Offset cannot be dst or src0. 
3928 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3929 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3930 if (Op.isSMEMOffset()) 3931 return Op.getStartLoc(); 3932 } 3933 return getLoc(); 3934 } 3935 3936 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3937 const OperandVector &Operands) { 3938 if (isCI() || isSI()) 3939 return true; 3940 3941 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3942 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3943 return true; 3944 3945 auto Opcode = Inst.getOpcode(); 3946 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3947 if (OpNum == -1) 3948 return true; 3949 3950 const auto &Op = Inst.getOperand(OpNum); 3951 if (!Op.isImm()) 3952 return true; 3953 3954 uint64_t Offset = Op.getImm(); 3955 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3956 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3957 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3958 return true; 3959 3960 Error(getSMEMOffsetLoc(Operands), 3961 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3962 "expected a 21-bit signed offset"); 3963 3964 return false; 3965 } 3966 3967 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3968 unsigned Opcode = Inst.getOpcode(); 3969 const MCInstrDesc &Desc = MII.get(Opcode); 3970 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3971 return true; 3972 3973 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3974 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3975 3976 const int OpIndices[] = { Src0Idx, Src1Idx }; 3977 3978 unsigned NumExprs = 0; 3979 unsigned NumLiterals = 0; 3980 uint32_t LiteralValue; 3981 3982 for (int OpIdx : OpIndices) { 3983 if (OpIdx == -1) break; 3984 3985 const MCOperand &MO = Inst.getOperand(OpIdx); 3986 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3987 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3988 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3989 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3990 if (NumLiterals == 0 || LiteralValue != Value) { 3991 LiteralValue = Value; 3992 ++NumLiterals; 3993 } 3994 } else if (MO.isExpr()) { 3995 ++NumExprs; 3996 } 3997 } 3998 } 3999 4000 return NumLiterals + NumExprs <= 1; 4001 } 4002 4003 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4004 const unsigned Opc = Inst.getOpcode(); 4005 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4006 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4007 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4008 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4009 4010 if (OpSel & ~3) 4011 return false; 4012 } 4013 return true; 4014 } 4015 4016 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4017 const OperandVector &Operands) { 4018 const unsigned Opc = Inst.getOpcode(); 4019 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4020 if (DppCtrlIdx < 0) 4021 return true; 4022 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4023 4024 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4025 // DPP64 is supported for row_newbcast only. 
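    // In other words (illustrative): when src0 is a 64-bit register pair,
    // only a dpp_ctrl of the row_newbcast:<n> form is accepted; any other
    // control (e.g. quad_perm or row_shl) is rejected with the diagnostic
    // below.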
4026 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4027 if (Src0Idx >= 0 && 4028 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4029 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4030 Error(S, "64 bit dpp only supports row_newbcast"); 4031 return false; 4032 } 4033 } 4034 4035 return true; 4036 } 4037 4038 // Check if VCC register matches wavefront size 4039 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4040 auto FB = getFeatureBits(); 4041 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4042 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4043 } 4044 4045 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4046 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4047 const OperandVector &Operands) { 4048 unsigned Opcode = Inst.getOpcode(); 4049 const MCInstrDesc &Desc = MII.get(Opcode); 4050 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4051 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4052 ImmIdx == -1) 4053 return true; 4054 4055 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4056 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4057 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4058 4059 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4060 4061 unsigned NumExprs = 0; 4062 unsigned NumLiterals = 0; 4063 uint32_t LiteralValue; 4064 4065 for (int OpIdx : OpIndices) { 4066 if (OpIdx == -1) 4067 continue; 4068 4069 const MCOperand &MO = Inst.getOperand(OpIdx); 4070 if (!MO.isImm() && !MO.isExpr()) 4071 continue; 4072 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4073 continue; 4074 4075 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4076 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4077 Error(getConstLoc(Operands), 4078 "inline constants are not allowed for this operand"); 4079 return false; 4080 } 4081 4082 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4083 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4084 if (NumLiterals == 0 || LiteralValue != Value) { 4085 LiteralValue = Value; 4086 ++NumLiterals; 4087 } 4088 } else if (MO.isExpr()) { 4089 ++NumExprs; 4090 } 4091 } 4092 NumLiterals += NumExprs; 4093 4094 if (!NumLiterals) 4095 return true; 4096 4097 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4098 Error(getLitLoc(Operands), "literal operands are not supported"); 4099 return false; 4100 } 4101 4102 if (NumLiterals > 1) { 4103 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4104 return false; 4105 } 4106 4107 return true; 4108 } 4109 4110 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4111 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4112 const MCRegisterInfo *MRI) { 4113 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4114 if (OpIdx < 0) 4115 return -1; 4116 4117 const MCOperand &Op = Inst.getOperand(OpIdx); 4118 if (!Op.isReg()) 4119 return -1; 4120 4121 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4122 auto Reg = Sub ? Sub : Op.getReg(); 4123 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4124 return AGPR32.contains(Reg) ? 
1 : 0; 4125 } 4126 4127 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4128 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4129 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4130 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4131 SIInstrFlags::DS)) == 0) 4132 return true; 4133 4134 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4135 : AMDGPU::OpName::vdata; 4136 4137 const MCRegisterInfo *MRI = getMRI(); 4138 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4139 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4140 4141 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4142 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4143 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4144 return false; 4145 } 4146 4147 auto FB = getFeatureBits(); 4148 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4149 if (DataAreg < 0 || DstAreg < 0) 4150 return true; 4151 return DstAreg == DataAreg; 4152 } 4153 4154 return DstAreg < 1 && DataAreg < 1; 4155 } 4156 4157 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4158 auto FB = getFeatureBits(); 4159 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4160 return true; 4161 4162 const MCRegisterInfo *MRI = getMRI(); 4163 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4164 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4165 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4166 const MCOperand &Op = Inst.getOperand(I); 4167 if (!Op.isReg()) 4168 continue; 4169 4170 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4171 if (!Sub) 4172 continue; 4173 4174 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4175 return false; 4176 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4177 return false; 4178 } 4179 4180 return true; 4181 } 4182 4183 // gfx90a has an undocumented limitation: 4184 // DS_GWS opcodes must use even aligned registers. 4185 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4186 const OperandVector &Operands) { 4187 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4188 return true; 4189 4190 int Opc = Inst.getOpcode(); 4191 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4192 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4193 return true; 4194 4195 const MCRegisterInfo *MRI = getMRI(); 4196 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4197 int Data0Pos = 4198 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4199 assert(Data0Pos != -1); 4200 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4201 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4202 if (RegIdx & 1) { 4203 SMLoc RegLoc = getRegLoc(Reg, Operands); 4204 Error(RegLoc, "vgpr must be even aligned"); 4205 return false; 4206 } 4207 4208 return true; 4209 } 4210 4211 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4212 const OperandVector &Operands, 4213 const SMLoc &IDLoc) { 4214 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4215 AMDGPU::OpName::cpol); 4216 if (CPolPos == -1) 4217 return true; 4218 4219 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4220 4221 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4222 if ((TSFlags & (SIInstrFlags::SMRD)) && 4223 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4224 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4225 return false; 4226 } 4227 4228 if (isGFX90A() && (CPol & CPol::SCC)) { 4229 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4230 StringRef CStr(S.getPointer()); 4231 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4232 Error(S, "scc is not supported on this GPU"); 4233 return false; 4234 } 4235 4236 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4237 return true; 4238 4239 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4240 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4241 Error(IDLoc, "instruction must use glc"); 4242 return false; 4243 } 4244 } else { 4245 if (CPol & CPol::GLC) { 4246 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4247 StringRef CStr(S.getPointer()); 4248 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4249 Error(S, "instruction must not use glc"); 4250 return false; 4251 } 4252 } 4253 4254 return true; 4255 } 4256 4257 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4258 const SMLoc &IDLoc, 4259 const OperandVector &Operands) { 4260 if (auto ErrMsg = validateLdsDirect(Inst)) { 4261 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4262 return false; 4263 } 4264 if (!validateSOPLiteral(Inst)) { 4265 Error(getLitLoc(Operands), 4266 "only one literal operand is allowed"); 4267 return false; 4268 } 4269 if (!validateVOPLiteral(Inst, Operands)) { 4270 return false; 4271 } 4272 if (!validateConstantBusLimitations(Inst, Operands)) { 4273 return false; 4274 } 4275 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4276 return false; 4277 } 4278 if (!validateIntClampSupported(Inst)) { 4279 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4280 "integer clamping is not supported on this GPU"); 4281 return false; 4282 } 4283 if (!validateOpSel(Inst)) { 4284 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4285 "invalid op_sel operand"); 4286 return false; 4287 } 4288 if (!validateDPP(Inst, Operands)) { 4289 return false; 4290 } 4291 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
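  // As an illustration (a sketch, not taken from a test): an MIMG instruction
  // carrying the d16 modifier, e.g. "image_load v[0:1], ... d16", is rejected
  // by the check below when targeting SI/CI.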
if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst)) {
    Error(IDLoc, "dim modifier is required on this GPU");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
          "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAddrSize(Inst)) {
    Error(IDLoc,
          "image address size does not match dim and a16");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateFlatOffset(Inst, Operands)) {
    return false;
  }
  if (!validateSMEMOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
    ? "invalid register class: data and dst should be all VGPR or AGPR"
    : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
          "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateGWS(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }

  return true;
}

static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
      return true;
  }

  return false;
}

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
// Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // Match statuses are ordered from least to most specific, and we keep the
    // most specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
4476 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4477 4478 case Match_InvalidOperand: { 4479 SMLoc ErrorLoc = IDLoc; 4480 if (ErrorInfo != ~0ULL) { 4481 if (ErrorInfo >= Operands.size()) { 4482 return Error(IDLoc, "too few operands for instruction"); 4483 } 4484 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4485 if (ErrorLoc == SMLoc()) 4486 ErrorLoc = IDLoc; 4487 } 4488 return Error(ErrorLoc, "invalid operand for instruction"); 4489 } 4490 4491 case Match_PreferE32: 4492 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4493 "should be encoded as e32"); 4494 case Match_MnemonicFail: 4495 llvm_unreachable("Invalid instructions should have been handled already"); 4496 } 4497 llvm_unreachable("Implement any new match types added!"); 4498 } 4499 4500 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4501 int64_t Tmp = -1; 4502 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4503 return true; 4504 } 4505 if (getParser().parseAbsoluteExpression(Tmp)) { 4506 return true; 4507 } 4508 Ret = static_cast<uint32_t>(Tmp); 4509 return false; 4510 } 4511 4512 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4513 uint32_t &Minor) { 4514 if (ParseAsAbsoluteExpression(Major)) 4515 return TokError("invalid major version"); 4516 4517 if (!trySkipToken(AsmToken::Comma)) 4518 return TokError("minor version number required, comma expected"); 4519 4520 if (ParseAsAbsoluteExpression(Minor)) 4521 return TokError("invalid minor version"); 4522 4523 return false; 4524 } 4525 4526 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4527 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4528 return TokError("directive only supported for amdgcn architecture"); 4529 4530 std::string TargetIDDirective; 4531 SMLoc TargetStart = getTok().getLoc(); 4532 if (getParser().parseEscapedString(TargetIDDirective)) 4533 return true; 4534 4535 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4536 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4537 return getParser().Error(TargetRange.Start, 4538 (Twine(".amdgcn_target directive's target id ") + 4539 Twine(TargetIDDirective) + 4540 Twine(" does not match the specified target id ") + 4541 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4542 4543 return false; 4544 } 4545 4546 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4547 return Error(Range.Start, "value out of range", Range); 4548 } 4549 4550 bool AMDGPUAsmParser::calculateGPRBlocks( 4551 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4552 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4553 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4554 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4555 // TODO(scott.linder): These calculations are duplicated from 4556 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
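  // Rough illustration of the granulated-count encoding computed below
  // (granule sizes differ per target and wavefront size, so the numbers are
  // only an example): with 41 VGPRs in use and a granule of 4, the encoded
  // block count would be ceil(41 / 4) - 1 = 10.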
4557 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4558 4559 unsigned NumVGPRs = NextFreeVGPR; 4560 unsigned NumSGPRs = NextFreeSGPR; 4561 4562 if (Version.Major >= 10) 4563 NumSGPRs = 0; 4564 else { 4565 unsigned MaxAddressableNumSGPRs = 4566 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4567 4568 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4569 NumSGPRs > MaxAddressableNumSGPRs) 4570 return OutOfRangeError(SGPRRange); 4571 4572 NumSGPRs += 4573 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4574 4575 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4576 NumSGPRs > MaxAddressableNumSGPRs) 4577 return OutOfRangeError(SGPRRange); 4578 4579 if (Features.test(FeatureSGPRInitBug)) 4580 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4581 } 4582 4583 VGPRBlocks = 4584 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4585 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4586 4587 return false; 4588 } 4589 4590 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4591 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4592 return TokError("directive only supported for amdgcn architecture"); 4593 4594 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4595 return TokError("directive only supported for amdhsa OS"); 4596 4597 StringRef KernelName; 4598 if (getParser().parseIdentifier(KernelName)) 4599 return true; 4600 4601 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4602 4603 StringSet<> Seen; 4604 4605 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4606 4607 SMRange VGPRRange; 4608 uint64_t NextFreeVGPR = 0; 4609 uint64_t AccumOffset = 0; 4610 SMRange SGPRRange; 4611 uint64_t NextFreeSGPR = 0; 4612 4613 // Count the number of user SGPRs implied from the enabled feature bits. 4614 unsigned ImpliedUserSGPRCount = 0; 4615 4616 // Track if the asm explicitly contains the directive for the user SGPR 4617 // count. 
4618 Optional<unsigned> ExplicitUserSGPRCount; 4619 bool ReserveVCC = true; 4620 bool ReserveFlatScr = true; 4621 Optional<bool> EnableWavefrontSize32; 4622 4623 while (true) { 4624 while (trySkipToken(AsmToken::EndOfStatement)); 4625 4626 StringRef ID; 4627 SMRange IDRange = getTok().getLocRange(); 4628 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4629 return true; 4630 4631 if (ID == ".end_amdhsa_kernel") 4632 break; 4633 4634 if (Seen.find(ID) != Seen.end()) 4635 return TokError(".amdhsa_ directives cannot be repeated"); 4636 Seen.insert(ID); 4637 4638 SMLoc ValStart = getLoc(); 4639 int64_t IVal; 4640 if (getParser().parseAbsoluteExpression(IVal)) 4641 return true; 4642 SMLoc ValEnd = getLoc(); 4643 SMRange ValRange = SMRange(ValStart, ValEnd); 4644 4645 if (IVal < 0) 4646 return OutOfRangeError(ValRange); 4647 4648 uint64_t Val = IVal; 4649 4650 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4651 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4652 return OutOfRangeError(RANGE); \ 4653 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4654 4655 if (ID == ".amdhsa_group_segment_fixed_size") { 4656 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4657 return OutOfRangeError(ValRange); 4658 KD.group_segment_fixed_size = Val; 4659 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4660 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4661 return OutOfRangeError(ValRange); 4662 KD.private_segment_fixed_size = Val; 4663 } else if (ID == ".amdhsa_kernarg_size") { 4664 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4665 return OutOfRangeError(ValRange); 4666 KD.kernarg_size = Val; 4667 } else if (ID == ".amdhsa_user_sgpr_count") { 4668 ExplicitUserSGPRCount = Val; 4669 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4670 if (hasArchitectedFlatScratch()) 4671 return Error(IDRange.Start, 4672 "directive is not supported with architected flat scratch", 4673 IDRange); 4674 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4675 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4676 Val, ValRange); 4677 if (Val) 4678 ImpliedUserSGPRCount += 4; 4679 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4680 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4681 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4682 ValRange); 4683 if (Val) 4684 ImpliedUserSGPRCount += 2; 4685 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4686 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4687 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4688 ValRange); 4689 if (Val) 4690 ImpliedUserSGPRCount += 2; 4691 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4692 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4693 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4694 Val, ValRange); 4695 if (Val) 4696 ImpliedUserSGPRCount += 2; 4697 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4698 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4699 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4700 ValRange); 4701 if (Val) 4702 ImpliedUserSGPRCount += 2; 4703 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4704 if (hasArchitectedFlatScratch()) 4705 return Error(IDRange.Start, 4706 "directive is not supported with architected flat scratch", 4707 IDRange); 4708 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4709 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4710 ValRange); 4711 if (Val) 4712 ImpliedUserSGPRCount += 2; 4713 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4714 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4715 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4716 Val, ValRange); 4717 if (Val) 4718 ImpliedUserSGPRCount += 1; 4719 } else if (ID == ".amdhsa_wavefront_size32") { 4720 if (IVersion.Major < 10) 4721 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4722 EnableWavefrontSize32 = Val; 4723 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4724 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4725 Val, ValRange); 4726 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4727 if (hasArchitectedFlatScratch()) 4728 return Error(IDRange.Start, 4729 "directive is not supported with architected flat scratch", 4730 IDRange); 4731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4732 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4733 } else if (ID == ".amdhsa_enable_private_segment") { 4734 if (!hasArchitectedFlatScratch()) 4735 return Error( 4736 IDRange.Start, 4737 "directive is not supported without architected flat scratch", 4738 IDRange); 4739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4740 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4741 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4742 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4743 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4744 ValRange); 4745 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4747 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4748 ValRange); 4749 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4750 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4751 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4752 ValRange); 4753 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4754 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4755 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4756 ValRange); 4757 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4758 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4759 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4760 ValRange); 4761 } else if (ID == ".amdhsa_next_free_vgpr") { 4762 VGPRRange = ValRange; 4763 NextFreeVGPR = Val; 4764 } else if (ID == ".amdhsa_next_free_sgpr") { 4765 SGPRRange = ValRange; 4766 NextFreeSGPR = Val; 4767 } else if (ID == ".amdhsa_accum_offset") { 4768 if (!isGFX90A()) 4769 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4770 AccumOffset = Val; 4771 } else if (ID == ".amdhsa_reserve_vcc") { 4772 if (!isUInt<1>(Val)) 4773 return OutOfRangeError(ValRange); 4774 ReserveVCC = Val; 4775 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4776 if (IVersion.Major < 7) 4777 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4778 if (hasArchitectedFlatScratch()) 4779 return Error(IDRange.Start, 4780 "directive is not supported with architected flat scratch", 4781 IDRange); 4782 if (!isUInt<1>(Val)) 4783 return OutOfRangeError(ValRange); 4784 ReserveFlatScr = Val; 4785 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4786 if (IVersion.Major < 8) 4787 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4788 if (!isUInt<1>(Val)) 4789 return OutOfRangeError(ValRange); 4790 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4791 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4792 IDRange); 4793 } else if (ID == ".amdhsa_float_round_mode_32") { 4794 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4795 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4796 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4797 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4798 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4799 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4800 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4801 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4802 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4804 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4805 ValRange); 4806 } else if (ID == ".amdhsa_dx10_clamp") { 4807 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4808 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4809 } else if (ID == ".amdhsa_ieee_mode") { 4810 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4811 Val, ValRange); 4812 } else if (ID == ".amdhsa_fp16_overflow") { 4813 if (IVersion.Major < 9) 4814 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4815 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4816 ValRange); 4817 } else if (ID == ".amdhsa_tg_split") { 4818 if (!isGFX90A()) 4819 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4821 ValRange); 4822 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4823 if (IVersion.Major < 10) 4824 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4825 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4826 ValRange); 4827 } else if (ID == ".amdhsa_memory_ordered") { 4828 if (IVersion.Major < 10) 4829 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4830 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4831 ValRange); 4832 } else if (ID == ".amdhsa_forward_progress") { 4833 if (IVersion.Major < 10) 4834 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4835 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4836 ValRange); 4837 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4838 PARSE_BITS_ENTRY( 4839 KD.compute_pgm_rsrc2, 4840 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4841 ValRange); 4842 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4843 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4844 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4845 Val, ValRange); 4846 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4847 PARSE_BITS_ENTRY( 4848 KD.compute_pgm_rsrc2, 4849 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4850 ValRange); 4851 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4852 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4853 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4854 Val, ValRange); 4855 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4856 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4857 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4858 Val, ValRange); 4859 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4860 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4861 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4862 Val, ValRange); 4863 } else if (ID == ".amdhsa_exception_int_div_zero") { 4864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4865 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4866 Val, ValRange); 4867 } else { 4868 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4869 } 4870 4871 #undef PARSE_BITS_ENTRY 4872 } 4873 4874 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4875 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4876 4877 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4878 return TokError(".amdhsa_next_free_sgpr directive is required"); 4879 4880 unsigned VGPRBlocks; 4881 unsigned SGPRBlocks; 4882 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4883 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4884 EnableWavefrontSize32, NextFreeVGPR, 4885 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4886 SGPRBlocks)) 4887 return true; 4888 4889 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4890 VGPRBlocks)) 4891 return OutOfRangeError(VGPRRange); 4892 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4893 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4894 4895 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4896 SGPRBlocks)) 4897 return OutOfRangeError(SGPRRange); 4898 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4899 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4900 SGPRBlocks); 4901 4902 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4903 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4904 "enabled user SGPRs"); 4905 4906 unsigned UserSGPRCount = 4907 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4908 4909 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4910 return TokError("too many user SGPRs enabled"); 4911 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4912 UserSGPRCount); 4913 4914 if (isGFX90A()) { 4915 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4916 return TokError(".amdhsa_accum_offset directive is required"); 4917 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4918 return TokError("accum_offset should be in range [4..256] in " 4919 "increments of 4"); 4920 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4921 return TokError("accum_offset exceeds total VGPR allocation"); 4922 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4923 (AccumOffset / 4 - 1)); 4924 } 4925 4926 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4927 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4928 ReserveFlatScr); 4929 return false; 4930 } 4931 4932 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4933 uint32_t Major; 4934 uint32_t Minor; 4935 4936 if (ParseDirectiveMajorMinor(Major, Minor)) 4937 return true; 4938 4939 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4940 return false; 4941 } 4942 4943 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4944 uint32_t Major; 4945 uint32_t Minor; 4946 uint32_t Stepping; 4947 StringRef VendorName; 4948 StringRef ArchName; 4949 4950 // If this directive has no arguments, then use the ISA version for the 4951 // targeted GPU. 
4952 if (isToken(AsmToken::EndOfStatement)) { 4953 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4954 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4955 ISA.Stepping, 4956 "AMD", "AMDGPU"); 4957 return false; 4958 } 4959 4960 if (ParseDirectiveMajorMinor(Major, Minor)) 4961 return true; 4962 4963 if (!trySkipToken(AsmToken::Comma)) 4964 return TokError("stepping version number required, comma expected"); 4965 4966 if (ParseAsAbsoluteExpression(Stepping)) 4967 return TokError("invalid stepping version"); 4968 4969 if (!trySkipToken(AsmToken::Comma)) 4970 return TokError("vendor name required, comma expected"); 4971 4972 if (!parseString(VendorName, "invalid vendor name")) 4973 return true; 4974 4975 if (!trySkipToken(AsmToken::Comma)) 4976 return TokError("arch name required, comma expected"); 4977 4978 if (!parseString(ArchName, "invalid arch name")) 4979 return true; 4980 4981 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4982 VendorName, ArchName); 4983 return false; 4984 } 4985 4986 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4987 amd_kernel_code_t &Header) { 4988 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4989 // assembly for backwards compatibility. 4990 if (ID == "max_scratch_backing_memory_byte_size") { 4991 Parser.eatToEndOfStatement(); 4992 return false; 4993 } 4994 4995 SmallString<40> ErrStr; 4996 raw_svector_ostream Err(ErrStr); 4997 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4998 return TokError(Err.str()); 4999 } 5000 Lex(); 5001 5002 if (ID == "enable_wavefront_size32") { 5003 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5004 if (!isGFX10Plus()) 5005 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5006 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5007 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5008 } else { 5009 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5010 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5011 } 5012 } 5013 5014 if (ID == "wavefront_size") { 5015 if (Header.wavefront_size == 5) { 5016 if (!isGFX10Plus()) 5017 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5018 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5019 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5020 } else if (Header.wavefront_size == 6) { 5021 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5022 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5023 } 5024 } 5025 5026 if (ID == "enable_wgp_mode") { 5027 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5028 !isGFX10Plus()) 5029 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5030 } 5031 5032 if (ID == "enable_mem_ordered") { 5033 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5034 !isGFX10Plus()) 5035 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5036 } 5037 5038 if (ID == "enable_fwd_progress") { 5039 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5040 !isGFX10Plus()) 5041 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5042 } 5043 5044 return false; 5045 } 5046 5047 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5048 amd_kernel_code_t Header; 5049 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5050 5051 while (true) { 5052 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5053 // will set the current token to EndOfStatement. 5054 while(trySkipToken(AsmToken::EndOfStatement)); 5055 5056 StringRef ID; 5057 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5058 return true; 5059 5060 if (ID == ".end_amd_kernel_code_t") 5061 break; 5062 5063 if (ParseAMDKernelCodeTValue(ID, Header)) 5064 return true; 5065 } 5066 5067 getTargetStreamer().EmitAMDKernelCodeT(Header); 5068 5069 return false; 5070 } 5071 5072 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5073 StringRef KernelName; 5074 if (!parseId(KernelName, "expected symbol name")) 5075 return true; 5076 5077 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5078 ELF::STT_AMDGPU_HSA_KERNEL); 5079 5080 KernelScope.initialize(getContext()); 5081 return false; 5082 } 5083 5084 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5085 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5086 return Error(getLoc(), 5087 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5088 "architectures"); 5089 } 5090 5091 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5092 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5093 return Error(getParser().getTok().getLoc(), "target id must match options"); 5094 5095 getTargetStreamer().EmitISAVersion(); 5096 Lex(); 5097 5098 return false; 5099 } 5100 5101 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5102 const char *AssemblerDirectiveBegin; 5103 const char *AssemblerDirectiveEnd; 5104 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5105 isHsaAbiVersion3AndAbove(&getSTI()) 5106 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5107 HSAMD::V3::AssemblerDirectiveEnd) 5108 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5109 HSAMD::AssemblerDirectiveEnd); 5110 5111 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5112 return Error(getLoc(), 5113 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5114 "not available on non-amdhsa OSes")).str()); 5115 } 5116 5117 std::string HSAMetadataString; 5118 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5119 HSAMetadataString)) 5120 return true; 5121 5122 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5123 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5124 return Error(getLoc(), "invalid HSA metadata"); 5125 } else { 5126 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5127 return Error(getLoc(), "invalid HSA metadata"); 5128 } 5129 5130 return false; 5131 } 5132 5133 /// Common code to parse out a block of text (typically YAML) between start and 5134 /// end directives. 
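/// For example (illustrative), the text between the HSA metadata begin and
/// end markers (.amdgpu_metadata ... .end_amdgpu_metadata on ABI v3 and
/// above) is collected verbatim into \p CollectString.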
5135 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5136 const char *AssemblerDirectiveEnd, 5137 std::string &CollectString) { 5138 5139 raw_string_ostream CollectStream(CollectString); 5140 5141 getLexer().setSkipSpace(false); 5142 5143 bool FoundEnd = false; 5144 while (!isToken(AsmToken::Eof)) { 5145 while (isToken(AsmToken::Space)) { 5146 CollectStream << getTokenStr(); 5147 Lex(); 5148 } 5149 5150 if (trySkipId(AssemblerDirectiveEnd)) { 5151 FoundEnd = true; 5152 break; 5153 } 5154 5155 CollectStream << Parser.parseStringToEndOfStatement() 5156 << getContext().getAsmInfo()->getSeparatorString(); 5157 5158 Parser.eatToEndOfStatement(); 5159 } 5160 5161 getLexer().setSkipSpace(true); 5162 5163 if (isToken(AsmToken::Eof) && !FoundEnd) { 5164 return TokError(Twine("expected directive ") + 5165 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5166 } 5167 5168 CollectStream.flush(); 5169 return false; 5170 } 5171 5172 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5173 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5174 std::string String; 5175 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5176 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5177 return true; 5178 5179 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5180 if (!PALMetadata->setFromString(String)) 5181 return Error(getLoc(), "invalid PAL metadata"); 5182 return false; 5183 } 5184 5185 /// Parse the assembler directive for old linear-format PAL metadata. 5186 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5187 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5188 return Error(getLoc(), 5189 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5190 "not available on non-amdpal OSes")).str()); 5191 } 5192 5193 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5194 PALMetadata->setLegacy(); 5195 for (;;) { 5196 uint32_t Key, Value; 5197 if (ParseAsAbsoluteExpression(Key)) { 5198 return TokError(Twine("invalid value in ") + 5199 Twine(PALMD::AssemblerDirective)); 5200 } 5201 if (!trySkipToken(AsmToken::Comma)) { 5202 return TokError(Twine("expected an even number of values in ") + 5203 Twine(PALMD::AssemblerDirective)); 5204 } 5205 if (ParseAsAbsoluteExpression(Value)) { 5206 return TokError(Twine("invalid value in ") + 5207 Twine(PALMD::AssemblerDirective)); 5208 } 5209 PALMetadata->setRegister(Key, Value); 5210 if (!trySkipToken(AsmToken::Comma)) 5211 break; 5212 } 5213 return false; 5214 } 5215 5216 /// ParseDirectiveAMDGPULDS 5217 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5218 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5219 if (getParser().checkForValidSection()) 5220 return true; 5221 5222 StringRef Name; 5223 SMLoc NameLoc = getLoc(); 5224 if (getParser().parseIdentifier(Name)) 5225 return TokError("expected identifier in directive"); 5226 5227 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5228 if (parseToken(AsmToken::Comma, "expected ','")) 5229 return true; 5230 5231 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5232 5233 int64_t Size; 5234 SMLoc SizeLoc = getLoc(); 5235 if (getParser().parseAbsoluteExpression(Size)) 5236 return true; 5237 if (Size < 0) 5238 return Error(SizeLoc, "size must be non-negative"); 5239 if (Size > LocalMemorySize) 5240 return Error(SizeLoc, "size is too large"); 5241 5242 int64_t Alignment = 4; 5243 if (trySkipToken(AsmToken::Comma)) { 5244 SMLoc AlignLoc = getLoc(); 5245 if 
(getParser().parseAbsoluteExpression(Alignment)) 5246 return true; 5247 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5248 return Error(AlignLoc, "alignment must be a power of two"); 5249 5250 // Alignment larger than the size of LDS is possible in theory, as long 5251 // as the linker manages to place to symbol at address 0, but we do want 5252 // to make sure the alignment fits nicely into a 32-bit integer. 5253 if (Alignment >= 1u << 31) 5254 return Error(AlignLoc, "alignment is too large"); 5255 } 5256 5257 if (parseToken(AsmToken::EndOfStatement, 5258 "unexpected token in '.amdgpu_lds' directive")) 5259 return true; 5260 5261 Symbol->redefineIfPossible(); 5262 if (!Symbol->isUndefined()) 5263 return Error(NameLoc, "invalid symbol redefinition"); 5264 5265 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5266 return false; 5267 } 5268 5269 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5270 StringRef IDVal = DirectiveID.getString(); 5271 5272 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5273 if (IDVal == ".amdhsa_kernel") 5274 return ParseDirectiveAMDHSAKernel(); 5275 5276 // TODO: Restructure/combine with PAL metadata directive. 5277 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5278 return ParseDirectiveHSAMetadata(); 5279 } else { 5280 if (IDVal == ".hsa_code_object_version") 5281 return ParseDirectiveHSACodeObjectVersion(); 5282 5283 if (IDVal == ".hsa_code_object_isa") 5284 return ParseDirectiveHSACodeObjectISA(); 5285 5286 if (IDVal == ".amd_kernel_code_t") 5287 return ParseDirectiveAMDKernelCodeT(); 5288 5289 if (IDVal == ".amdgpu_hsa_kernel") 5290 return ParseDirectiveAMDGPUHsaKernel(); 5291 5292 if (IDVal == ".amd_amdgpu_isa") 5293 return ParseDirectiveISAVersion(); 5294 5295 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5296 return ParseDirectiveHSAMetadata(); 5297 } 5298 5299 if (IDVal == ".amdgcn_target") 5300 return ParseDirectiveAMDGCNTarget(); 5301 5302 if (IDVal == ".amdgpu_lds") 5303 return ParseDirectiveAMDGPULDS(); 5304 5305 if (IDVal == PALMD::AssemblerDirectiveBegin) 5306 return ParseDirectivePALMetadataBegin(); 5307 5308 if (IDVal == PALMD::AssemblerDirective) 5309 return ParseDirectivePALMetadata(); 5310 5311 return true; 5312 } 5313 5314 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5315 unsigned RegNo) { 5316 5317 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5318 R.isValid(); ++R) { 5319 if (*R == RegNo) 5320 return isGFX9Plus(); 5321 } 5322 5323 // GFX10 has 2 more SGPRs 104 and 105. 5324 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5325 R.isValid(); ++R) { 5326 if (*R == RegNo) 5327 return hasSGPR104_SGPR105(); 5328 } 5329 5330 switch (RegNo) { 5331 case AMDGPU::SRC_SHARED_BASE: 5332 case AMDGPU::SRC_SHARED_LIMIT: 5333 case AMDGPU::SRC_PRIVATE_BASE: 5334 case AMDGPU::SRC_PRIVATE_LIMIT: 5335 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5336 return isGFX9Plus(); 5337 case AMDGPU::TBA: 5338 case AMDGPU::TBA_LO: 5339 case AMDGPU::TBA_HI: 5340 case AMDGPU::TMA: 5341 case AMDGPU::TMA_LO: 5342 case AMDGPU::TMA_HI: 5343 return !isGFX9Plus(); 5344 case AMDGPU::XNACK_MASK: 5345 case AMDGPU::XNACK_MASK_LO: 5346 case AMDGPU::XNACK_MASK_HI: 5347 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5348 case AMDGPU::SGPR_NULL: 5349 return isGFX10Plus(); 5350 default: 5351 break; 5352 } 5353 5354 if (isCI()) 5355 return true; 5356 5357 if (isSI() || isGFX10Plus()) { 5358 // No flat_scr on SI. 
5359 // On GFX10 flat scratch is not a valid register operand and can only be 5360 // accessed with s_setreg/s_getreg. 5361 switch (RegNo) { 5362 case AMDGPU::FLAT_SCR: 5363 case AMDGPU::FLAT_SCR_LO: 5364 case AMDGPU::FLAT_SCR_HI: 5365 return false; 5366 default: 5367 return true; 5368 } 5369 } 5370 5371 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5372 // SI/CI have. 5373 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5374 R.isValid(); ++R) { 5375 if (*R == RegNo) 5376 return hasSGPR102_SGPR103(); 5377 } 5378 5379 return true; 5380 } 5381 5382 OperandMatchResultTy 5383 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5384 OperandMode Mode) { 5385 // Try to parse with a custom parser 5386 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5387 5388 // If we successfully parsed the operand or if there was an error parsing, 5389 // we are done. 5390 // 5391 // If we are parsing after we reach EndOfStatement then this means we 5392 // are appending default values to the Operands list. This is only done 5393 // by a custom parser, so we shouldn't continue on to the generic parsing. 5394 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5395 isToken(AsmToken::EndOfStatement)) 5396 return ResTy; 5397 5398 SMLoc RBraceLoc; 5399 SMLoc LBraceLoc = getLoc(); 5400 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5401 unsigned Prefix = Operands.size(); 5402 5403 for (;;) { 5404 auto Loc = getLoc(); 5405 ResTy = parseReg(Operands); 5406 if (ResTy == MatchOperand_NoMatch) 5407 Error(Loc, "expected a register"); 5408 if (ResTy != MatchOperand_Success) 5409 return MatchOperand_ParseFail; 5410 5411 RBraceLoc = getLoc(); 5412 if (trySkipToken(AsmToken::RBrac)) 5413 break; 5414 5415 if (!skipToken(AsmToken::Comma, 5416 "expected a comma or a closing square bracket")) { 5417 return MatchOperand_ParseFail; 5418 } 5419 } 5420 5421 if (Operands.size() - Prefix > 1) { 5422 Operands.insert(Operands.begin() + Prefix, 5423 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5424 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5425 } 5426 5427 return MatchOperand_Success; 5428 } 5429 5430 return parseRegOrImm(Operands); 5431 } 5432 5433 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5434 // Clear any forced encodings from the previous instruction.
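// Then, if the mnemonic carries an explicit encoding suffix such as
// v_add_f32_e32 or v_add_f32_e64, record the forced encoding (or the DPP/SDWA
// form) it implies and return the stripped base mnemonic.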
5435 setForcedEncodingSize(0); 5436 setForcedDPP(false); 5437 setForcedSDWA(false); 5438 5439 if (Name.endswith("_e64")) { 5440 setForcedEncodingSize(64); 5441 return Name.substr(0, Name.size() - 4); 5442 } else if (Name.endswith("_e32")) { 5443 setForcedEncodingSize(32); 5444 return Name.substr(0, Name.size() - 4); 5445 } else if (Name.endswith("_dpp")) { 5446 setForcedDPP(true); 5447 return Name.substr(0, Name.size() - 4); 5448 } else if (Name.endswith("_sdwa")) { 5449 setForcedSDWA(true); 5450 return Name.substr(0, Name.size() - 5); 5451 } 5452 return Name; 5453 } 5454 5455 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5456 StringRef Name, 5457 SMLoc NameLoc, OperandVector &Operands) { 5458 // Add the instruction mnemonic 5459 Name = parseMnemonicSuffix(Name); 5460 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5461 5462 bool IsMIMG = Name.startswith("image_"); 5463 5464 while (!trySkipToken(AsmToken::EndOfStatement)) { 5465 OperandMode Mode = OperandMode_Default; 5466 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5467 Mode = OperandMode_NSA; 5468 CPolSeen = 0; 5469 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5470 5471 if (Res != MatchOperand_Success) { 5472 checkUnsupportedInstruction(Name, NameLoc); 5473 if (!Parser.hasPendingError()) { 5474 // FIXME: use real operand location rather than the current location. 5475 StringRef Msg = 5476 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5477 "not a valid operand."; 5478 Error(getLoc(), Msg); 5479 } 5480 while (!trySkipToken(AsmToken::EndOfStatement)) { 5481 lex(); 5482 } 5483 return true; 5484 } 5485 5486 // Eat the comma or space if there is one. 5487 trySkipToken(AsmToken::Comma); 5488 } 5489 5490 return false; 5491 } 5492 5493 //===----------------------------------------------------------------------===// 5494 // Utility functions 5495 //===----------------------------------------------------------------------===// 5496 5497 OperandMatchResultTy 5498 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5499 5500 if (!trySkipId(Prefix, AsmToken::Colon)) 5501 return MatchOperand_NoMatch; 5502 5503 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5504 } 5505 5506 OperandMatchResultTy 5507 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5508 AMDGPUOperand::ImmTy ImmTy, 5509 bool (*ConvertResult)(int64_t&)) { 5510 SMLoc S = getLoc(); 5511 int64_t Value = 0; 5512 5513 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5514 if (Res != MatchOperand_Success) 5515 return Res; 5516 5517 if (ConvertResult && !ConvertResult(Value)) { 5518 Error(S, "invalid " + StringRef(Prefix) + " value."); 5519 } 5520 5521 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5522 return MatchOperand_Success; 5523 } 5524 5525 OperandMatchResultTy 5526 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5527 OperandVector &Operands, 5528 AMDGPUOperand::ImmTy ImmTy, 5529 bool (*ConvertResult)(int64_t&)) { 5530 SMLoc S = getLoc(); 5531 if (!trySkipId(Prefix, AsmToken::Colon)) 5532 return MatchOperand_NoMatch; 5533 5534 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5535 return MatchOperand_ParseFail; 5536 5537 unsigned Val = 0; 5538 const unsigned MaxSize = 4; 5539 5540 // FIXME: How to verify the number of elements matches the number of src 5541 // operands? 
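// Each element must be 0 or 1; element I sets bit I of the immediate, so an
// operand written e.g. as op_sel:[0,1,1,0] encodes as 0b0110. At most MaxSize
// elements are accepted.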
5542 for (int I = 0; ; ++I) { 5543 int64_t Op; 5544 SMLoc Loc = getLoc(); 5545 if (!parseExpr(Op)) 5546 return MatchOperand_ParseFail; 5547 5548 if (Op != 0 && Op != 1) { 5549 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5550 return MatchOperand_ParseFail; 5551 } 5552 5553 Val |= (Op << I); 5554 5555 if (trySkipToken(AsmToken::RBrac)) 5556 break; 5557 5558 if (I + 1 == MaxSize) { 5559 Error(getLoc(), "expected a closing square bracket"); 5560 return MatchOperand_ParseFail; 5561 } 5562 5563 if (!skipToken(AsmToken::Comma, "expected a comma")) 5564 return MatchOperand_ParseFail; 5565 } 5566 5567 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5568 return MatchOperand_Success; 5569 } 5570 5571 OperandMatchResultTy 5572 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5573 AMDGPUOperand::ImmTy ImmTy) { 5574 int64_t Bit; 5575 SMLoc S = getLoc(); 5576 5577 if (trySkipId(Name)) { 5578 Bit = 1; 5579 } else if (trySkipId("no", Name)) { 5580 Bit = 0; 5581 } else { 5582 return MatchOperand_NoMatch; 5583 } 5584 5585 if (Name == "r128" && !hasMIMG_R128()) { 5586 Error(S, "r128 modifier is not supported on this GPU"); 5587 return MatchOperand_ParseFail; 5588 } 5589 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5590 Error(S, "a16 modifier is not supported on this GPU"); 5591 return MatchOperand_ParseFail; 5592 } 5593 5594 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5595 ImmTy = AMDGPUOperand::ImmTyR128A16; 5596 5597 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5598 return MatchOperand_Success; 5599 } 5600 5601 OperandMatchResultTy 5602 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5603 unsigned CPolOn = 0; 5604 unsigned CPolOff = 0; 5605 SMLoc S = getLoc(); 5606 5607 if (trySkipId("glc")) 5608 CPolOn = AMDGPU::CPol::GLC; 5609 else if (trySkipId("noglc")) 5610 CPolOff = AMDGPU::CPol::GLC; 5611 else if (trySkipId("slc")) 5612 CPolOn = AMDGPU::CPol::SLC; 5613 else if (trySkipId("noslc")) 5614 CPolOff = AMDGPU::CPol::SLC; 5615 else if (trySkipId("dlc")) 5616 CPolOn = AMDGPU::CPol::DLC; 5617 else if (trySkipId("nodlc")) 5618 CPolOff = AMDGPU::CPol::DLC; 5619 else if (trySkipId("scc")) 5620 CPolOn = AMDGPU::CPol::SCC; 5621 else if (trySkipId("noscc")) 5622 CPolOff = AMDGPU::CPol::SCC; 5623 else 5624 return MatchOperand_NoMatch; 5625 5626 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5627 Error(S, "dlc modifier is not supported on this GPU"); 5628 return MatchOperand_ParseFail; 5629 } 5630 5631 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5632 Error(S, "scc modifier is not supported on this GPU"); 5633 return MatchOperand_ParseFail; 5634 } 5635 5636 if (CPolSeen & (CPolOn | CPolOff)) { 5637 Error(S, "duplicate cache policy modifier"); 5638 return MatchOperand_ParseFail; 5639 } 5640 5641 CPolSeen |= (CPolOn | CPolOff); 5642 5643 for (unsigned I = 1; I != Operands.size(); ++I) { 5644 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5645 if (Op.isCPol()) { 5646 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5647 return MatchOperand_Success; 5648 } 5649 } 5650 5651 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5652 AMDGPUOperand::ImmTyCPol)); 5653 5654 return MatchOperand_Success; 5655 } 5656 5657 static void addOptionalImmOperand( 5658 MCInst& Inst, const OperandVector& Operands, 5659 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5660 AMDGPUOperand::ImmTy ImmT, 5661 int64_t Default = 0) { 5662 auto i = OptionalIdx.find(ImmT); 5663 if (i != OptionalIdx.end()) { 
5664 unsigned Idx = i->second; 5665 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5666 } else { 5667 Inst.addOperand(MCOperand::createImm(Default)); 5668 } 5669 } 5670 5671 OperandMatchResultTy 5672 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5673 StringRef &Value, 5674 SMLoc &StringLoc) { 5675 if (!trySkipId(Prefix, AsmToken::Colon)) 5676 return MatchOperand_NoMatch; 5677 5678 StringLoc = getLoc(); 5679 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5680 : MatchOperand_ParseFail; 5681 } 5682 5683 //===----------------------------------------------------------------------===// 5684 // MTBUF format 5685 //===----------------------------------------------------------------------===// 5686 5687 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5688 int64_t MaxVal, 5689 int64_t &Fmt) { 5690 int64_t Val; 5691 SMLoc Loc = getLoc(); 5692 5693 auto Res = parseIntWithPrefix(Pref, Val); 5694 if (Res == MatchOperand_ParseFail) 5695 return false; 5696 if (Res == MatchOperand_NoMatch) 5697 return true; 5698 5699 if (Val < 0 || Val > MaxVal) { 5700 Error(Loc, Twine("out of range ", StringRef(Pref))); 5701 return false; 5702 } 5703 5704 Fmt = Val; 5705 return true; 5706 } 5707 5708 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5709 // values to live in a joint format operand in the MCInst encoding. 5710 OperandMatchResultTy 5711 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5712 using namespace llvm::AMDGPU::MTBUFFormat; 5713 5714 int64_t Dfmt = DFMT_UNDEF; 5715 int64_t Nfmt = NFMT_UNDEF; 5716 5717 // dfmt and nfmt can appear in either order, and each is optional. 5718 for (int I = 0; I < 2; ++I) { 5719 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5720 return MatchOperand_ParseFail; 5721 5722 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5723 return MatchOperand_ParseFail; 5724 } 5725 // Skip optional comma between dfmt/nfmt 5726 // but guard against 2 commas following each other. 5727 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5728 !peekToken().is(AsmToken::Comma)) { 5729 trySkipToken(AsmToken::Comma); 5730 } 5731 } 5732 5733 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5734 return MatchOperand_NoMatch; 5735 5736 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5737 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5738 5739 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5740 return MatchOperand_Success; 5741 } 5742 5743 OperandMatchResultTy 5744 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5745 using namespace llvm::AMDGPU::MTBUFFormat; 5746 5747 int64_t Fmt = UFMT_UNDEF; 5748 5749 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5750 return MatchOperand_ParseFail; 5751 5752 if (Fmt == UFMT_UNDEF) 5753 return MatchOperand_NoMatch; 5754 5755 Format = Fmt; 5756 return MatchOperand_Success; 5757 } 5758 5759 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5760 int64_t &Nfmt, 5761 StringRef FormatStr, 5762 SMLoc Loc) { 5763 using namespace llvm::AMDGPU::MTBUFFormat; 5764 int64_t Format; 5765 5766 Format = getDfmt(FormatStr); 5767 if (Format != DFMT_UNDEF) { 5768 Dfmt = Format; 5769 return true; 5770 } 5771 5772 Format = getNfmt(FormatStr, getSTI()); 5773 if (Format != NFMT_UNDEF) { 5774 Nfmt = Format; 5775 return true; 5776 } 5777 5778 Error(Loc, "unsupported format"); 5779 return false; 5780 } 5781 5782 OperandMatchResultTy 5783 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5784 SMLoc FormatLoc, 5785 int64_t &Format) { 5786 using namespace llvm::AMDGPU::MTBUFFormat; 5787 5788 int64_t Dfmt = DFMT_UNDEF; 5789 int64_t Nfmt = NFMT_UNDEF; 5790 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5791 return MatchOperand_ParseFail; 5792 5793 if (trySkipToken(AsmToken::Comma)) { 5794 StringRef Str; 5795 SMLoc Loc = getLoc(); 5796 if (!parseId(Str, "expected a format string") || 5797 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5798 return MatchOperand_ParseFail; 5799 } 5800 if (Dfmt == DFMT_UNDEF) { 5801 Error(Loc, "duplicate numeric format"); 5802 return MatchOperand_ParseFail; 5803 } else if (Nfmt == NFMT_UNDEF) { 5804 Error(Loc, "duplicate data format"); 5805 return MatchOperand_ParseFail; 5806 } 5807 } 5808 5809 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5810 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5811 5812 if (isGFX10Plus()) { 5813 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5814 if (Ufmt == UFMT_UNDEF) { 5815 Error(FormatLoc, "unsupported format"); 5816 return MatchOperand_ParseFail; 5817 } 5818 Format = Ufmt; 5819 } else { 5820 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5821 } 5822 5823 return MatchOperand_Success; 5824 } 5825 5826 OperandMatchResultTy 5827 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5828 SMLoc Loc, 5829 int64_t &Format) { 5830 using namespace llvm::AMDGPU::MTBUFFormat; 5831 5832 auto Id = getUnifiedFormat(FormatStr); 5833 if (Id == UFMT_UNDEF) 5834 return MatchOperand_NoMatch; 5835 5836 if (!isGFX10Plus()) { 5837 Error(Loc, "unified format is not supported on this GPU"); 5838 return MatchOperand_ParseFail; 5839 } 5840 5841 Format = Id; 5842 return MatchOperand_Success; 5843 } 5844 5845 OperandMatchResultTy 5846 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5847 using namespace llvm::AMDGPU::MTBUFFormat; 5848 SMLoc Loc = getLoc(); 5849 5850 if (!parseExpr(Format)) 5851 return MatchOperand_ParseFail; 5852 if (!isValidFormatEncoding(Format, getSTI())) { 5853 Error(Loc, "out of range format"); 5854 return MatchOperand_ParseFail; 5855 } 5856 5857 return MatchOperand_Success; 5858 } 5859 5860 OperandMatchResultTy 5861 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5862 using namespace llvm::AMDGPU::MTBUFFormat; 5863 5864 if (!trySkipId("format", AsmToken::Colon)) 5865 return MatchOperand_NoMatch; 5866 5867 if (trySkipToken(AsmToken::LBrac)) { 5868 StringRef FormatStr; 5869 SMLoc Loc = getLoc(); 5870 if (!parseId(FormatStr, "expected a format string")) 5871 return MatchOperand_ParseFail; 5872 5873 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5874 if (Res == MatchOperand_NoMatch) 5875 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5876 if (Res != MatchOperand_Success) 5877 return Res; 5878 5879 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5880 return MatchOperand_ParseFail; 5881 5882 return MatchOperand_Success; 5883 } 5884 5885 return parseNumericFormat(Format); 5886 } 5887 5888 OperandMatchResultTy 5889 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5890 using namespace llvm::AMDGPU::MTBUFFormat; 5891 5892 int64_t Format = getDefaultFormatEncoding(getSTI()); 5893 OperandMatchResultTy Res; 5894 SMLoc Loc = getLoc(); 5895 5896 // Parse legacy format syntax. 5897 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5898 if (Res == MatchOperand_ParseFail) 5899 return Res; 5900 5901 bool FormatFound = (Res == MatchOperand_Success); 5902 5903 Operands.push_back( 5904 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5905 5906 if (FormatFound) 5907 trySkipToken(AsmToken::Comma); 5908 5909 if (isToken(AsmToken::EndOfStatement)) { 5910 // We are expecting an soffset operand, 5911 // but let matcher handle the error. 5912 return MatchOperand_Success; 5913 } 5914 5915 // Parse soffset. 
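// The format modifier is also accepted after soffset: if it was not found
// above, the second attempt below patches the format operand that has already
// been pushed, while a repeated 'format:' is rejected as a duplicate.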
5916 Res = parseRegOrImm(Operands); 5917 if (Res != MatchOperand_Success) 5918 return Res; 5919 5920 trySkipToken(AsmToken::Comma); 5921 5922 if (!FormatFound) { 5923 Res = parseSymbolicOrNumericFormat(Format); 5924 if (Res == MatchOperand_ParseFail) 5925 return Res; 5926 if (Res == MatchOperand_Success) { 5927 auto Size = Operands.size(); 5928 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5929 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5930 Op.setImm(Format); 5931 } 5932 return MatchOperand_Success; 5933 } 5934 5935 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5936 Error(getLoc(), "duplicate format"); 5937 return MatchOperand_ParseFail; 5938 } 5939 return MatchOperand_Success; 5940 } 5941 5942 //===----------------------------------------------------------------------===// 5943 // ds 5944 //===----------------------------------------------------------------------===// 5945 5946 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5947 const OperandVector &Operands) { 5948 OptionalImmIndexMap OptionalIdx; 5949 5950 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5951 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5952 5953 // Add the register arguments 5954 if (Op.isReg()) { 5955 Op.addRegOperands(Inst, 1); 5956 continue; 5957 } 5958 5959 // Handle optional arguments 5960 OptionalIdx[Op.getImmTy()] = i; 5961 } 5962 5963 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5964 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5966 5967 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5968 } 5969 5970 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5971 bool IsGdsHardcoded) { 5972 OptionalImmIndexMap OptionalIdx; 5973 5974 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5975 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5976 5977 // Add the register arguments 5978 if (Op.isReg()) { 5979 Op.addRegOperands(Inst, 1); 5980 continue; 5981 } 5982 5983 if (Op.isToken() && Op.getToken() == "gds") { 5984 IsGdsHardcoded = true; 5985 continue; 5986 } 5987 5988 // Handle optional arguments 5989 OptionalIdx[Op.getImmTy()] = i; 5990 } 5991 5992 AMDGPUOperand::ImmTy OffsetType = 5993 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5994 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5995 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5996 AMDGPUOperand::ImmTyOffset; 5997 5998 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5999 6000 if (!IsGdsHardcoded) { 6001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6002 } 6003 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6004 } 6005 6006 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6007 OptionalImmIndexMap OptionalIdx; 6008 6009 unsigned OperandIdx[4]; 6010 unsigned EnMask = 0; 6011 int SrcIdx = 0; 6012 6013 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6014 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6015 6016 // Add the register arguments 6017 if (Op.isReg()) { 6018 assert(SrcIdx < 4); 6019 OperandIdx[SrcIdx] = Inst.size(); 6020 Op.addRegOperands(Inst, 1); 6021 ++SrcIdx; 6022 continue; 6023 } 6024 6025 if (Op.isOff()) { 6026 assert(SrcIdx < 4); 6027 OperandIdx[SrcIdx] = Inst.size(); 6028 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6029 ++SrcIdx; 6030 continue; 6031 } 6032 6033 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6034 Op.addImmOperands(Inst, 1); 6035 continue; 6036 } 6037 6038 if (Op.isToken() && Op.getToken() == "done") 6039 continue; 6040 6041 // Handle optional arguments 6042 OptionalIdx[Op.getImmTy()] = i; 6043 } 6044 6045 assert(SrcIdx == 4); 6046 6047 bool Compr = false; 6048 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6049 Compr = true; 6050 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6051 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6052 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6053 } 6054 6055 for (auto i = 0; i < SrcIdx; ++i) { 6056 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6057 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6058 } 6059 } 6060 6061 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6062 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6063 6064 Inst.addOperand(MCOperand::createImm(EnMask)); 6065 } 6066 6067 //===----------------------------------------------------------------------===// 6068 // s_waitcnt 6069 //===----------------------------------------------------------------------===// 6070 6071 static bool 6072 encodeCnt( 6073 const AMDGPU::IsaVersion ISA, 6074 int64_t &IntVal, 6075 int64_t CntVal, 6076 bool Saturate, 6077 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6078 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6079 { 6080 bool Failed = false; 6081 6082 IntVal = encode(ISA, IntVal, CntVal); 6083 if (CntVal != decode(ISA, IntVal)) { 6084 if (Saturate) { 6085 IntVal = encode(ISA, IntVal, -1); 6086 } else { 6087 Failed = true; 6088 } 6089 } 6090 return Failed; 6091 } 6092 6093 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6094 6095 SMLoc CntLoc = getLoc(); 6096 StringRef CntName = getTokenStr(); 6097 6098 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6099 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6100 return false; 6101 6102 int64_t CntVal; 6103 SMLoc ValLoc = getLoc(); 6104 if (!parseExpr(CntVal)) 6105 return false; 6106 6107 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6108 6109 bool Failed = true; 6110 bool Sat = CntName.endswith("_sat"); 6111 6112 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6113 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6114 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6115 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6116 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6117 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6118 } else { 6119 Error(CntLoc, "invalid counter name " + CntName); 6120 return false; 6121 } 6122 6123 if (Failed) { 6124 Error(ValLoc, "too large value for " + CntName); 6125 return false; 6126 } 6127 6128 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6129 return false; 6130 6131 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6132 if (isToken(AsmToken::EndOfStatement)) { 6133 Error(getLoc(), "expected a counter name"); 6134 return false; 6135 } 6136 } 6137 6138 return true; 6139 } 6140 6141 OperandMatchResultTy 6142 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6143 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6144 int64_t Waitcnt = getWaitcntBitMask(ISA); 6145 SMLoc S = getLoc(); 6146 6147 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6148 while (!isToken(AsmToken::EndOfStatement)) { 6149 if (!parseCnt(Waitcnt)) 6150 return MatchOperand_ParseFail; 6151 } 6152 } else { 6153 if (!parseExpr(Waitcnt)) 6154 return MatchOperand_ParseFail; 6155 } 6156 6157 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6158 return MatchOperand_Success; 6159 } 6160 6161 bool 6162 AMDGPUOperand::isSWaitCnt() const { 6163 return isImm(); 6164 } 6165 6166 //===----------------------------------------------------------------------===// 6167 // hwreg 6168 //===----------------------------------------------------------------------===// 6169 6170 bool 6171 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6172 OperandInfoTy &Offset, 6173 
OperandInfoTy &Width) { 6174 using namespace llvm::AMDGPU::Hwreg; 6175 6176 // The register may be specified by name or using a numeric code 6177 HwReg.Loc = getLoc(); 6178 if (isToken(AsmToken::Identifier) && 6179 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6180 HwReg.IsSymbolic = true; 6181 lex(); // skip register name 6182 } else if (!parseExpr(HwReg.Id, "a register name")) { 6183 return false; 6184 } 6185 6186 if (trySkipToken(AsmToken::RParen)) 6187 return true; 6188 6189 // parse optional params 6190 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6191 return false; 6192 6193 Offset.Loc = getLoc(); 6194 if (!parseExpr(Offset.Id)) 6195 return false; 6196 6197 if (!skipToken(AsmToken::Comma, "expected a comma")) 6198 return false; 6199 6200 Width.Loc = getLoc(); 6201 return parseExpr(Width.Id) && 6202 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6203 } 6204 6205 bool 6206 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6207 const OperandInfoTy &Offset, 6208 const OperandInfoTy &Width) { 6209 6210 using namespace llvm::AMDGPU::Hwreg; 6211 6212 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6213 Error(HwReg.Loc, 6214 "specified hardware register is not supported on this GPU"); 6215 return false; 6216 } 6217 if (!isValidHwreg(HwReg.Id)) { 6218 Error(HwReg.Loc, 6219 "invalid code of hardware register: only 6-bit values are legal"); 6220 return false; 6221 } 6222 if (!isValidHwregOffset(Offset.Id)) { 6223 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6224 return false; 6225 } 6226 if (!isValidHwregWidth(Width.Id)) { 6227 Error(Width.Loc, 6228 "invalid bitfield width: only values from 1 to 32 are legal"); 6229 return false; 6230 } 6231 return true; 6232 } 6233 6234 OperandMatchResultTy 6235 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6236 using namespace llvm::AMDGPU::Hwreg; 6237 6238 int64_t ImmVal = 0; 6239 SMLoc Loc = getLoc(); 6240 6241 if (trySkipId("hwreg", AsmToken::LParen)) { 6242 OperandInfoTy HwReg(ID_UNKNOWN_); 6243 OperandInfoTy Offset(OFFSET_DEFAULT_); 6244 OperandInfoTy Width(WIDTH_DEFAULT_); 6245 if (parseHwregBody(HwReg, Offset, Width) && 6246 validateHwreg(HwReg, Offset, Width)) { 6247 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6248 } else { 6249 return MatchOperand_ParseFail; 6250 } 6251 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6252 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6253 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6254 return MatchOperand_ParseFail; 6255 } 6256 } else { 6257 return MatchOperand_ParseFail; 6258 } 6259 6260 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6261 return MatchOperand_Success; 6262 } 6263 6264 bool AMDGPUOperand::isHwreg() const { 6265 return isImmTy(ImmTyHwreg); 6266 } 6267 6268 //===----------------------------------------------------------------------===// 6269 // sendmsg 6270 //===----------------------------------------------------------------------===// 6271 6272 bool 6273 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6274 OperandInfoTy &Op, 6275 OperandInfoTy &Stream) { 6276 using namespace llvm::AMDGPU::SendMsg; 6277 6278 Msg.Loc = getLoc(); 6279 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6280 Msg.IsSymbolic = true; 6281 lex(); // skip message name 6282 } else if (!parseExpr(Msg.Id, "a message name")) { 6283 return false; 6284 } 6285 6286 if (trySkipToken(AsmToken::Comma)) { 6287 Op.IsDefined = true; 
6288 Op.Loc = getLoc(); 6289 if (isToken(AsmToken::Identifier) && 6290 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6291 lex(); // skip operation name 6292 } else if (!parseExpr(Op.Id, "an operation name")) { 6293 return false; 6294 } 6295 6296 if (trySkipToken(AsmToken::Comma)) { 6297 Stream.IsDefined = true; 6298 Stream.Loc = getLoc(); 6299 if (!parseExpr(Stream.Id)) 6300 return false; 6301 } 6302 } 6303 6304 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6305 } 6306 6307 bool 6308 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6309 const OperandInfoTy &Op, 6310 const OperandInfoTy &Stream) { 6311 using namespace llvm::AMDGPU::SendMsg; 6312 6313 // Validation strictness depends on whether the message is specified 6314 // in a symbolic or in a numeric form. In the latter case 6315 // only the encoding possibility is checked. 6316 bool Strict = Msg.IsSymbolic; 6317 6318 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6319 Error(Msg.Loc, "invalid message id"); 6320 return false; 6321 } 6322 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6323 if (Op.IsDefined) { 6324 Error(Op.Loc, "message does not support operations"); 6325 } else { 6326 Error(Msg.Loc, "missing message operation"); 6327 } 6328 return false; 6329 } 6330 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6331 Error(Op.Loc, "invalid operation id"); 6332 return false; 6333 } 6334 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6335 Error(Stream.Loc, "message operation does not support streams"); 6336 return false; 6337 } 6338 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6339 Error(Stream.Loc, "invalid message stream id"); 6340 return false; 6341 } 6342 return true; 6343 } 6344 6345 OperandMatchResultTy 6346 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6347 using namespace llvm::AMDGPU::SendMsg; 6348 6349 int64_t ImmVal = 0; 6350 SMLoc Loc = getLoc(); 6351 6352 if (trySkipId("sendmsg", AsmToken::LParen)) { 6353 OperandInfoTy Msg(ID_UNKNOWN_); 6354 OperandInfoTy Op(OP_NONE_); 6355 OperandInfoTy Stream(STREAM_ID_NONE_); 6356 if (parseSendMsgBody(Msg, Op, Stream) && 6357 validateSendMsg(Msg, Op, Stream)) { 6358 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6359 } else { 6360 return MatchOperand_ParseFail; 6361 } 6362 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6363 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6364 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6365 return MatchOperand_ParseFail; 6366 } 6367 } else { 6368 return MatchOperand_ParseFail; 6369 } 6370 6371 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6372 return MatchOperand_Success; 6373 } 6374 6375 bool AMDGPUOperand::isSendMsg() const { 6376 return isImmTy(ImmTySendMsg); 6377 } 6378 6379 //===----------------------------------------------------------------------===// 6380 // v_interp 6381 //===----------------------------------------------------------------------===// 6382 6383 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6384 StringRef Str; 6385 SMLoc S = getLoc(); 6386 6387 if (!parseId(Str)) 6388 return MatchOperand_NoMatch; 6389 6390 int Slot = StringSwitch<int>(Str) 6391 .Case("p10", 0) 6392 .Case("p20", 1) 6393 .Case("p0", 2) 6394 .Default(-1); 6395 6396 if (Slot == -1) { 6397 Error(S, "invalid interpolation slot"); 6398 return MatchOperand_ParseFail; 6399 } 6400 6401 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6402
AMDGPUOperand::ImmTyInterpSlot)); 6403 return MatchOperand_Success; 6404 } 6405 6406 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6407 StringRef Str; 6408 SMLoc S = getLoc(); 6409 6410 if (!parseId(Str)) 6411 return MatchOperand_NoMatch; 6412 6413 if (!Str.startswith("attr")) { 6414 Error(S, "invalid interpolation attribute"); 6415 return MatchOperand_ParseFail; 6416 } 6417 6418 StringRef Chan = Str.take_back(2); 6419 int AttrChan = StringSwitch<int>(Chan) 6420 .Case(".x", 0) 6421 .Case(".y", 1) 6422 .Case(".z", 2) 6423 .Case(".w", 3) 6424 .Default(-1); 6425 if (AttrChan == -1) { 6426 Error(S, "invalid or missing interpolation attribute channel"); 6427 return MatchOperand_ParseFail; 6428 } 6429 6430 Str = Str.drop_back(2).drop_front(4); 6431 6432 uint8_t Attr; 6433 if (Str.getAsInteger(10, Attr)) { 6434 Error(S, "invalid or missing interpolation attribute number"); 6435 return MatchOperand_ParseFail; 6436 } 6437 6438 if (Attr > 63) { 6439 Error(S, "out of bounds interpolation attribute number"); 6440 return MatchOperand_ParseFail; 6441 } 6442 6443 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6444 6445 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6446 AMDGPUOperand::ImmTyInterpAttr)); 6447 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6448 AMDGPUOperand::ImmTyAttrChan)); 6449 return MatchOperand_Success; 6450 } 6451 6452 //===----------------------------------------------------------------------===// 6453 // exp 6454 //===----------------------------------------------------------------------===// 6455 6456 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6457 using namespace llvm::AMDGPU::Exp; 6458 6459 StringRef Str; 6460 SMLoc S = getLoc(); 6461 6462 if (!parseId(Str)) 6463 return MatchOperand_NoMatch; 6464 6465 unsigned Id = getTgtId(Str); 6466 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6467 Error(S, (Id == ET_INVALID) ? 
6468 "invalid exp target" : 6469 "exp target is not supported on this GPU"); 6470 return MatchOperand_ParseFail; 6471 } 6472 6473 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6474 AMDGPUOperand::ImmTyExpTgt)); 6475 return MatchOperand_Success; 6476 } 6477 6478 //===----------------------------------------------------------------------===// 6479 // parser helpers 6480 //===----------------------------------------------------------------------===// 6481 6482 bool 6483 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6484 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6485 } 6486 6487 bool 6488 AMDGPUAsmParser::isId(const StringRef Id) const { 6489 return isId(getToken(), Id); 6490 } 6491 6492 bool 6493 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6494 return getTokenKind() == Kind; 6495 } 6496 6497 bool 6498 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6499 if (isId(Id)) { 6500 lex(); 6501 return true; 6502 } 6503 return false; 6504 } 6505 6506 bool 6507 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6508 if (isToken(AsmToken::Identifier)) { 6509 StringRef Tok = getTokenStr(); 6510 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6511 lex(); 6512 return true; 6513 } 6514 } 6515 return false; 6516 } 6517 6518 bool 6519 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6520 if (isId(Id) && peekToken().is(Kind)) { 6521 lex(); 6522 lex(); 6523 return true; 6524 } 6525 return false; 6526 } 6527 6528 bool 6529 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6530 if (isToken(Kind)) { 6531 lex(); 6532 return true; 6533 } 6534 return false; 6535 } 6536 6537 bool 6538 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6539 const StringRef ErrMsg) { 6540 if (!trySkipToken(Kind)) { 6541 Error(getLoc(), ErrMsg); 6542 return false; 6543 } 6544 return true; 6545 } 6546 6547 bool 6548 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6549 SMLoc S = getLoc(); 6550 6551 const MCExpr *Expr; 6552 if (Parser.parseExpression(Expr)) 6553 return false; 6554 6555 if (Expr->evaluateAsAbsolute(Imm)) 6556 return true; 6557 6558 if (Expected.empty()) { 6559 Error(S, "expected absolute expression"); 6560 } else { 6561 Error(S, Twine("expected ", Expected) + 6562 Twine(" or an absolute expression")); 6563 } 6564 return false; 6565 } 6566 6567 bool 6568 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6569 SMLoc S = getLoc(); 6570 6571 const MCExpr *Expr; 6572 if (Parser.parseExpression(Expr)) 6573 return false; 6574 6575 int64_t IntVal; 6576 if (Expr->evaluateAsAbsolute(IntVal)) { 6577 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6578 } else { 6579 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6580 } 6581 return true; 6582 } 6583 6584 bool 6585 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6586 if (isToken(AsmToken::String)) { 6587 Val = getToken().getStringContents(); 6588 lex(); 6589 return true; 6590 } else { 6591 Error(getLoc(), ErrMsg); 6592 return false; 6593 } 6594 } 6595 6596 bool 6597 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6598 if (isToken(AsmToken::Identifier)) { 6599 Val = getTokenStr(); 6600 lex(); 6601 return true; 6602 } else { 6603 if (!ErrMsg.empty()) 6604 Error(getLoc(), ErrMsg); 6605 return false; 6606 } 6607 } 6608 6609 AsmToken 6610 AMDGPUAsmParser::getToken() const { 6611 return Parser.getTok(); 6612 } 6613 6614 AsmToken 6615 
AMDGPUAsmParser::peekToken() { 6616 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6617 } 6618 6619 void 6620 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6621 auto TokCount = getLexer().peekTokens(Tokens); 6622 6623 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6624 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6625 } 6626 6627 AsmToken::TokenKind 6628 AMDGPUAsmParser::getTokenKind() const { 6629 return getLexer().getKind(); 6630 } 6631 6632 SMLoc 6633 AMDGPUAsmParser::getLoc() const { 6634 return getToken().getLoc(); 6635 } 6636 6637 StringRef 6638 AMDGPUAsmParser::getTokenStr() const { 6639 return getToken().getString(); 6640 } 6641 6642 void 6643 AMDGPUAsmParser::lex() { 6644 Parser.Lex(); 6645 } 6646 6647 SMLoc 6648 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6649 const OperandVector &Operands) const { 6650 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6651 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6652 if (Test(Op)) 6653 return Op.getStartLoc(); 6654 } 6655 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6656 } 6657 6658 SMLoc 6659 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6660 const OperandVector &Operands) const { 6661 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6662 return getOperandLoc(Test, Operands); 6663 } 6664 6665 SMLoc 6666 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6667 const OperandVector &Operands) const { 6668 auto Test = [=](const AMDGPUOperand& Op) { 6669 return Op.isRegKind() && Op.getReg() == Reg; 6670 }; 6671 return getOperandLoc(Test, Operands); 6672 } 6673 6674 SMLoc 6675 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6676 auto Test = [](const AMDGPUOperand& Op) { 6677 return Op.IsImmKindLiteral() || Op.isExpr(); 6678 }; 6679 return getOperandLoc(Test, Operands); 6680 } 6681 6682 SMLoc 6683 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6684 auto Test = [](const AMDGPUOperand& Op) { 6685 return Op.isImmKindConst(); 6686 }; 6687 return getOperandLoc(Test, Operands); 6688 } 6689 6690 //===----------------------------------------------------------------------===// 6691 // swizzle 6692 //===----------------------------------------------------------------------===// 6693 6694 LLVM_READNONE 6695 static unsigned 6696 encodeBitmaskPerm(const unsigned AndMask, 6697 const unsigned OrMask, 6698 const unsigned XorMask) { 6699 using namespace llvm::AMDGPU::Swizzle; 6700 6701 return BITMASK_PERM_ENC | 6702 (AndMask << BITMASK_AND_SHIFT) | 6703 (OrMask << BITMASK_OR_SHIFT) | 6704 (XorMask << BITMASK_XOR_SHIFT); 6705 } 6706 6707 bool 6708 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6709 const unsigned MinVal, 6710 const unsigned MaxVal, 6711 const StringRef ErrMsg, 6712 SMLoc &Loc) { 6713 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6714 return false; 6715 } 6716 Loc = getLoc(); 6717 if (!parseExpr(Op)) { 6718 return false; 6719 } 6720 if (Op < MinVal || Op > MaxVal) { 6721 Error(Loc, ErrMsg); 6722 return false; 6723 } 6724 6725 return true; 6726 } 6727 6728 bool 6729 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6730 const unsigned MinVal, 6731 const unsigned MaxVal, 6732 const StringRef ErrMsg) { 6733 SMLoc Loc; 6734 for (unsigned i = 0; i < OpNum; ++i) { 6735 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6736 return false; 6737 } 6738 6739 return true; 6740 } 6741 6742 bool 6743 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6744 using namespace llvm::AMDGPU::Swizzle; 6745 6746 int64_t Lane[LANE_NUM]; 6747 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6748 "expected a 2-bit lane id")) { 6749 Imm = QUAD_PERM_ENC; 6750 for (unsigned I = 0; I < LANE_NUM; ++I) { 6751 Imm |= Lane[I] << (LANE_SHIFT * I); 6752 } 6753 return true; 6754 } 6755 return false; 6756 } 6757 6758 bool 6759 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6760 using namespace llvm::AMDGPU::Swizzle; 6761 6762 SMLoc Loc; 6763 int64_t GroupSize; 6764 int64_t LaneIdx; 6765 6766 if (!parseSwizzleOperand(GroupSize, 6767 2, 32, 6768 "group size must be in the interval [2,32]", 6769 Loc)) { 6770 return false; 6771 } 6772 if (!isPowerOf2_64(GroupSize)) { 6773 Error(Loc, "group size must be a power of two"); 6774 return false; 6775 } 6776 if (parseSwizzleOperand(LaneIdx, 6777 0, GroupSize - 1, 6778 "lane id must be in the interval [0,group size - 1]", 6779 Loc)) { 6780 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6781 return true; 6782 } 6783 return false; 6784 } 6785 6786 bool 6787 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6788 using namespace llvm::AMDGPU::Swizzle; 6789 6790 SMLoc Loc; 6791 int64_t GroupSize; 6792 6793 if (!parseSwizzleOperand(GroupSize, 6794 2, 32, 6795 "group size must be in the interval [2,32]", 6796 Loc)) { 6797 return false; 6798 } 6799 if (!isPowerOf2_64(GroupSize)) { 6800 Error(Loc, "group size must be a power of two"); 6801 return false; 6802 } 6803 6804 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6805 return true; 6806 } 6807 6808 bool 6809 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6810 using namespace llvm::AMDGPU::Swizzle; 6811 6812 SMLoc Loc; 6813 int64_t GroupSize; 6814 6815 if (!parseSwizzleOperand(GroupSize, 6816 1, 16, 6817 "group size must be in the interval [1,16]", 6818 Loc)) { 6819 return false; 6820 } 6821 if (!isPowerOf2_64(GroupSize)) { 6822 Error(Loc, "group size must be a power of two"); 6823 return false; 6824 } 6825 6826 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6827 return true; 6828 } 6829 6830 bool 6831 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6832 using namespace llvm::AMDGPU::Swizzle; 6833 6834 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6835 return false; 6836 } 6837 6838 StringRef Ctl; 6839 SMLoc StrLoc = getLoc(); 6840 if (!parseString(Ctl)) { 6841 return false; 6842 } 6843 if (Ctl.size() != BITMASK_WIDTH) { 6844 Error(StrLoc, "expected a 5-character mask"); 6845 return false; 6846 } 6847 6848 unsigned AndMask = 0; 6849 unsigned OrMask = 0; 6850 unsigned XorMask = 0; 6851 6852 for (size_t i = 0; i < Ctl.size(); ++i) { 6853 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6854 switch(Ctl[i]) { 6855 default: 6856 Error(StrLoc, "invalid mask"); 6857 return false; 6858 case '0': 6859 break; 6860 case '1': 6861 OrMask |= Mask; 6862 break; 6863 case 'p': 6864 AndMask |= Mask; 6865 break; 6866 case 'i': 6867 AndMask |= Mask; 6868 XorMask |= Mask; 6869 break; 6870 } 6871 } 6872 6873 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6874 return true; 6875 } 6876 6877 bool 6878 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6879 6880 SMLoc OffsetLoc = getLoc(); 6881 6882 if (!parseExpr(Imm, "a swizzle macro")) { 6883 return false; 6884 } 6885 if (!isUInt<16>(Imm)) { 6886 Error(OffsetLoc, "expected a 16-bit offset"); 6887 return false; 6888 } 6889 return true; 6890 } 6891 6892 bool 6893 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6894 using namespace llvm::AMDGPU::Swizzle; 6895 6896 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6897 6898 SMLoc ModeLoc = getLoc(); 6899 bool Ok = false; 6900 6901 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6902 Ok = parseSwizzleQuadPerm(Imm); 6903 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6904 Ok = parseSwizzleBitmaskPerm(Imm); 6905 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6906 Ok = parseSwizzleBroadcast(Imm); 6907 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6908 Ok = parseSwizzleSwap(Imm); 6909 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6910 Ok = parseSwizzleReverse(Imm); 6911 } else { 6912 Error(ModeLoc, "expected a swizzle mode"); 6913 } 6914 6915 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6916 } 6917 6918 return false; 6919 } 6920 6921 OperandMatchResultTy 6922 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6923 SMLoc S = getLoc(); 6924 int64_t Imm = 0; 6925 6926 if (trySkipId("offset")) { 6927 6928 bool Ok = false; 6929 if (skipToken(AsmToken::Colon, "expected a colon")) { 6930 if (trySkipId("swizzle")) { 6931 Ok = parseSwizzleMacro(Imm); 6932 } else { 6933 Ok = parseSwizzleOffset(Imm); 6934 } 6935 } 6936 6937 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6938 6939 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6940 } else { 6941 // Swizzle "offset" operand is optional. 6942 // If it is omitted, try parsing other optional operands. 6943 return parseOptionalOpr(Operands); 6944 } 6945 } 6946 6947 bool 6948 AMDGPUOperand::isSwizzle() const { 6949 return isImmTy(ImmTySwizzle); 6950 } 6951 6952 //===----------------------------------------------------------------------===// 6953 // VGPR Index Mode 6954 //===----------------------------------------------------------------------===// 6955 6956 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6957 6958 using namespace llvm::AMDGPU::VGPRIndexMode; 6959 6960 if (trySkipToken(AsmToken::RParen)) { 6961 return OFF; 6962 } 6963 6964 int64_t Imm = 0; 6965 6966 while (true) { 6967 unsigned Mode = 0; 6968 SMLoc S = getLoc(); 6969 6970 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6971 if (trySkipId(IdSymbolic[ModeId])) { 6972 Mode = 1 << ModeId; 6973 break; 6974 } 6975 } 6976 6977 if (Mode == 0) { 6978 Error(S, (Imm == 0)? 
6979 "expected a VGPR index mode or a closing parenthesis" : 6980 "expected a VGPR index mode"); 6981 return UNDEF; 6982 } 6983 6984 if (Imm & Mode) { 6985 Error(S, "duplicate VGPR index mode"); 6986 return UNDEF; 6987 } 6988 Imm |= Mode; 6989 6990 if (trySkipToken(AsmToken::RParen)) 6991 break; 6992 if (!skipToken(AsmToken::Comma, 6993 "expected a comma or a closing parenthesis")) 6994 return UNDEF; 6995 } 6996 6997 return Imm; 6998 } 6999 7000 OperandMatchResultTy 7001 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7002 7003 using namespace llvm::AMDGPU::VGPRIndexMode; 7004 7005 int64_t Imm = 0; 7006 SMLoc S = getLoc(); 7007 7008 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7009 Imm = parseGPRIdxMacro(); 7010 if (Imm == UNDEF) 7011 return MatchOperand_ParseFail; 7012 } else { 7013 if (getParser().parseAbsoluteExpression(Imm)) 7014 return MatchOperand_ParseFail; 7015 if (Imm < 0 || !isUInt<4>(Imm)) { 7016 Error(S, "invalid immediate: only 4-bit values are legal"); 7017 return MatchOperand_ParseFail; 7018 } 7019 } 7020 7021 Operands.push_back( 7022 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7023 return MatchOperand_Success; 7024 } 7025 7026 bool AMDGPUOperand::isGPRIdxMode() const { 7027 return isImmTy(ImmTyGprIdxMode); 7028 } 7029 7030 //===----------------------------------------------------------------------===// 7031 // sopp branch targets 7032 //===----------------------------------------------------------------------===// 7033 7034 OperandMatchResultTy 7035 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7036 7037 // Make sure we are not parsing something 7038 // that looks like a label or an expression but is not. 7039 // This will improve error messages. 7040 if (isRegister() || isModifier()) 7041 return MatchOperand_NoMatch; 7042 7043 if (!parseExpr(Operands)) 7044 return MatchOperand_ParseFail; 7045 7046 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7047 assert(Opr.isImm() || Opr.isExpr()); 7048 SMLoc Loc = Opr.getStartLoc(); 7049 7050 // Currently we do not support arbitrary expressions as branch targets. 7051 // Only labels and absolute expressions are accepted. 
7052 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7053 Error(Loc, "expected an absolute expression or a label"); 7054 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7055 Error(Loc, "expected a 16-bit signed jump offset"); 7056 } 7057 7058 return MatchOperand_Success; 7059 } 7060 7061 //===----------------------------------------------------------------------===// 7062 // Boolean holding registers 7063 //===----------------------------------------------------------------------===// 7064 7065 OperandMatchResultTy 7066 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7067 return parseReg(Operands); 7068 } 7069 7070 //===----------------------------------------------------------------------===// 7071 // mubuf 7072 //===----------------------------------------------------------------------===// 7073 7074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7075 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7076 } 7077 7078 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7079 const OperandVector &Operands, 7080 bool IsAtomic, 7081 bool IsLds) { 7082 bool IsLdsOpcode = IsLds; 7083 bool HasLdsModifier = false; 7084 OptionalImmIndexMap OptionalIdx; 7085 unsigned FirstOperandIdx = 1; 7086 bool IsAtomicReturn = false; 7087 7088 if (IsAtomic) { 7089 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7090 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7091 if (!Op.isCPol()) 7092 continue; 7093 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7094 break; 7095 } 7096 7097 if (!IsAtomicReturn) { 7098 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7099 if (NewOpc != -1) 7100 Inst.setOpcode(NewOpc); 7101 } 7102 7103 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7104 SIInstrFlags::IsAtomicRet; 7105 } 7106 7107 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7108 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7109 7110 // Add the register arguments 7111 if (Op.isReg()) { 7112 Op.addRegOperands(Inst, 1); 7113 // Insert a tied src for atomic return dst. 7114 // This cannot be postponed as subsequent calls to 7115 // addImmOperands rely on correct number of MC operands. 7116 if (IsAtomicReturn && i == FirstOperandIdx) 7117 Op.addRegOperands(Inst, 1); 7118 continue; 7119 } 7120 7121 // Handle the case where soffset is an immediate 7122 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7123 Op.addImmOperands(Inst, 1); 7124 continue; 7125 } 7126 7127 HasLdsModifier |= Op.isLDS(); 7128 7129 // Handle tokens like 'offen' which are sometimes hard-coded into the 7130 // asm string. There are no MCInst operands for these. 7131 if (Op.isToken()) { 7132 continue; 7133 } 7134 assert(Op.isImm()); 7135 7136 // Handle optional arguments 7137 OptionalIdx[Op.getImmTy()] = i; 7138 } 7139 7140 // This is a workaround for an llvm quirk which may result in an 7141 // incorrect instruction selection. Lds and non-lds versions of 7142 // MUBUF instructions are identical except that lds versions 7143 // have mandatory 'lds' modifier. However this modifier follows 7144 // optional modifiers and llvm asm matcher regards this 'lds' 7145 // modifier as an optional one. As a result, an lds version 7146 // of opcode may be selected even if it has no 'lds' modifier. 7147 if (IsLdsOpcode && !HasLdsModifier) { 7148 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7149 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7150 Inst.setOpcode(NoLdsOpcode); 7151 IsLdsOpcode = false; 7152 } 7153 } 7154 7155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7157 7158 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7160 } 7161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7162 } 7163 7164 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7165 OptionalImmIndexMap OptionalIdx; 7166 7167 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7168 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7169 7170 // Add the register arguments 7171 if (Op.isReg()) { 7172 Op.addRegOperands(Inst, 1); 7173 continue; 7174 } 7175 7176 // Handle the case where soffset is an immediate 7177 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7178 Op.addImmOperands(Inst, 1); 7179 continue; 7180 } 7181 7182 // Handle tokens like 'offen' which are sometimes hard-coded into the 7183 // asm string. There are no MCInst operands for these. 7184 if (Op.isToken()) { 7185 continue; 7186 } 7187 assert(Op.isImm()); 7188 7189 // Handle optional arguments 7190 OptionalIdx[Op.getImmTy()] = i; 7191 } 7192 7193 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7194 AMDGPUOperand::ImmTyOffset); 7195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7199 } 7200 7201 //===----------------------------------------------------------------------===// 7202 // mimg 7203 //===----------------------------------------------------------------------===// 7204 7205 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7206 bool IsAtomic) { 7207 unsigned I = 1; 7208 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7209 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7210 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7211 } 7212 7213 if (IsAtomic) { 7214 // Add src, same as dst 7215 assert(Desc.getNumDefs() == 1); 7216 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7217 } 7218 7219 OptionalImmIndexMap OptionalIdx; 7220 7221 for (unsigned E = Operands.size(); I != E; ++I) { 7222 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7223 7224 // Add the register arguments 7225 if (Op.isReg()) { 7226 Op.addRegOperands(Inst, 1); 7227 } else if (Op.isImmModifier()) { 7228 OptionalIdx[Op.getImmTy()] = I; 7229 } else if (!Op.isToken()) { 7230 llvm_unreachable("unexpected operand type"); 7231 } 7232 } 7233 7234 bool IsGFX10Plus = isGFX10Plus(); 7235 7236 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7237 if (IsGFX10Plus) 7238 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7239 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7241 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7242 if (IsGFX10Plus) 7243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7244 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7247 if (!IsGFX10Plus) 7248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7250 } 7251 7252 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7253 cvtMIMG(Inst, Operands, true); 7254 } 7255 7256 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7257 OptionalImmIndexMap OptionalIdx; 7258 bool IsAtomicReturn = false; 7259 7260 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7261 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7262 if (!Op.isCPol()) 7263 continue; 7264 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7265 break; 7266 } 7267 7268 if (!IsAtomicReturn) { 7269 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7270 if (NewOpc != -1) 7271 Inst.setOpcode(NewOpc); 7272 } 7273 7274 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7275 SIInstrFlags::IsAtomicRet; 7276 7277 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7278 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7279 7280 // Add the register arguments 7281 if (Op.isReg()) { 7282 Op.addRegOperands(Inst, 1); 7283 if (IsAtomicReturn && i == 1) 7284 Op.addRegOperands(Inst, 1); 7285 continue; 7286 } 7287 7288 // Handle the case where soffset is an immediate 7289 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7290 Op.addImmOperands(Inst, 1); 7291 continue; 7292 } 7293 7294 // Handle tokens like 'offen' which are sometimes hard-coded into the 7295 // asm string. There are no MCInst operands for these. 7296 if (Op.isToken()) { 7297 continue; 7298 } 7299 assert(Op.isImm()); 7300 7301 // Handle optional arguments 7302 OptionalIdx[Op.getImmTy()] = i; 7303 } 7304 7305 if ((int)Inst.getNumOperands() <= 7306 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7309 } 7310 7311 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7312 const OperandVector &Operands) { 7313 for (unsigned I = 1; I < Operands.size(); ++I) { 7314 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7315 if (Operand.isReg()) 7316 Operand.addRegOperands(Inst, 1); 7317 } 7318 7319 Inst.addOperand(MCOperand::createImm(1)); // a16 7320 } 7321 7322 //===----------------------------------------------------------------------===// 7323 // smrd 7324 //===----------------------------------------------------------------------===// 7325 7326 bool AMDGPUOperand::isSMRDOffset8() const { 7327 return isImm() && isUInt<8>(getImm()); 7328 } 7329 7330 bool AMDGPUOperand::isSMEMOffset() const { 7331 return isImm(); // Offset range is checked later by validator. 7332 } 7333 7334 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7335 // 32-bit literals are only supported on CI and we only want to use them 7336 // when the offset is > 8-bits. 
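// E.g. an offset of 0x1fff fails isUInt<8> but passes isUInt<32>, so it is
// treated as a literal offset.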
7337 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7338 } 7339 7340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7341 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7342 } 7343 7344 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7345 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7346 } 7347 7348 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7349 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7350 } 7351 7352 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7353 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7354 } 7355 7356 //===----------------------------------------------------------------------===// 7357 // vop3 7358 //===----------------------------------------------------------------------===// 7359 7360 static bool ConvertOmodMul(int64_t &Mul) { 7361 if (Mul != 1 && Mul != 2 && Mul != 4) 7362 return false; 7363 7364 Mul >>= 1; 7365 return true; 7366 } 7367 7368 static bool ConvertOmodDiv(int64_t &Div) { 7369 if (Div == 1) { 7370 Div = 0; 7371 return true; 7372 } 7373 7374 if (Div == 2) { 7375 Div = 3; 7376 return true; 7377 } 7378 7379 return false; 7380 } 7381 7382 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7383 // This is intentional and ensures compatibility with sp3. 7384 // See bug 35397 for details. 7385 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7386 if (BoundCtrl == 0 || BoundCtrl == 1) { 7387 BoundCtrl = 1; 7388 return true; 7389 } 7390 return false; 7391 } 7392 7393 // Note: the order in this table matches the order of operands in AsmString. 7394 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7395 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7396 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7397 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7398 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7399 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7400 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7401 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7402 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7403 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7404 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7405 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7406 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7407 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7408 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7409 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7410 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7411 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7412 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7413 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7414 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7415 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7416 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7417 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7418 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7419 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7420 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7421 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7422 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7423 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7424 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7425 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7426 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7427 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
7428 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7429 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7430 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7431 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7432 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7433 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7434 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7435 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7436 };
7437
7438 void AMDGPUAsmParser::onBeginOfFile() {
7439 if (!getParser().getStreamer().getTargetStreamer() ||
7440 getSTI().getTargetTriple().getArch() == Triple::r600)
7441 return;
7442
7443 if (!getTargetStreamer().getTargetID())
7444 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7445
7446 if (isHsaAbiVersion3AndAbove(&getSTI()))
7447 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7448 }
7449
7450 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7451
7452 OperandMatchResultTy res = parseOptionalOpr(Operands);
7453
7454 // This is a hack to enable hardcoded mandatory operands which follow
7455 // optional operands.
7456 //
7457 // The current design assumes that all operands after the first optional operand
7458 // are also optional. However, the implementation of some instructions violates
7459 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7460 //
7461 // To alleviate this problem, we have to (implicitly) parse extra operands
7462 // to make sure the autogenerated parser of custom operands never hits hardcoded
7463 // mandatory operands.
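  // Illustrative example (based on the comment above; exact asm text assumed):
  // for a returning flat/global atomic whose asm string ends in a hardcoded
  // 'glc', the lookahead loop below keeps parsing optional operands itself, so
  // the autogenerated custom-operand parsers are never invoked on the
  // mandatory trailing 'glc' token.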
7464 7465 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7466 if (res != MatchOperand_Success || 7467 isToken(AsmToken::EndOfStatement)) 7468 break; 7469 7470 trySkipToken(AsmToken::Comma); 7471 res = parseOptionalOpr(Operands); 7472 } 7473 7474 return res; 7475 } 7476 7477 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7478 OperandMatchResultTy res; 7479 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7480 // try to parse any optional operand here 7481 if (Op.IsBit) { 7482 res = parseNamedBit(Op.Name, Operands, Op.Type); 7483 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7484 res = parseOModOperand(Operands); 7485 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7486 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7487 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7488 res = parseSDWASel(Operands, Op.Name, Op.Type); 7489 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7490 res = parseSDWADstUnused(Operands); 7491 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7492 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7493 Op.Type == AMDGPUOperand::ImmTyNegLo || 7494 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7495 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7496 Op.ConvertResult); 7497 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7498 res = parseDim(Operands); 7499 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7500 res = parseCPol(Operands); 7501 } else { 7502 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7503 } 7504 if (res != MatchOperand_NoMatch) { 7505 return res; 7506 } 7507 } 7508 return MatchOperand_NoMatch; 7509 } 7510 7511 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7512 StringRef Name = getTokenStr(); 7513 if (Name == "mul") { 7514 return parseIntWithPrefix("mul", Operands, 7515 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7516 } 7517 7518 if (Name == "div") { 7519 return parseIntWithPrefix("div", Operands, 7520 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7521 } 7522 7523 return MatchOperand_NoMatch; 7524 } 7525 7526 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7527 cvtVOP3P(Inst, Operands); 7528 7529 int Opc = Inst.getOpcode(); 7530 7531 int SrcNum; 7532 const int Ops[] = { AMDGPU::OpName::src0, 7533 AMDGPU::OpName::src1, 7534 AMDGPU::OpName::src2 }; 7535 for (SrcNum = 0; 7536 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7537 ++SrcNum); 7538 assert(SrcNum > 0); 7539 7540 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7541 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7542 7543 if ((OpSel & (1 << SrcNum)) != 0) { 7544 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7545 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7546 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7547 } 7548 } 7549 7550 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7551 // 1. This operand is input modifiers 7552 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7553 // 2. This is not last operand 7554 && Desc.NumOperands > (OpNum + 1) 7555 // 3. Next operand is register class 7556 && Desc.OpInfo[OpNum + 1].RegClass != -1 7557 // 4. 
Next register is not tied to any other operand 7558 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7559 } 7560 7561 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7562 { 7563 OptionalImmIndexMap OptionalIdx; 7564 unsigned Opc = Inst.getOpcode(); 7565 7566 unsigned I = 1; 7567 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7568 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7569 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7570 } 7571 7572 for (unsigned E = Operands.size(); I != E; ++I) { 7573 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7574 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7575 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7576 } else if (Op.isInterpSlot() || 7577 Op.isInterpAttr() || 7578 Op.isAttrChan()) { 7579 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7580 } else if (Op.isImmModifier()) { 7581 OptionalIdx[Op.getImmTy()] = I; 7582 } else { 7583 llvm_unreachable("unhandled operand type"); 7584 } 7585 } 7586 7587 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7588 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7589 } 7590 7591 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7592 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7593 } 7594 7595 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7596 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7597 } 7598 } 7599 7600 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7601 OptionalImmIndexMap &OptionalIdx) { 7602 unsigned Opc = Inst.getOpcode(); 7603 7604 unsigned I = 1; 7605 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7606 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7607 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7608 } 7609 7610 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7611 // This instruction has src modifiers 7612 for (unsigned E = Operands.size(); I != E; ++I) { 7613 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7614 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7615 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7616 } else if (Op.isImmModifier()) { 7617 OptionalIdx[Op.getImmTy()] = I; 7618 } else if (Op.isRegOrImm()) { 7619 Op.addRegOrImmOperands(Inst, 1); 7620 } else { 7621 llvm_unreachable("unhandled operand type"); 7622 } 7623 } 7624 } else { 7625 // No src modifiers 7626 for (unsigned E = Operands.size(); I != E; ++I) { 7627 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7628 if (Op.isMod()) { 7629 OptionalIdx[Op.getImmTy()] = I; 7630 } else { 7631 Op.addRegOrImmOperands(Inst, 1); 7632 } 7633 } 7634 } 7635 7636 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7638 } 7639 7640 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7642 } 7643 7644 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7645 // it has src2 register operand that is tied to dst operand 7646 // we don't allow modifiers for this operand in assembler so src2_modifiers 7647 // should be 0. 
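  // Illustrative example (asm text assumed, not from the source): for
  //   v_mac_f32_e64 v0, v1, v2
  // the tied src2 is not written as a separate operand, so the code below
  // appends src2_modifiers = 0 and a copy of the dst operand (src2 = dst).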
7648 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7649 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7650 Opc == AMDGPU::V_MAC_F32_e64_vi || 7651 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7652 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7653 Opc == AMDGPU::V_MAC_F16_e64_vi || 7654 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7655 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7656 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7657 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7658 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7659 auto it = Inst.begin(); 7660 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7661 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7662 ++it; 7663 // Copy the operand to ensure it's not invalidated when Inst grows. 7664 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7665 } 7666 } 7667 7668 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7669 OptionalImmIndexMap OptionalIdx; 7670 cvtVOP3(Inst, Operands, OptionalIdx); 7671 } 7672 7673 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7674 OptionalImmIndexMap &OptIdx) { 7675 const int Opc = Inst.getOpcode(); 7676 const MCInstrDesc &Desc = MII.get(Opc); 7677 7678 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7679 7680 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7681 assert(!IsPacked); 7682 Inst.addOperand(Inst.getOperand(0)); 7683 } 7684 7685 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7686 // instruction, and then figure out where to actually put the modifiers 7687 7688 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7689 if (OpSelIdx != -1) { 7690 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7691 } 7692 7693 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7694 if (OpSelHiIdx != -1) { 7695 int DefaultVal = IsPacked ? 
-1 : 0; 7696 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7697 DefaultVal); 7698 } 7699 7700 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7701 if (NegLoIdx != -1) { 7702 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7703 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7704 } 7705 7706 const int Ops[] = { AMDGPU::OpName::src0, 7707 AMDGPU::OpName::src1, 7708 AMDGPU::OpName::src2 }; 7709 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7710 AMDGPU::OpName::src1_modifiers, 7711 AMDGPU::OpName::src2_modifiers }; 7712 7713 unsigned OpSel = 0; 7714 unsigned OpSelHi = 0; 7715 unsigned NegLo = 0; 7716 unsigned NegHi = 0; 7717 7718 if (OpSelIdx != -1) 7719 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7720 7721 if (OpSelHiIdx != -1) 7722 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7723 7724 if (NegLoIdx != -1) { 7725 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7726 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7727 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7728 } 7729 7730 for (int J = 0; J < 3; ++J) { 7731 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7732 if (OpIdx == -1) 7733 break; 7734 7735 uint32_t ModVal = 0; 7736 7737 if ((OpSel & (1 << J)) != 0) 7738 ModVal |= SISrcMods::OP_SEL_0; 7739 7740 if ((OpSelHi & (1 << J)) != 0) 7741 ModVal |= SISrcMods::OP_SEL_1; 7742 7743 if ((NegLo & (1 << J)) != 0) 7744 ModVal |= SISrcMods::NEG; 7745 7746 if ((NegHi & (1 << J)) != 0) 7747 ModVal |= SISrcMods::NEG_HI; 7748 7749 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7750 7751 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7752 } 7753 } 7754 7755 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7756 OptionalImmIndexMap OptIdx; 7757 cvtVOP3(Inst, Operands, OptIdx); 7758 cvtVOP3P(Inst, Operands, OptIdx); 7759 } 7760 7761 //===----------------------------------------------------------------------===// 7762 // dpp 7763 //===----------------------------------------------------------------------===// 7764 7765 bool AMDGPUOperand::isDPP8() const { 7766 return isImmTy(ImmTyDPP8); 7767 } 7768 7769 bool AMDGPUOperand::isDPPCtrl() const { 7770 using namespace AMDGPU::DPP; 7771 7772 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7773 if (result) { 7774 int64_t Imm = getImm(); 7775 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7776 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7777 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7778 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7779 (Imm == DppCtrl::WAVE_SHL1) || 7780 (Imm == DppCtrl::WAVE_ROL1) || 7781 (Imm == DppCtrl::WAVE_SHR1) || 7782 (Imm == DppCtrl::WAVE_ROR1) || 7783 (Imm == DppCtrl::ROW_MIRROR) || 7784 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7785 (Imm == DppCtrl::BCAST15) || 7786 (Imm == DppCtrl::BCAST31) || 7787 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7788 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7789 } 7790 return false; 7791 } 7792 7793 //===----------------------------------------------------------------------===// 7794 // mAI 7795 //===----------------------------------------------------------------------===// 7796 7797 bool AMDGPUOperand::isBLGP() const { 7798 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7799 } 7800 7801 bool 
AMDGPUOperand::isCBSZ() const { 7802 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7803 } 7804 7805 bool AMDGPUOperand::isABID() const { 7806 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7807 } 7808 7809 bool AMDGPUOperand::isS16Imm() const { 7810 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7811 } 7812 7813 bool AMDGPUOperand::isU16Imm() const { 7814 return isImm() && isUInt<16>(getImm()); 7815 } 7816 7817 //===----------------------------------------------------------------------===// 7818 // dim 7819 //===----------------------------------------------------------------------===// 7820 7821 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7822 // We want to allow "dim:1D" etc., 7823 // but the initial 1 is tokenized as an integer. 7824 std::string Token; 7825 if (isToken(AsmToken::Integer)) { 7826 SMLoc Loc = getToken().getEndLoc(); 7827 Token = std::string(getTokenStr()); 7828 lex(); 7829 if (getLoc() != Loc) 7830 return false; 7831 } 7832 7833 StringRef Suffix; 7834 if (!parseId(Suffix)) 7835 return false; 7836 Token += Suffix; 7837 7838 StringRef DimId = Token; 7839 if (DimId.startswith("SQ_RSRC_IMG_")) 7840 DimId = DimId.drop_front(12); 7841 7842 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7843 if (!DimInfo) 7844 return false; 7845 7846 Encoding = DimInfo->Encoding; 7847 return true; 7848 } 7849 7850 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7851 if (!isGFX10Plus()) 7852 return MatchOperand_NoMatch; 7853 7854 SMLoc S = getLoc(); 7855 7856 if (!trySkipId("dim", AsmToken::Colon)) 7857 return MatchOperand_NoMatch; 7858 7859 unsigned Encoding; 7860 SMLoc Loc = getLoc(); 7861 if (!parseDimId(Encoding)) { 7862 Error(Loc, "invalid dim value"); 7863 return MatchOperand_ParseFail; 7864 } 7865 7866 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7867 AMDGPUOperand::ImmTyDim)); 7868 return MatchOperand_Success; 7869 } 7870 7871 //===----------------------------------------------------------------------===// 7872 // dpp 7873 //===----------------------------------------------------------------------===// 7874 7875 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7876 SMLoc S = getLoc(); 7877 7878 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7879 return MatchOperand_NoMatch; 7880 7881 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7882 7883 int64_t Sels[8]; 7884 7885 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7886 return MatchOperand_ParseFail; 7887 7888 for (size_t i = 0; i < 8; ++i) { 7889 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7890 return MatchOperand_ParseFail; 7891 7892 SMLoc Loc = getLoc(); 7893 if (getParser().parseAbsoluteExpression(Sels[i])) 7894 return MatchOperand_ParseFail; 7895 if (0 > Sels[i] || 7 < Sels[i]) { 7896 Error(Loc, "expected a 3-bit value"); 7897 return MatchOperand_ParseFail; 7898 } 7899 } 7900 7901 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7902 return MatchOperand_ParseFail; 7903 7904 unsigned DPP8 = 0; 7905 for (size_t i = 0; i < 8; ++i) 7906 DPP8 |= (Sels[i] << (i * 3)); 7907 7908 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7909 return MatchOperand_Success; 7910 } 7911 7912 bool 7913 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7914 const OperandVector &Operands) { 7915 if (Ctrl == "row_newbcast") 7916 return isGFX90A(); 7917 7918 if (Ctrl == "row_share" || 7919 Ctrl 
== "row_xmask") 7920 return isGFX10Plus(); 7921 7922 if (Ctrl == "wave_shl" || 7923 Ctrl == "wave_shr" || 7924 Ctrl == "wave_rol" || 7925 Ctrl == "wave_ror" || 7926 Ctrl == "row_bcast") 7927 return isVI() || isGFX9(); 7928 7929 return Ctrl == "row_mirror" || 7930 Ctrl == "row_half_mirror" || 7931 Ctrl == "quad_perm" || 7932 Ctrl == "row_shl" || 7933 Ctrl == "row_shr" || 7934 Ctrl == "row_ror"; 7935 } 7936 7937 int64_t 7938 AMDGPUAsmParser::parseDPPCtrlPerm() { 7939 // quad_perm:[%d,%d,%d,%d] 7940 7941 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7942 return -1; 7943 7944 int64_t Val = 0; 7945 for (int i = 0; i < 4; ++i) { 7946 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7947 return -1; 7948 7949 int64_t Temp; 7950 SMLoc Loc = getLoc(); 7951 if (getParser().parseAbsoluteExpression(Temp)) 7952 return -1; 7953 if (Temp < 0 || Temp > 3) { 7954 Error(Loc, "expected a 2-bit value"); 7955 return -1; 7956 } 7957 7958 Val += (Temp << i * 2); 7959 } 7960 7961 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7962 return -1; 7963 7964 return Val; 7965 } 7966 7967 int64_t 7968 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7969 using namespace AMDGPU::DPP; 7970 7971 // sel:%d 7972 7973 int64_t Val; 7974 SMLoc Loc = getLoc(); 7975 7976 if (getParser().parseAbsoluteExpression(Val)) 7977 return -1; 7978 7979 struct DppCtrlCheck { 7980 int64_t Ctrl; 7981 int Lo; 7982 int Hi; 7983 }; 7984 7985 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7986 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7987 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7988 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7989 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7990 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7991 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7992 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7993 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7994 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7995 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7996 .Default({-1, 0, 0}); 7997 7998 bool Valid; 7999 if (Check.Ctrl == -1) { 8000 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8001 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8002 } else { 8003 Valid = Check.Lo <= Val && Val <= Check.Hi; 8004 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8005 } 8006 8007 if (!Valid) { 8008 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8009 return -1; 8010 } 8011 8012 return Val; 8013 } 8014 8015 OperandMatchResultTy 8016 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8017 using namespace AMDGPU::DPP; 8018 8019 if (!isToken(AsmToken::Identifier) || 8020 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8021 return MatchOperand_NoMatch; 8022 8023 SMLoc S = getLoc(); 8024 int64_t Val = -1; 8025 StringRef Ctrl; 8026 8027 parseId(Ctrl); 8028 8029 if (Ctrl == "row_mirror") { 8030 Val = DppCtrl::ROW_MIRROR; 8031 } else if (Ctrl == "row_half_mirror") { 8032 Val = DppCtrl::ROW_HALF_MIRROR; 8033 } else { 8034 if (skipToken(AsmToken::Colon, "expected a colon")) { 8035 if (Ctrl == "quad_perm") { 8036 Val = parseDPPCtrlPerm(); 8037 } else { 8038 Val = parseDPPCtrlSel(Ctrl); 8039 } 8040 } 8041 } 8042 8043 if (Val == -1) 8044 return MatchOperand_ParseFail; 8045 8046 Operands.push_back( 8047 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8048 return MatchOperand_Success; 8049 } 8050 8051 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8052 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8053 } 8054 8055 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8056 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8057 } 8058 8059 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8060 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8061 } 8062 8063 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8064 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8065 } 8066 8067 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8068 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8069 } 8070 8071 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8072 OptionalImmIndexMap OptionalIdx; 8073 8074 unsigned Opc = Inst.getOpcode(); 8075 bool HasModifiers = 8076 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8077 unsigned I = 1; 8078 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8079 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8080 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8081 } 8082 8083 int Fi = 0; 8084 for (unsigned E = Operands.size(); I != E; ++I) { 8085 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8086 MCOI::TIED_TO); 8087 if (TiedTo != -1) { 8088 assert((unsigned)TiedTo < Inst.getNumOperands()); 8089 // handle tied old or src2 for MAC instructions 8090 Inst.addOperand(Inst.getOperand(TiedTo)); 8091 } 8092 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8093 // Add the register arguments 8094 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8095 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8096 // Skip it. 
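  // For example (mnemonic form assumed, illustrative only): in
  //   v_add_u32_dpp v1, vcc, v2, v3 quad_perm:[0,1,2,3]
  // the explicitly written vcc gets no MCInst operand of its own; it is
  // simply skipped here.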
8097 continue; 8098 } 8099 8100 if (IsDPP8) { 8101 if (Op.isDPP8()) { 8102 Op.addImmOperands(Inst, 1); 8103 } else if (HasModifiers && 8104 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8105 Op.addRegWithFPInputModsOperands(Inst, 2); 8106 } else if (Op.isFI()) { 8107 Fi = Op.getImm(); 8108 } else if (Op.isReg()) { 8109 Op.addRegOperands(Inst, 1); 8110 } else { 8111 llvm_unreachable("Invalid operand type"); 8112 } 8113 } else { 8114 if (HasModifiers && 8115 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8116 Op.addRegWithFPInputModsOperands(Inst, 2); 8117 } else if (Op.isReg()) { 8118 Op.addRegOperands(Inst, 1); 8119 } else if (Op.isDPPCtrl()) { 8120 Op.addImmOperands(Inst, 1); 8121 } else if (Op.isImm()) { 8122 // Handle optional arguments 8123 OptionalIdx[Op.getImmTy()] = I; 8124 } else { 8125 llvm_unreachable("Invalid operand type"); 8126 } 8127 } 8128 } 8129 8130 if (IsDPP8) { 8131 using namespace llvm::AMDGPU::DPP; 8132 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8133 } else { 8134 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8136 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8137 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8138 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8139 } 8140 } 8141 } 8142 8143 //===----------------------------------------------------------------------===// 8144 // sdwa 8145 //===----------------------------------------------------------------------===// 8146 8147 OperandMatchResultTy 8148 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8149 AMDGPUOperand::ImmTy Type) { 8150 using namespace llvm::AMDGPU::SDWA; 8151 8152 SMLoc S = getLoc(); 8153 StringRef Value; 8154 OperandMatchResultTy res; 8155 8156 SMLoc StringLoc; 8157 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8158 if (res != MatchOperand_Success) { 8159 return res; 8160 } 8161 8162 int64_t Int; 8163 Int = StringSwitch<int64_t>(Value) 8164 .Case("BYTE_0", SdwaSel::BYTE_0) 8165 .Case("BYTE_1", SdwaSel::BYTE_1) 8166 .Case("BYTE_2", SdwaSel::BYTE_2) 8167 .Case("BYTE_3", SdwaSel::BYTE_3) 8168 .Case("WORD_0", SdwaSel::WORD_0) 8169 .Case("WORD_1", SdwaSel::WORD_1) 8170 .Case("DWORD", SdwaSel::DWORD) 8171 .Default(0xffffffff); 8172 8173 if (Int == 0xffffffff) { 8174 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8175 return MatchOperand_ParseFail; 8176 } 8177 8178 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8179 return MatchOperand_Success; 8180 } 8181 8182 OperandMatchResultTy 8183 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8184 using namespace llvm::AMDGPU::SDWA; 8185 8186 SMLoc S = getLoc(); 8187 StringRef Value; 8188 OperandMatchResultTy res; 8189 8190 SMLoc StringLoc; 8191 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8192 if (res != MatchOperand_Success) { 8193 return res; 8194 } 8195 8196 int64_t Int; 8197 Int = StringSwitch<int64_t>(Value) 8198 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8199 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8200 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8201 .Default(0xffffffff); 8202 8203 if (Int == 0xffffffff) { 8204 Error(StringLoc, "invalid dst_unused value"); 8205 return MatchOperand_ParseFail; 8206 } 8207 8208 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8209 return MatchOperand_Success; 8210 } 8211 8212 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8213 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8214 } 8215 8216 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8217 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8218 } 8219 8220 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8221 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8222 } 8223 8224 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8225 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8226 } 8227 8228 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8229 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8230 } 8231 8232 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8233 uint64_t BasicInstType, 8234 bool SkipDstVcc, 8235 bool SkipSrcVcc) { 8236 using namespace llvm::AMDGPU::SDWA; 8237 8238 OptionalImmIndexMap OptionalIdx; 8239 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8240 bool SkippedVcc = false; 8241 8242 unsigned I = 1; 8243 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8244 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8245 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8246 } 8247 8248 for (unsigned E = Operands.size(); I != E; ++I) { 8249 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8250 if (SkipVcc && !SkippedVcc && Op.isReg() && 8251 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8252 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8253 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8254 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8255 // Skip VCC only if we didn't skip it on previous iteration. 8256 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
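  // Worked illustration of the operand counts checked below (instruction text
  // taken from the comment above): for "v_addc_u32_sdwa v1, vcc, v2, v3, vcc",
  // Inst still holds only the dst (1 operand) when the first written vcc is
  // reached (the SkipDstVcc case); src0 and src1 then add two slots each, so
  // Inst has 5 operands when the trailing vcc is reached (the SkipSrcVcc case).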
8257 if (BasicInstType == SIInstrFlags::VOP2 &&
8258 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8259 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8260 SkippedVcc = true;
8261 continue;
8262 } else if (BasicInstType == SIInstrFlags::VOPC &&
8263 Inst.getNumOperands() == 0) {
8264 SkippedVcc = true;
8265 continue;
8266 }
8267 }
8268 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8269 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8270 } else if (Op.isImm()) {
8271 // Handle optional arguments
8272 OptionalIdx[Op.getImmTy()] = I;
8273 } else {
8274 llvm_unreachable("Invalid operand type");
8275 }
8276 SkippedVcc = false;
8277 }
8278
8279 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8280 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8281 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8282 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
8283 switch (BasicInstType) {
8284 case SIInstrFlags::VOP1:
8285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8286 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8288 }
8289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8291 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8292 break;
8293
8294 case SIInstrFlags::VOP2:
8295 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8296 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8297 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8298 }
8299 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8300 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8301 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8302 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8303 break;
8304
8305 case SIInstrFlags::VOPC:
8306 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8309 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8310 break;
8311
8312 default:
8313 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8314 }
8315 }
8316
8317 // Special case v_mac_{f16, f32}:
8318 // it has a src2 register operand that is tied to the dst operand.
8319 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8320 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8321 auto it = Inst.begin();
8322 std::advance(
8323 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8324 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8325 }
8326 }
8327
8328 //===----------------------------------------------------------------------===//
8329 // mAI
8330 //===----------------------------------------------------------------------===//
8331
8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8333 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8334 }
8335
8336 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8337 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8338 }
8339
8340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8341 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8342 }
8343
8344 /// Force static initialization.
8345 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8346 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8347 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8348 }
8349
8350 #define GET_REGISTER_MATCHER
8351 #define GET_MATCHER_IMPLEMENTATION
8352 #define GET_MNEMONIC_SPELL_CHECKER
8353 #define GET_MNEMONIC_CHECKER
8354 #include "AMDGPUGenAsmMatcher.inc"
8355
8356 // This function should be defined after the auto-generated include so that
8357 // we have the MatchClassKind enum defined.
8358 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8359 unsigned Kind) {
8360 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8361 // But MatchInstructionImpl() expects to meet a token and fails to validate the
8362 // operand. This method handles the case where we were given an immediate
8363 // operand but the matcher expects the corresponding token.
8364 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8365 switch (Kind) {
8366 case MCK_addr64:
8367 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8368 case MCK_gds:
8369 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8370 case MCK_lds:
8371 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8372 case MCK_idxen:
8373 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8374 case MCK_offen:
8375 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8376 case MCK_SSrcB32:
8377 // When operands have expression values, they will return true for isToken,
8378 // because it is not possible to distinguish between a token and an
8379 // expression at parse time. MatchInstructionImpl() will always try to
8380 // match an operand as a token, when isToken returns true, and when the
8381 // name of the expression is not a valid token, the match will fail,
8382 // so we need to handle it here.
8383 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8384 case MCK_SSrcF32:
8385 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8386 case MCK_SoppBrTarget:
8387 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8388 case MCK_VReg32OrOff:
8389 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8390 case MCK_InterpSlot:
8391 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8392 case MCK_Attr: 8393 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8394 case MCK_AttrChan: 8395 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8396 case MCK_ImmSMEMOffset: 8397 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8398 case MCK_SReg_64: 8399 case MCK_SReg_64_XEXEC: 8400 // Null is defined as a 32-bit register but 8401 // it should also be enabled with 64-bit operands. 8402 // The following code enables it for SReg_64 operands 8403 // used as source and destination. Remaining source 8404 // operands are handled in isInlinableImm. 8405 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8406 default: 8407 return Match_InvalidOperand; 8408 } 8409 } 8410 8411 //===----------------------------------------------------------------------===// 8412 // endpgm 8413 //===----------------------------------------------------------------------===// 8414 8415 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8416 SMLoc S = getLoc(); 8417 int64_t Imm = 0; 8418 8419 if (!parseExpr(Imm)) { 8420 // The operand is optional, if not present default to 0 8421 Imm = 0; 8422 } 8423 8424 if (!isUInt<16>(Imm)) { 8425 Error(S, "expected a 16-bit value"); 8426 return MatchOperand_ParseFail; 8427 } 8428 8429 Operands.push_back( 8430 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8431 return MatchOperand_Success; 8432 } 8433 8434 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8435