1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/TargetParser.h" 39 40 using namespace llvm; 41 using namespace llvm::AMDGPU; 42 using namespace llvm::amdhsa; 43 44 namespace { 45 46 class AMDGPUAsmParser; 47 48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 49 50 //===----------------------------------------------------------------------===// 51 // Operand 52 //===----------------------------------------------------------------------===// 53 54 class AMDGPUOperand : public MCParsedAsmOperand { 55 enum KindTy { 56 Token, 57 Immediate, 58 Register, 59 Expression 60 } Kind; 61 62 SMLoc StartLoc, EndLoc; 63 const AMDGPUAsmParser *AsmParser; 64 65 public: 66 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 67 : Kind(Kind_), AsmParser(AsmParser_) {} 68 69 using Ptr = std::unique_ptr<AMDGPUOperand>; 70 71 struct Modifiers { 72 bool Abs = false; 73 bool Neg = false; 74 bool Sext = false; 75 76 bool hasFPModifiers() const { return Abs || Neg; } 77 bool hasIntModifiers() const { return Sext; } 78 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 79 80 int64_t getFPModifiersOperand() const { 81 int64_t Operand = 0; 82 Operand |= Abs ? SISrcMods::ABS : 0u; 83 Operand |= Neg ? SISrcMods::NEG : 0u; 84 return Operand; 85 } 86 87 int64_t getIntModifiersOperand() const { 88 int64_t Operand = 0; 89 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
206 return isSymbolRefExpr(); 207 } 208 209 bool isSymbolRefExpr() const { 210 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 211 } 212 213 bool isImm() const override { 214 return Kind == Immediate; 215 } 216 217 void setImmKindNone() const { 218 assert(isImm()); 219 Imm.Kind = ImmKindTyNone; 220 } 221 222 void setImmKindLiteral() const { 223 assert(isImm()); 224 Imm.Kind = ImmKindTyLiteral; 225 } 226 227 void setImmKindConst() const { 228 assert(isImm()); 229 Imm.Kind = ImmKindTyConst; 230 } 231 232 bool IsImmKindLiteral() const { 233 return isImm() && Imm.Kind == ImmKindTyLiteral; 234 } 235 236 bool isImmKindConst() const { 237 return isImm() && Imm.Kind == ImmKindTyConst; 238 } 239 240 bool isInlinableImm(MVT type) const; 241 bool isLiteralImm(MVT type) const; 242 243 bool isRegKind() const { 244 return Kind == Register; 245 } 246 247 bool isReg() const override { 248 return isRegKind() && !hasModifiers(); 249 } 250 251 bool isRegOrInline(unsigned RCID, MVT type) const { 252 return isRegClass(RCID) || isInlinableImm(type); 253 } 254 255 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 256 return isRegOrInline(RCID, type) || isLiteralImm(type); 257 } 258 259 bool isRegOrImmWithInt16InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 261 } 262 263 bool isRegOrImmWithInt32InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 265 } 266 267 bool isRegOrImmWithInt64InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 269 } 270 271 bool isRegOrImmWithFP16InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 273 } 274 275 bool isRegOrImmWithFP32InputMods() const { 276 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 277 } 278 279 bool isRegOrImmWithFP64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 281 } 282 283 bool isVReg() const { 284 return isRegClass(AMDGPU::VGPR_32RegClassID) || 285 isRegClass(AMDGPU::VReg_64RegClassID) || 286 isRegClass(AMDGPU::VReg_96RegClassID) || 287 isRegClass(AMDGPU::VReg_128RegClassID) || 288 isRegClass(AMDGPU::VReg_160RegClassID) || 289 isRegClass(AMDGPU::VReg_192RegClassID) || 290 isRegClass(AMDGPU::VReg_256RegClassID) || 291 isRegClass(AMDGPU::VReg_512RegClassID) || 292 isRegClass(AMDGPU::VReg_1024RegClassID); 293 } 294 295 bool isVReg32() const { 296 return isRegClass(AMDGPU::VGPR_32RegClassID); 297 } 298 299 bool isVReg32OrOff() const { 300 return isOff() || isVReg32(); 301 } 302 303 bool isNull() const { 304 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 305 } 306 307 bool isVRegWithInputMods() const; 308 309 bool isSDWAOperand(MVT type) const; 310 bool isSDWAFP16Operand() const; 311 bool isSDWAFP32Operand() const; 312 bool isSDWAInt16Operand() const; 313 bool isSDWAInt32Operand() const; 314 315 bool isImmTy(ImmTy ImmT) const { 316 return isImm() && Imm.Type == ImmT; 317 } 318 319 bool isImmModifier() const { 320 return isImm() && Imm.Type != ImmTyNone; 321 } 322 323 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 324 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 325 bool isDMask() const { return isImmTy(ImmTyDMask); } 326 bool isDim() const { return isImmTy(ImmTyDim); } 327 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 328 bool isDA() const { return isImmTy(ImmTyDA); } 329 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 330 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 331 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 332 bool isOff() const { return isImmTy(ImmTyOff); } 333 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 334 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 335 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 336 bool isOffen() const { return isImmTy(ImmTyOffen); } 337 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 338 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 339 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 340 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 341 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 342 343 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 344 bool isGDS() const { return isImmTy(ImmTyGDS); } 345 bool isLDS() const { return isImmTy(ImmTyLDS); } 346 bool isCPol() const { return isImmTy(ImmTyCPol); } 347 bool isSWZ() const { return isImmTy(ImmTySWZ); } 348 bool isTFE() const { return isImmTy(ImmTyTFE); } 349 bool isD16() const { return isImmTy(ImmTyD16); } 350 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 351 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 352 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 353 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 354 bool isFI() const { return isImmTy(ImmTyDppFi); } 355 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 356 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 357 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 358 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 359 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 360 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 361 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 362 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 363 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 364 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 365 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 366 bool isHigh() const { return isImmTy(ImmTyHigh); } 367 368 bool isMod() const { 369 return isClampSI() || isOModSI(); 370 } 371 372 bool isRegOrImm() const { 373 return isReg() || isImm(); 374 } 375 376 bool isRegClass(unsigned RCID) const; 377 378 bool isInlineValue() const; 379 380 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 381 return isRegOrInline(RCID, type) && !hasModifiers(); 382 } 383 384 bool isSCSrcB16() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 386 } 387 388 bool isSCSrcV2B16() const { 389 return isSCSrcB16(); 390 } 391 392 bool isSCSrcB32() const { 393 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 394 } 395 396 bool isSCSrcB64() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 398 } 399 400 bool isBoolReg() const; 401 402 bool isSCSrcF16() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 404 } 405 406 bool isSCSrcV2F16() const { 407 return isSCSrcF16(); 408 } 409 410 bool isSCSrcF32() const { 411 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 412 } 413 414 bool isSCSrcF64() const { 415 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 416 } 417 418 bool isSSrcB32() const { 419 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 420 } 421 422 bool isSSrcB16() const { 423 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 424 } 425 426 bool isSSrcV2B16() const { 427 llvm_unreachable("cannot happen"); 428 return isSSrcB16(); 429 } 430 431 bool isSSrcB64() const { 432 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 433 // See isVSrc64(). 434 return isSCSrcB64() || isLiteralImm(MVT::i64); 435 } 436 437 bool isSSrcF32() const { 438 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 439 } 440 441 bool isSSrcF64() const { 442 return isSCSrcB64() || isLiteralImm(MVT::f64); 443 } 444 445 bool isSSrcF16() const { 446 return isSCSrcB16() || isLiteralImm(MVT::f16); 447 } 448 449 bool isSSrcV2F16() const { 450 llvm_unreachable("cannot happen"); 451 return isSSrcF16(); 452 } 453 454 bool isSSrcV2FP32() const { 455 llvm_unreachable("cannot happen"); 456 return isSSrcF32(); 457 } 458 459 bool isSCSrcV2FP32() const { 460 llvm_unreachable("cannot happen"); 461 return isSCSrcF32(); 462 } 463 464 bool isSSrcV2INT32() const { 465 llvm_unreachable("cannot happen"); 466 return isSSrcB32(); 467 } 468 469 bool isSCSrcV2INT32() const { 470 llvm_unreachable("cannot happen"); 471 return isSCSrcB32(); 472 } 473 474 bool isSSrcOrLdsB32() const { 475 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 476 isLiteralImm(MVT::i32) || isExpr(); 477 } 478 479 bool isVCSrcB32() const { 480 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 481 } 482 483 bool isVCSrcB64() const { 484 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 485 } 486 487 bool isVCSrcB16() const { 488 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 489 } 490 491 bool isVCSrcV2B16() const { 492 return isVCSrcB16(); 493 } 494 495 bool isVCSrcF32() const { 496 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 497 } 498 499 bool isVCSrcF64() const { 500 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 501 } 502 503 bool isVCSrcF16() const { 504 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 505 } 506 507 bool isVCSrcV2F16() const { 508 return isVCSrcF16(); 509 } 510 511 bool isVSrcB32() const { 512 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 513 } 514 515 bool isVSrcB64() const { 516 return isVCSrcF64() || isLiteralImm(MVT::i64); 517 } 518 519 bool isVSrcB16() const { 520 return isVCSrcB16() || isLiteralImm(MVT::i16); 521 } 522 523 bool isVSrcV2B16() const { 524 return isVSrcB16() || isLiteralImm(MVT::v2i16); 525 } 526 527 bool isVCSrcV2FP32() const { 528 return isVCSrcF64(); 529 } 530 531 bool isVSrcV2FP32() const { 532 return isVSrcF64() || isLiteralImm(MVT::v2f32); 533 } 534 535 bool isVCSrcV2INT32() const { 536 return isVCSrcB64(); 537 } 538 539 bool isVSrcV2INT32() const { 540 return isVSrcB64() || isLiteralImm(MVT::v2i32); 541 } 542 543 bool isVSrcF32() const { 544 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 545 } 546 547 bool isVSrcF64() const { 548 return isVCSrcF64() || isLiteralImm(MVT::f64); 549 } 550 551 bool isVSrcF16() const { 552 return isVCSrcF16() || isLiteralImm(MVT::f16); 553 } 554 555 bool isVSrcV2F16() const { 556 return isVSrcF16() || isLiteralImm(MVT::v2f16); 557 } 558 559 bool isVISrcB32() const { 560 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 561 } 562 563 bool isVISrcB16() const { 564 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 565 } 566 567 bool isVISrcV2B16() const { 568 return isVISrcB16(); 569 } 570 571 bool isVISrcF32() const { 572 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 573 } 574 575 
bool isVISrcF16() const { 576 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 577 } 578 579 bool isVISrcV2F16() const { 580 return isVISrcF16() || isVISrcB32(); 581 } 582 583 bool isVISrc_64B64() const { 584 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 585 } 586 587 bool isVISrc_64F64() const { 588 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 589 } 590 591 bool isVISrc_64V2FP32() const { 592 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 593 } 594 595 bool isVISrc_64V2INT32() const { 596 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 597 } 598 599 bool isVISrc_256B64() const { 600 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 601 } 602 603 bool isVISrc_256F64() const { 604 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 605 } 606 607 bool isVISrc_128B16() const { 608 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 609 } 610 611 bool isVISrc_128V2B16() const { 612 return isVISrc_128B16(); 613 } 614 615 bool isVISrc_128B32() const { 616 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 617 } 618 619 bool isVISrc_128F32() const { 620 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 621 } 622 623 bool isVISrc_256V2FP32() const { 624 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 625 } 626 627 bool isVISrc_256V2INT32() const { 628 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 629 } 630 631 bool isVISrc_512B32() const { 632 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 633 } 634 635 bool isVISrc_512B16() const { 636 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 637 } 638 639 bool isVISrc_512V2B16() const { 640 return isVISrc_512B16(); 641 } 642 643 bool isVISrc_512F32() const { 644 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 645 } 646 647 bool isVISrc_512F16() const { 648 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 649 } 650 651 bool isVISrc_512V2F16() const { 652 return isVISrc_512F16() || isVISrc_512B32(); 653 } 654 655 bool isVISrc_1024B32() const { 656 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 657 } 658 659 bool isVISrc_1024B16() const { 660 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 661 } 662 663 bool isVISrc_1024V2B16() const { 664 return isVISrc_1024B16(); 665 } 666 667 bool isVISrc_1024F32() const { 668 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 669 } 670 671 bool isVISrc_1024F16() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 673 } 674 675 bool isVISrc_1024V2F16() const { 676 return isVISrc_1024F16() || isVISrc_1024B32(); 677 } 678 679 bool isAISrcB32() const { 680 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 681 } 682 683 bool isAISrcB16() const { 684 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 685 } 686 687 bool isAISrcV2B16() const { 688 return isAISrcB16(); 689 } 690 691 bool isAISrcF32() const { 692 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 693 } 694 695 bool isAISrcF16() const { 696 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 697 } 698 699 bool isAISrcV2F16() const { 700 return isAISrcF16() || isAISrcB32(); 701 } 702 703 bool isAISrc_64B64() const { 704 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 705 } 706 707 bool isAISrc_64F64() const { 708 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 709 } 710 711 bool isAISrc_128B32() const { 712 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 713 } 714 715 bool isAISrc_128B16() const { 716 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 717 } 718 719 bool isAISrc_128V2B16() const { 720 return isAISrc_128B16(); 721 } 722 723 bool isAISrc_128F32() const { 724 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 725 } 726 727 bool isAISrc_128F16() const { 728 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 729 } 730 731 bool isAISrc_128V2F16() const { 732 return isAISrc_128F16() || isAISrc_128B32(); 733 } 734 735 bool isVISrc_128F16() const { 736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 737 } 738 739 bool isVISrc_128V2F16() const { 740 return isVISrc_128F16() || isVISrc_128B32(); 741 } 742 743 bool isAISrc_256B64() const { 744 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 745 } 746 747 bool isAISrc_256F64() const { 748 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 749 } 750 751 bool isAISrc_512B32() const { 752 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 753 } 754 755 bool isAISrc_512B16() const { 756 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 757 } 758 759 bool isAISrc_512V2B16() const { 760 return isAISrc_512B16(); 761 } 762 763 bool isAISrc_512F32() const { 764 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 765 } 766 767 bool isAISrc_512F16() const { 768 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 769 } 770 771 bool isAISrc_512V2F16() const { 772 return isAISrc_512F16() || isAISrc_512B32(); 773 } 774 775 bool isAISrc_1024B32() const { 776 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 777 } 778 779 bool isAISrc_1024B16() const { 780 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 781 } 782 783 bool isAISrc_1024V2B16() const { 784 return isAISrc_1024B16(); 785 } 786 787 bool isAISrc_1024F32() const { 788 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 789 } 790 791 bool isAISrc_1024F16() const { 792 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 793 } 794 795 bool isAISrc_1024V2F16() const { 796 return isAISrc_1024F16() || isAISrc_1024B32(); 797 } 798 799 bool isKImmFP32() const { 800 return isLiteralImm(MVT::f32); 801 } 802 803 bool isKImmFP16() const { 804 return isLiteralImm(MVT::f16); 805 } 806 807 bool isMem() const override { 808 return false; 809 } 810 811 bool isExpr() const { 812 return Kind == Expression; 813 } 814 815 bool isSoppBrTarget() const { 816 return isExpr() || isImm(); 817 } 818 819 bool isSWaitCnt() const; 820 bool isHwreg() const; 821 bool isSendMsg() const; 822 bool isSwizzle() const; 823 bool isSMRDOffset8() const; 824 bool isSMEMOffset() const; 825 bool isSMRDLiteralOffset() const; 826 bool isDPP8() const; 827 bool isDPPCtrl() const; 828 bool isBLGP() const; 829 bool isCBSZ() const; 830 bool isABID() const; 831 bool isGPRIdxMode() const; 832 bool isS16Imm() const; 833 bool isU16Imm() const; 834 bool isEndpgm() const; 835 836 StringRef getExpressionAsToken() const { 837 assert(isExpr()); 838 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 839 return S->getSymbol().getName(); 840 } 841 842 StringRef getToken() const { 843 assert(isToken()); 844 845 if (Kind == Expression) 846 return getExpressionAsToken(); 847 848 return StringRef(Tok.Data, Tok.Length); 
849 } 850 851 int64_t getImm() const { 852 assert(isImm()); 853 return Imm.Val; 854 } 855 856 void setImm(int64_t Val) { 857 assert(isImm()); 858 Imm.Val = Val; 859 } 860 861 ImmTy getImmTy() const { 862 assert(isImm()); 863 return Imm.Type; 864 } 865 866 unsigned getReg() const override { 867 assert(isRegKind()); 868 return Reg.RegNo; 869 } 870 871 SMLoc getStartLoc() const override { 872 return StartLoc; 873 } 874 875 SMLoc getEndLoc() const override { 876 return EndLoc; 877 } 878 879 SMRange getLocRange() const { 880 return SMRange(StartLoc, EndLoc); 881 } 882 883 Modifiers getModifiers() const { 884 assert(isRegKind() || isImmTy(ImmTyNone)); 885 return isRegKind() ? Reg.Mods : Imm.Mods; 886 } 887 888 void setModifiers(Modifiers Mods) { 889 assert(isRegKind() || isImmTy(ImmTyNone)); 890 if (isRegKind()) 891 Reg.Mods = Mods; 892 else 893 Imm.Mods = Mods; 894 } 895 896 bool hasModifiers() const { 897 return getModifiers().hasModifiers(); 898 } 899 900 bool hasFPModifiers() const { 901 return getModifiers().hasFPModifiers(); 902 } 903 904 bool hasIntModifiers() const { 905 return getModifiers().hasIntModifiers(); 906 } 907 908 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 909 910 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 911 912 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 913 914 template <unsigned Bitwidth> 915 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 916 917 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 918 addKImmFPOperands<16>(Inst, N); 919 } 920 921 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 922 addKImmFPOperands<32>(Inst, N); 923 } 924 925 void addRegOperands(MCInst &Inst, unsigned N) const; 926 927 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 928 addRegOperands(Inst, N); 929 } 930 931 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 932 if (isRegKind()) 933 addRegOperands(Inst, N); 934 else if (isExpr()) 935 Inst.addOperand(MCOperand::createExpr(Expr)); 936 else 937 addImmOperands(Inst, N); 938 } 939 940 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 941 Modifiers Mods = getModifiers(); 942 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 943 if (isRegKind()) { 944 addRegOperands(Inst, N); 945 } else { 946 addImmOperands(Inst, N, false); 947 } 948 } 949 950 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 951 assert(!hasIntModifiers()); 952 addRegOrImmWithInputModsOperands(Inst, N); 953 } 954 955 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 956 assert(!hasFPModifiers()); 957 addRegOrImmWithInputModsOperands(Inst, N); 958 } 959 960 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 961 Modifiers Mods = getModifiers(); 962 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 963 assert(isRegKind()); 964 addRegOperands(Inst, N); 965 } 966 967 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 968 assert(!hasIntModifiers()); 969 addRegWithInputModsOperands(Inst, N); 970 } 971 972 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 973 assert(!hasFPModifiers()); 974 addRegWithInputModsOperands(Inst, N); 975 } 976 977 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 978 if (isImm()) 979 addImmOperands(Inst, N); 980 else { 981 assert(isExpr()); 982 Inst.addOperand(MCOperand::createExpr(Expr)); 983 } 984 } 985 986 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 987 switch (Type) { 988 case ImmTyNone: OS << "None"; break; 989 case ImmTyGDS: OS << "GDS"; break; 990 case ImmTyLDS: OS << "LDS"; break; 991 case ImmTyOffen: OS << "Offen"; break; 992 case ImmTyIdxen: OS << "Idxen"; break; 993 case ImmTyAddr64: OS << "Addr64"; break; 994 case ImmTyOffset: OS << "Offset"; break; 995 case ImmTyInstOffset: OS << "InstOffset"; break; 996 case ImmTyOffset0: OS << "Offset0"; break; 997 case ImmTyOffset1: OS << "Offset1"; break; 998 case ImmTyCPol: OS << "CPol"; break; 999 case ImmTySWZ: OS << "SWZ"; break; 1000 case ImmTyTFE: OS << "TFE"; break; 1001 case ImmTyD16: OS << "D16"; break; 1002 case ImmTyFORMAT: OS << "FORMAT"; break; 1003 case ImmTyClampSI: OS << "ClampSI"; break; 1004 case ImmTyOModSI: OS << "OModSI"; break; 1005 case ImmTyDPP8: OS << "DPP8"; break; 1006 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1007 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1008 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1009 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1010 case ImmTyDppFi: OS << "FI"; break; 1011 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1012 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1013 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1014 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1015 case ImmTyDMask: OS << "DMask"; break; 1016 case ImmTyDim: OS << "Dim"; break; 1017 case ImmTyUNorm: OS << "UNorm"; break; 1018 case ImmTyDA: OS << "DA"; break; 1019 case ImmTyR128A16: OS << "R128A16"; break; 1020 case ImmTyA16: OS << "A16"; break; 1021 case ImmTyLWE: OS << "LWE"; break; 1022 case ImmTyOff: OS << "Off"; break; 1023 case ImmTyExpTgt: OS << "ExpTgt"; break; 1024 case ImmTyExpCompr: OS << "ExpCompr"; break; 1025 case ImmTyExpVM: OS << "ExpVM"; break; 1026 case ImmTyHwreg: OS << "Hwreg"; break; 1027 case ImmTySendMsg: OS << "SendMsg"; break; 1028 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1029 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1030 case ImmTyAttrChan: OS << "AttrChan"; break; 1031 case ImmTyOpSel: OS << "OpSel"; break; 1032 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1033 case ImmTyNegLo: OS << "NegLo"; break; 1034 case ImmTyNegHi: OS << "NegHi"; break; 1035 case ImmTySwizzle: OS << "Swizzle"; break; 1036 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1037 case ImmTyHigh: OS << "High"; break; 1038 case ImmTyBLGP: OS << "BLGP"; break; 1039 case ImmTyCBSZ: OS << "CBSZ"; break; 1040 case ImmTyABID: OS << "ABID"; break; 1041 case ImmTyEndpgm: OS << "Endpgm"; break; 1042 } 1043 } 1044 1045 void print(raw_ostream &OS) const override { 1046 switch (Kind) { 1047 case Register: 1048 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1049 break; 1050 case Immediate: 1051 OS << '<' << getImm(); 1052 if (getImmTy() != ImmTyNone) { 1053 OS << " type: "; printImmTy(OS, getImmTy()); 1054 } 1055 OS << " mods: " << Imm.Mods << '>'; 1056 break; 1057 case Token: 1058 OS << '\'' << getToken() << '\''; 1059 break; 1060 case Expression: 1061 OS << "<expr " << *Expr << '>'; 1062 break; 1063 } 1064 } 1065 1066 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1067 int64_t Val, SMLoc Loc, 1068 ImmTy Type = ImmTyNone, 1069 bool IsFPImm = false) { 1070 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1071 Op->Imm.Val = Val; 1072 Op->Imm.IsFPImm = IsFPImm; 1073 Op->Imm.Kind = ImmKindTyNone; 1074 Op->Imm.Type = Type; 1075 Op->Imm.Mods = Modifiers(); 1076 Op->StartLoc = Loc; 1077 Op->EndLoc = Loc; 1078 return Op; 1079 } 1080 1081 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1082 StringRef Str, SMLoc Loc, 1083 bool HasExplicitEncodingSize = true) { 1084 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1085 Res->Tok.Data = Str.data(); 1086 Res->Tok.Length = Str.size(); 1087 Res->StartLoc = Loc; 1088 Res->EndLoc = Loc; 1089 return Res; 1090 } 1091 1092 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1093 unsigned RegNo, SMLoc S, 1094 SMLoc E) { 1095 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1096 Op->Reg.RegNo = RegNo; 1097 Op->Reg.Mods = Modifiers(); 1098 Op->StartLoc = S; 1099 Op->EndLoc = E; 1100 return Op; 1101 } 1102 1103 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1104 const class MCExpr *Expr, SMLoc S) { 1105 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1106 Op->Expr = Expr; 1107 Op->StartLoc = S; 1108 Op->EndLoc = S; 1109 return Op; 1110 } 1111 }; 1112 1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1114 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1115 return OS; 1116 } 1117 1118 //===----------------------------------------------------------------------===// 1119 // AsmParser 1120 //===----------------------------------------------------------------------===// 1121 1122 // Holds info related to the current kernel, e.g. count of SGPRs used. 1123 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1124 // .amdgpu_hsa_kernel or at EOF. 1125 class KernelScopeInfo { 1126 int SgprIndexUnusedMin = -1; 1127 int VgprIndexUnusedMin = -1; 1128 int AgprIndexUnusedMin = -1; 1129 MCContext *Ctx = nullptr; 1130 MCSubtargetInfo const *MSTI = nullptr; 1131 1132 void usesSgprAt(int i) { 1133 if (i >= SgprIndexUnusedMin) { 1134 SgprIndexUnusedMin = ++i; 1135 if (Ctx) { 1136 MCSymbol* const Sym = 1137 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1138 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1139 } 1140 } 1141 } 1142 1143 void usesVgprAt(int i) { 1144 if (i >= VgprIndexUnusedMin) { 1145 VgprIndexUnusedMin = ++i; 1146 if (Ctx) { 1147 MCSymbol* const Sym = 1148 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1149 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1150 VgprIndexUnusedMin); 1151 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1152 } 1153 } 1154 } 1155 1156 void usesAgprAt(int i) { 1157 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1158 if (!hasMAIInsts(*MSTI)) 1159 return; 1160 1161 if (i >= AgprIndexUnusedMin) { 1162 AgprIndexUnusedMin = ++i; 1163 if (Ctx) { 1164 MCSymbol* const Sym = 1165 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1166 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1167 1168 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1169 MCSymbol* const vSym = 1170 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1171 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1172 VgprIndexUnusedMin); 1173 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1174 } 1175 } 1176 } 1177 1178 public: 1179 KernelScopeInfo() = default; 1180 1181 void initialize(MCContext &Context) { 1182 Ctx = &Context; 1183 MSTI = Ctx->getSubtargetInfo(); 1184 1185 usesSgprAt(SgprIndexUnusedMin = -1); 1186 usesVgprAt(VgprIndexUnusedMin = -1); 1187 if (hasMAIInsts(*MSTI)) { 1188 usesAgprAt(AgprIndexUnusedMin = -1); 1189 } 1190 } 1191 1192 void 
usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1193 switch (RegKind) { 1194 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1195 case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break; 1196 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1197 default: break; 1198 } 1199 } 1200 }; 1201 1202 class AMDGPUAsmParser : public MCTargetAsmParser { 1203 MCAsmParser &Parser; 1204 1205 // Number of extra operands parsed after the first optional operand. 1206 // This may be necessary to skip hardcoded mandatory operands. 1207 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1208 1209 unsigned ForcedEncodingSize = 0; 1210 bool ForcedDPP = false; 1211 bool ForcedSDWA = false; 1212 KernelScopeInfo KernelScope; 1213 unsigned CPolSeen; 1214 1215 /// @name Auto-generated Match Functions 1216 /// { 1217 1218 #define GET_ASSEMBLER_HEADER 1219 #include "AMDGPUGenAsmMatcher.inc" 1220 1221 /// } 1222 1223 private: 1224 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1225 bool OutOfRangeError(SMRange Range); 1226 /// Calculate VGPR/SGPR blocks required for given target, reserved 1227 /// registers, and user-specified NextFreeXGPR values. 1228 /// 1229 /// \param Features [in] Target features, used for bug corrections. 1230 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1231 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1232 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1233 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1234 /// descriptor field, if valid. 1235 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1236 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1237 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1238 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1239 /// \param VGPRBlocks [out] Result VGPR block count. 1240 /// \param SGPRBlocks [out] Result SGPR block count. 1241 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1242 bool FlatScrUsed, bool XNACKUsed, 1243 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1244 SMRange VGPRRange, unsigned NextFreeSGPR, 1245 SMRange SGPRRange, unsigned &VGPRBlocks, 1246 unsigned &SGPRBlocks); 1247 bool ParseDirectiveAMDGCNTarget(); 1248 bool ParseDirectiveAMDHSAKernel(); 1249 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1250 bool ParseDirectiveHSACodeObjectVersion(); 1251 bool ParseDirectiveHSACodeObjectISA(); 1252 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1253 bool ParseDirectiveAMDKernelCodeT(); 1254 // TODO: Possibly make subtargetHasRegister const. 1255 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1256 bool ParseDirectiveAMDGPUHsaKernel(); 1257 1258 bool ParseDirectiveISAVersion(); 1259 bool ParseDirectiveHSAMetadata(); 1260 bool ParseDirectivePALMetadataBegin(); 1261 bool ParseDirectivePALMetadata(); 1262 bool ParseDirectiveAMDGPULDS(); 1263 1264 /// Common code to parse out a block of text (typically YAML) between start and 1265 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
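      // Pre-define assembler-visible symbols describing the target ISA
      // version; which symbol names are used depends on the HSA ABI version
      // selected below.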
1332 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1333 MCContext &Ctx = getContext(); 1334 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1335 MCSymbol *Sym = 1336 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1337 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1338 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1339 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1340 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1341 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1342 } else { 1343 MCSymbol *Sym = 1344 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1345 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1346 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1347 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1348 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1349 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1350 } 1351 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1352 initializeGprCountSymbol(IS_VGPR); 1353 initializeGprCountSymbol(IS_SGPR); 1354 } else 1355 KernelScope.initialize(getContext()); 1356 } 1357 } 1358 1359 bool hasMIMG_R128() const { 1360 return AMDGPU::hasMIMG_R128(getSTI()); 1361 } 1362 1363 bool hasPackedD16() const { 1364 return AMDGPU::hasPackedD16(getSTI()); 1365 } 1366 1367 bool hasGFX10A16() const { 1368 return AMDGPU::hasGFX10A16(getSTI()); 1369 } 1370 1371 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1372 1373 bool isSI() const { 1374 return AMDGPU::isSI(getSTI()); 1375 } 1376 1377 bool isCI() const { 1378 return AMDGPU::isCI(getSTI()); 1379 } 1380 1381 bool isVI() const { 1382 return AMDGPU::isVI(getSTI()); 1383 } 1384 1385 bool isGFX9() const { 1386 return AMDGPU::isGFX9(getSTI()); 1387 } 1388 1389 bool isGFX90A() const { 1390 return AMDGPU::isGFX90A(getSTI()); 1391 } 1392 1393 bool isGFX9Plus() const { 1394 return AMDGPU::isGFX9Plus(getSTI()); 1395 } 1396 1397 bool isGFX10() const { 1398 return AMDGPU::isGFX10(getSTI()); 1399 } 1400 1401 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1402 1403 bool isGFX10_BEncoding() const { 1404 return AMDGPU::isGFX10_BEncoding(getSTI()); 1405 } 1406 1407 bool hasInv2PiInlineImm() const { 1408 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1409 } 1410 1411 bool hasFlatOffsets() const { 1412 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1413 } 1414 1415 bool hasArchitectedFlatScratch() const { 1416 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1417 } 1418 1419 bool hasSGPR102_SGPR103() const { 1420 return !isVI() && !isGFX9(); 1421 } 1422 1423 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1424 1425 bool hasIntClamp() const { 1426 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1427 } 1428 1429 AMDGPUTargetStreamer &getTargetStreamer() { 1430 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1431 return static_cast<AMDGPUTargetStreamer &>(TS); 1432 } 1433 1434 const MCRegisterInfo *getMRI() const { 1435 // We need this const_cast because for some reason getContext() is not const 1436 // in MCAsmParser. 
1437 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1438 } 1439 1440 const MCInstrInfo *getMII() const { 1441 return &MII; 1442 } 1443 1444 const FeatureBitset &getFeatureBits() const { 1445 return getSTI().getFeatureBits(); 1446 } 1447 1448 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1449 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1450 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1451 1452 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1453 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1454 bool isForcedDPP() const { return ForcedDPP; } 1455 bool isForcedSDWA() const { return ForcedSDWA; } 1456 ArrayRef<unsigned> getMatchedVariants() const; 1457 StringRef getMatchedVariantName() const; 1458 1459 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1460 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1461 bool RestoreOnFailure); 1462 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1463 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1464 SMLoc &EndLoc) override; 1465 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1466 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1467 unsigned Kind) override; 1468 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1469 OperandVector &Operands, MCStreamer &Out, 1470 uint64_t &ErrorInfo, 1471 bool MatchingInlineAsm) override; 1472 bool ParseDirective(AsmToken DirectiveID) override; 1473 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1474 OperandMode Mode = OperandMode_Default); 1475 StringRef parseMnemonicSuffix(StringRef Name); 1476 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1477 SMLoc NameLoc, OperandVector &Operands) override; 1478 //bool ProcessInstruction(MCInst &Inst); 1479 1480 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1481 1482 OperandMatchResultTy 1483 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1484 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1485 bool (*ConvertResult)(int64_t &) = nullptr); 1486 1487 OperandMatchResultTy 1488 parseOperandArrayWithPrefix(const char *Prefix, 1489 OperandVector &Operands, 1490 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1491 bool (*ConvertResult)(int64_t&) = nullptr); 1492 1493 OperandMatchResultTy 1494 parseNamedBit(StringRef Name, OperandVector &Operands, 1495 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1496 OperandMatchResultTy parseCPol(OperandVector &Operands); 1497 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1498 StringRef &Value, 1499 SMLoc &StringLoc); 1500 1501 bool isModifier(); 1502 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1503 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1504 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1505 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1506 bool parseSP3NegModifier(); 1507 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1508 OperandMatchResultTy parseReg(OperandVector &Operands); 1509 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1510 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1511 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1512 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1513 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1514 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1515 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1516 OperandMatchResultTy parseUfmt(int64_t &Format); 1517 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1518 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1519 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1520 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1521 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1522 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1523 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1524 1525 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1526 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1527 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1528 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1529 1530 bool parseCnt(int64_t &IntVal); 1531 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1532 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1533 1534 private: 1535 struct OperandInfoTy { 1536 SMLoc Loc; 1537 int64_t Id; 1538 bool IsSymbolic = false; 1539 bool IsDefined = false; 1540 1541 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1542 }; 1543 1544 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1545 bool validateSendMsg(const OperandInfoTy &Msg, 1546 const OperandInfoTy &Op, 1547 const OperandInfoTy &Stream); 1548 1549 bool parseHwregBody(OperandInfoTy &HwReg, 1550 OperandInfoTy &Offset, 1551 OperandInfoTy &Width); 1552 bool validateHwreg(const OperandInfoTy &HwReg, 1553 const OperandInfoTy &Offset, 1554 const OperandInfoTy &Width); 1555 1556 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1557 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1558 1559 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1560 const OperandVector &Operands) const; 1561 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1562 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1563 SMLoc getLitLoc(const OperandVector &Operands) const; 1564 SMLoc getConstLoc(const OperandVector &Operands) const; 1565 1566 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1567 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1568 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1569 bool validateSOPLiteral(const MCInst &Inst) const; 1570 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1571 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1572 bool validateIntClampSupported(const MCInst &Inst); 1573 bool validateMIMGAtomicDMask(const MCInst &Inst); 1574 bool validateMIMGGatherDMask(const MCInst &Inst); 1575 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1576 bool validateMIMGDataSize(const MCInst &Inst); 1577 bool validateMIMGAddrSize(const 
MCInst &Inst); 1578 bool validateMIMGD16(const MCInst &Inst); 1579 bool validateMIMGDim(const MCInst &Inst); 1580 bool validateMIMGMSAA(const MCInst &Inst); 1581 bool validateOpSel(const MCInst &Inst); 1582 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1583 bool validateVccOperand(unsigned Reg) const; 1584 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1585 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1586 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1587 bool validateAGPRLdSt(const MCInst &Inst) const; 1588 bool validateVGPRAlign(const MCInst &Inst) const; 1589 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1590 bool validateDivScale(const MCInst &Inst); 1591 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1592 const SMLoc &IDLoc); 1593 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1594 unsigned getConstantBusLimit(unsigned Opcode) const; 1595 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1596 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1597 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1598 1599 bool isSupportedMnemo(StringRef Mnemo, 1600 const FeatureBitset &FBS); 1601 bool isSupportedMnemo(StringRef Mnemo, 1602 const FeatureBitset &FBS, 1603 ArrayRef<unsigned> Variants); 1604 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1605 1606 bool isId(const StringRef Id) const; 1607 bool isId(const AsmToken &Token, const StringRef Id) const; 1608 bool isToken(const AsmToken::TokenKind Kind) const; 1609 bool trySkipId(const StringRef Id); 1610 bool trySkipId(const StringRef Pref, const StringRef Id); 1611 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1612 bool trySkipToken(const AsmToken::TokenKind Kind); 1613 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1614 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1615 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1616 1617 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1618 AsmToken::TokenKind getTokenKind() const; 1619 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1620 bool parseExpr(OperandVector &Operands); 1621 StringRef getTokenStr() const; 1622 AsmToken peekToken(); 1623 AsmToken getToken() const; 1624 SMLoc getLoc() const; 1625 void lex(); 1626 1627 public: 1628 void onBeginOfFile() override; 1629 1630 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1631 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1632 1633 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1634 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1635 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1636 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1637 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1638 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1639 1640 bool parseSwizzleOperand(int64_t &Op, 1641 const unsigned MinVal, 1642 const unsigned MaxVal, 1643 const StringRef ErrMsg, 1644 SMLoc &Loc); 1645 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1646 const unsigned MinVal, 1647 const unsigned MaxVal, 1648 const StringRef ErrMsg); 1649 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1650 bool parseSwizzleOffset(int64_t &Imm); 1651 bool parseSwizzleMacro(int64_t &Imm); 1652 bool 
parseSwizzleQuadPerm(int64_t &Imm); 1653 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1654 bool parseSwizzleBroadcast(int64_t &Imm); 1655 bool parseSwizzleSwap(int64_t &Imm); 1656 bool parseSwizzleReverse(int64_t &Imm); 1657 1658 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1659 int64_t parseGPRIdxMacro(); 1660 1661 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1662 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1663 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1664 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1665 1666 AMDGPUOperand::Ptr defaultCPol() const; 1667 1668 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1669 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1670 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1671 AMDGPUOperand::Ptr defaultFlatOffset() const; 1672 1673 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1674 1675 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1676 OptionalImmIndexMap &OptionalIdx); 1677 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1678 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1679 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1680 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1681 OptionalImmIndexMap &OptionalIdx); 1682 1683 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1684 1685 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1686 bool IsAtomic = false); 1687 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1688 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1689 1690 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1691 1692 bool parseDimId(unsigned &Encoding); 1693 OperandMatchResultTy parseDim(OperandVector &Operands); 1694 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1695 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1696 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1697 int64_t parseDPPCtrlSel(StringRef Ctrl); 1698 int64_t parseDPPCtrlPerm(); 1699 AMDGPUOperand::Ptr defaultRowMask() const; 1700 AMDGPUOperand::Ptr defaultBankMask() const; 1701 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1702 AMDGPUOperand::Ptr defaultFI() const; 1703 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1704 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1705 1706 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1707 AMDGPUOperand::ImmTy Type); 1708 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1709 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1710 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1711 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1712 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1713 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1714 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1715 uint64_t BasicInstType, 1716 bool SkipDstVcc = false, 1717 bool SkipSrcVcc = false); 1718 1719 AMDGPUOperand::Ptr defaultBLGP() const; 1720 AMDGPUOperand::Ptr defaultCBSZ() const; 1721 AMDGPUOperand::Ptr defaultABID() const; 1722 1723 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1724 AMDGPUOperand::Ptr 
defaultEndpgmImmOperands() const; 1725 }; 1726 1727 struct OptionalOperand { 1728 const char *Name; 1729 AMDGPUOperand::ImmTy Type; 1730 bool IsBit; 1731 bool (*ConvertResult)(int64_t&); 1732 }; 1733 1734 } // end anonymous namespace 1735 1736 // May be called with integer type with equivalent bitwidth. 1737 static const fltSemantics *getFltSemantics(unsigned Size) { 1738 switch (Size) { 1739 case 4: 1740 return &APFloat::IEEEsingle(); 1741 case 8: 1742 return &APFloat::IEEEdouble(); 1743 case 2: 1744 return &APFloat::IEEEhalf(); 1745 default: 1746 llvm_unreachable("unsupported fp type"); 1747 } 1748 } 1749 1750 static const fltSemantics *getFltSemantics(MVT VT) { 1751 return getFltSemantics(VT.getSizeInBits() / 8); 1752 } 1753 1754 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1755 switch (OperandType) { 1756 case AMDGPU::OPERAND_REG_IMM_INT32: 1757 case AMDGPU::OPERAND_REG_IMM_FP32: 1758 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1759 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1760 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1761 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1762 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1763 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1764 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1765 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1766 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1767 case AMDGPU::OPERAND_KIMM32: 1768 return &APFloat::IEEEsingle(); 1769 case AMDGPU::OPERAND_REG_IMM_INT64: 1770 case AMDGPU::OPERAND_REG_IMM_FP64: 1771 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1772 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1773 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1774 return &APFloat::IEEEdouble(); 1775 case AMDGPU::OPERAND_REG_IMM_INT16: 1776 case AMDGPU::OPERAND_REG_IMM_FP16: 1777 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1778 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1779 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1780 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1781 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1782 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1783 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1784 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1785 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1786 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1787 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1788 case AMDGPU::OPERAND_KIMM16: 1789 return &APFloat::IEEEhalf(); 1790 default: 1791 llvm_unreachable("unsupported fp type"); 1792 } 1793 } 1794 1795 //===----------------------------------------------------------------------===// 1796 // Operand 1797 //===----------------------------------------------------------------------===// 1798 1799 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1800 bool Lost; 1801 1802 // Convert literal to single precision 1803 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1804 APFloat::rmNearestTiesToEven, 1805 &Lost); 1806 // We allow precision lost but not overflow or underflow 1807 if (Status != APFloat::opOK && 1808 Lost && 1809 ((Status & APFloat::opOverflow) != 0 || 1810 (Status & APFloat::opUnderflow) != 0)) { 1811 return false; 1812 } 1813 1814 return true; 1815 } 1816 1817 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1818 return isUIntN(Size, Val) || isIntN(Size, Val); 1819 } 1820 1821 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1822 if (VT.getScalarType() == MVT::i16) { 1823 // FP immediate values are broken. 1824 return isInlinableIntLiteral(Val); 1825 } 1826 1827 // f16/v2f16 operands work correctly for all values. 
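// For example (illustrative, not exhaustive): the f16 bit patterns 0x3C00 (1.0)
// and 0xB800 (-0.5) are inlinable, 0x3118 (1/(2*pi)) is inlinable only when
// HasInv2Pi is set, and small integers in [-16, 64] are inlinable in both the
// i16 and f16 cases.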
1828 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1829 } 1830 1831 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1832 1833 // This is a hack to enable named inline values like 1834 // shared_base with both 32-bit and 64-bit operands. 1835 // Note that these values are defined as 1836 // 32-bit operands only. 1837 if (isInlineValue()) { 1838 return true; 1839 } 1840 1841 if (!isImmTy(ImmTyNone)) { 1842 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1843 return false; 1844 } 1845 // TODO: We should avoid using host float here. It would be better to 1846 // check the float bit values which is what a few other places do. 1847 // We've had bot failures before due to weird NaN support on mips hosts. 1848 1849 APInt Literal(64, Imm.Val); 1850 1851 if (Imm.IsFPImm) { // We got fp literal token 1852 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1853 return AMDGPU::isInlinableLiteral64(Imm.Val, 1854 AsmParser->hasInv2PiInlineImm()); 1855 } 1856 1857 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1858 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1859 return false; 1860 1861 if (type.getScalarSizeInBits() == 16) { 1862 return isInlineableLiteralOp16( 1863 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1864 type, AsmParser->hasInv2PiInlineImm()); 1865 } 1866 1867 // Check if single precision literal is inlinable 1868 return AMDGPU::isInlinableLiteral32( 1869 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1870 AsmParser->hasInv2PiInlineImm()); 1871 } 1872 1873 // We got int literal token. 1874 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1875 return AMDGPU::isInlinableLiteral64(Imm.Val, 1876 AsmParser->hasInv2PiInlineImm()); 1877 } 1878 1879 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1880 return false; 1881 } 1882 1883 if (type.getScalarSizeInBits() == 16) { 1884 return isInlineableLiteralOp16( 1885 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1886 type, AsmParser->hasInv2PiInlineImm()); 1887 } 1888 1889 return AMDGPU::isInlinableLiteral32( 1890 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1891 AsmParser->hasInv2PiInlineImm()); 1892 } 1893 1894 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1895 // Check that this immediate can be added as literal 1896 if (!isImmTy(ImmTyNone)) { 1897 return false; 1898 } 1899 1900 if (!Imm.IsFPImm) { 1901 // We got int literal token. 1902 1903 if (type == MVT::f64 && hasFPModifiers()) { 1904 // Cannot apply fp modifiers to int literals preserving the same semantics 1905 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1906 // disable these cases. 1907 return false; 1908 } 1909 1910 unsigned Size = type.getSizeInBits(); 1911 if (Size == 64) 1912 Size = 32; 1913 1914 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1915 // types. 1916 return isSafeTruncation(Imm.Val, Size); 1917 } 1918 1919 // We got fp literal token 1920 if (type == MVT::f64) { // Expected 64-bit fp operand 1921 // We would set low 64-bits of literal to zeroes but we accept this literals 1922 return true; 1923 } 1924 1925 if (type == MVT::i64) { // Expected 64-bit int operand 1926 // We don't allow fp literals in 64-bit integer instructions. It is 1927 // unclear how we should encode them. 
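// (For example, a floating-point token such as 1.5 supplied where an i64
// operand is expected is rejected here, so the operand simply fails to match.)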
1928 return false; 1929 } 1930 1931 // We allow fp literals with f16x2 operands assuming that the specified 1932 // literal goes into the lower half and the upper half is zero. We also 1933 // require that the literal may be losslessly converted to f16. 1934 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1935 (type == MVT::v2i16)? MVT::i16 : 1936 (type == MVT::v2f32)? MVT::f32 : type; 1937 1938 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1939 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1940 } 1941 1942 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1943 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1944 } 1945 1946 bool AMDGPUOperand::isVRegWithInputMods() const { 1947 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1948 // GFX90A allows DPP on 64-bit operands. 1949 (isRegClass(AMDGPU::VReg_64RegClassID) && 1950 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1951 } 1952 1953 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1954 if (AsmParser->isVI()) 1955 return isVReg32(); 1956 else if (AsmParser->isGFX9Plus()) 1957 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1958 else 1959 return false; 1960 } 1961 1962 bool AMDGPUOperand::isSDWAFP16Operand() const { 1963 return isSDWAOperand(MVT::f16); 1964 } 1965 1966 bool AMDGPUOperand::isSDWAFP32Operand() const { 1967 return isSDWAOperand(MVT::f32); 1968 } 1969 1970 bool AMDGPUOperand::isSDWAInt16Operand() const { 1971 return isSDWAOperand(MVT::i16); 1972 } 1973 1974 bool AMDGPUOperand::isSDWAInt32Operand() const { 1975 return isSDWAOperand(MVT::i32); 1976 } 1977 1978 bool AMDGPUOperand::isBoolReg() const { 1979 auto FB = AsmParser->getFeatureBits(); 1980 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1981 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1982 } 1983 1984 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1985 { 1986 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1987 assert(Size == 2 || Size == 4 || Size == 8); 1988 1989 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1990 1991 if (Imm.Mods.Abs) { 1992 Val &= ~FpSignMask; 1993 } 1994 if (Imm.Mods.Neg) { 1995 Val ^= FpSignMask; 1996 } 1997 1998 return Val; 1999 } 2000 2001 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2002 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2003 Inst.getNumOperands())) { 2004 addLiteralImmOperand(Inst, Imm.Val, 2005 ApplyModifiers & 2006 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2007 } else { 2008 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2009 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2010 setImmKindNone(); 2011 } 2012 } 2013 2014 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2015 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2016 auto OpNum = Inst.getNumOperands(); 2017 // Check that this operand accepts literals 2018 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2019 2020 if (ApplyModifiers) { 2021 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2022 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2023 Val = applyInputFPModifiers(Val, Size); 2024 } 2025 2026 APInt Literal(64, Val); 2027 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2028 2029 if (Imm.IsFPImm) { // We got fp literal token 2030 switch (OpTy) { 2031 case AMDGPU::OPERAND_REG_IMM_INT64: 2032 case AMDGPU::OPERAND_REG_IMM_FP64: 2033 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2034 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2035 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2036 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2037 AsmParser->hasInv2PiInlineImm())) { 2038 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2039 setImmKindConst(); 2040 return; 2041 } 2042 2043 // Non-inlineable 2044 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2045 // For fp operands we check if low 32 bits are zeros 2046 if (Literal.getLoBits(32) != 0) { 2047 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2048 "Can't encode literal as exact 64-bit floating-point operand. " 2049 "Low 32-bits will be set to zero"); 2050 } 2051 2052 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2053 setImmKindLiteral(); 2054 return; 2055 } 2056 2057 // We don't allow fp literals in 64-bit integer instructions. It is 2058 // unclear how we should encode them. This case should be checked earlier 2059 // in predicate methods (isLiteralImm()) 2060 llvm_unreachable("fp literal in 64-bit integer instruction."); 2061 2062 case AMDGPU::OPERAND_REG_IMM_INT32: 2063 case AMDGPU::OPERAND_REG_IMM_FP32: 2064 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2065 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2067 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2069 case AMDGPU::OPERAND_REG_IMM_INT16: 2070 case AMDGPU::OPERAND_REG_IMM_FP16: 2071 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2072 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2073 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2074 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2075 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2076 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2077 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2078 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2079 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2080 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2081 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2082 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2083 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2084 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2085 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2086 case AMDGPU::OPERAND_KIMM32: 2087 case AMDGPU::OPERAND_KIMM16: { 2088 bool lost; 2089 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2090 // Convert literal to single precision 2091 FPLiteral.convert(*getOpFltSemantics(OpTy), 2092 APFloat::rmNearestTiesToEven, &lost); 2093 // We allow precision lost but not overflow or underflow. This should be 2094 // checked earlier in isLiteralImm() 2095 2096 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2097 Inst.addOperand(MCOperand::createImm(ImmVal)); 2098 setImmKindLiteral(); 2099 return; 2100 } 2101 default: 2102 llvm_unreachable("invalid operand size"); 2103 } 2104 2105 return; 2106 } 2107 2108 // We got int literal token. 2109 // Only sign extend inline immediates. 
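// For example, for a 32-bit operand the token -1 is an inline constant and is
// kept sign-extended, while a non-inlinable value such as 0x12345 is emitted
// as a literal truncated to its low 32 bits (illustrative values).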
2110 switch (OpTy) { 2111 case AMDGPU::OPERAND_REG_IMM_INT32: 2112 case AMDGPU::OPERAND_REG_IMM_FP32: 2113 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2114 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2115 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2118 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2119 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2120 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2122 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2123 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2124 if (isSafeTruncation(Val, 32) && 2125 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2126 AsmParser->hasInv2PiInlineImm())) { 2127 Inst.addOperand(MCOperand::createImm(Val)); 2128 setImmKindConst(); 2129 return; 2130 } 2131 2132 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2133 setImmKindLiteral(); 2134 return; 2135 2136 case AMDGPU::OPERAND_REG_IMM_INT64: 2137 case AMDGPU::OPERAND_REG_IMM_FP64: 2138 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2139 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2140 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2141 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2142 Inst.addOperand(MCOperand::createImm(Val)); 2143 setImmKindConst(); 2144 return; 2145 } 2146 2147 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2148 setImmKindLiteral(); 2149 return; 2150 2151 case AMDGPU::OPERAND_REG_IMM_INT16: 2152 case AMDGPU::OPERAND_REG_IMM_FP16: 2153 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2154 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2155 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2156 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2157 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2158 if (isSafeTruncation(Val, 16) && 2159 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2160 AsmParser->hasInv2PiInlineImm())) { 2161 Inst.addOperand(MCOperand::createImm(Val)); 2162 setImmKindConst(); 2163 return; 2164 } 2165 2166 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2167 setImmKindLiteral(); 2168 return; 2169 2170 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2171 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2172 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2173 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2174 assert(isSafeTruncation(Val, 16)); 2175 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2176 AsmParser->hasInv2PiInlineImm())); 2177 2178 Inst.addOperand(MCOperand::createImm(Val)); 2179 return; 2180 } 2181 case AMDGPU::OPERAND_KIMM32: 2182 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2183 setImmKindNone(); 2184 return; 2185 case AMDGPU::OPERAND_KIMM16: 2186 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2187 setImmKindNone(); 2188 return; 2189 default: 2190 llvm_unreachable("invalid operand size"); 2191 } 2192 } 2193 2194 template <unsigned Bitwidth> 2195 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2196 APInt Literal(64, Imm.Val); 2197 setImmKindNone(); 2198 2199 if (!Imm.IsFPImm) { 2200 // We got int literal token. 
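// The raw integer is passed through as its low Bitwidth bits (e.g. for a
// v_madmk_f32 K operand, a token such as 0x3f800000 is used unchanged);
// only fp tokens are converted below.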
2201 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2202 return; 2203 } 2204 2205 bool Lost; 2206 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2207 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2208 APFloat::rmNearestTiesToEven, &Lost); 2209 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2210 } 2211 2212 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2213 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2214 } 2215 2216 static bool isInlineValue(unsigned Reg) { 2217 switch (Reg) { 2218 case AMDGPU::SRC_SHARED_BASE: 2219 case AMDGPU::SRC_SHARED_LIMIT: 2220 case AMDGPU::SRC_PRIVATE_BASE: 2221 case AMDGPU::SRC_PRIVATE_LIMIT: 2222 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2223 return true; 2224 case AMDGPU::SRC_VCCZ: 2225 case AMDGPU::SRC_EXECZ: 2226 case AMDGPU::SRC_SCC: 2227 return true; 2228 case AMDGPU::SGPR_NULL: 2229 return true; 2230 default: 2231 return false; 2232 } 2233 } 2234 2235 bool AMDGPUOperand::isInlineValue() const { 2236 return isRegKind() && ::isInlineValue(getReg()); 2237 } 2238 2239 //===----------------------------------------------------------------------===// 2240 // AsmParser 2241 //===----------------------------------------------------------------------===// 2242 2243 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2244 if (Is == IS_VGPR) { 2245 switch (RegWidth) { 2246 default: return -1; 2247 case 1: return AMDGPU::VGPR_32RegClassID; 2248 case 2: return AMDGPU::VReg_64RegClassID; 2249 case 3: return AMDGPU::VReg_96RegClassID; 2250 case 4: return AMDGPU::VReg_128RegClassID; 2251 case 5: return AMDGPU::VReg_160RegClassID; 2252 case 6: return AMDGPU::VReg_192RegClassID; 2253 case 7: return AMDGPU::VReg_224RegClassID; 2254 case 8: return AMDGPU::VReg_256RegClassID; 2255 case 16: return AMDGPU::VReg_512RegClassID; 2256 case 32: return AMDGPU::VReg_1024RegClassID; 2257 } 2258 } else if (Is == IS_TTMP) { 2259 switch (RegWidth) { 2260 default: return -1; 2261 case 1: return AMDGPU::TTMP_32RegClassID; 2262 case 2: return AMDGPU::TTMP_64RegClassID; 2263 case 4: return AMDGPU::TTMP_128RegClassID; 2264 case 8: return AMDGPU::TTMP_256RegClassID; 2265 case 16: return AMDGPU::TTMP_512RegClassID; 2266 } 2267 } else if (Is == IS_SGPR) { 2268 switch (RegWidth) { 2269 default: return -1; 2270 case 1: return AMDGPU::SGPR_32RegClassID; 2271 case 2: return AMDGPU::SGPR_64RegClassID; 2272 case 3: return AMDGPU::SGPR_96RegClassID; 2273 case 4: return AMDGPU::SGPR_128RegClassID; 2274 case 5: return AMDGPU::SGPR_160RegClassID; 2275 case 6: return AMDGPU::SGPR_192RegClassID; 2276 case 7: return AMDGPU::SGPR_224RegClassID; 2277 case 8: return AMDGPU::SGPR_256RegClassID; 2278 case 16: return AMDGPU::SGPR_512RegClassID; 2279 } 2280 } else if (Is == IS_AGPR) { 2281 switch (RegWidth) { 2282 default: return -1; 2283 case 1: return AMDGPU::AGPR_32RegClassID; 2284 case 2: return AMDGPU::AReg_64RegClassID; 2285 case 3: return AMDGPU::AReg_96RegClassID; 2286 case 4: return AMDGPU::AReg_128RegClassID; 2287 case 5: return AMDGPU::AReg_160RegClassID; 2288 case 6: return AMDGPU::AReg_192RegClassID; 2289 case 7: return AMDGPU::AReg_224RegClassID; 2290 case 8: return AMDGPU::AReg_256RegClassID; 2291 case 16: return AMDGPU::AReg_512RegClassID; 2292 case 32: return AMDGPU::AReg_1024RegClassID; 2293 } 2294 } 2295 return -1; 2296 } 2297 2298 static unsigned getSpecialRegForName(StringRef RegName) { 2299 return StringSwitch<unsigned>(RegName) 2300 .Case("exec", 
AMDGPU::EXEC) 2301 .Case("vcc", AMDGPU::VCC) 2302 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2303 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2304 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2305 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2306 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2307 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2308 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2309 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2310 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2311 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2312 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2313 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2314 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2315 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2316 .Case("m0", AMDGPU::M0) 2317 .Case("vccz", AMDGPU::SRC_VCCZ) 2318 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2319 .Case("execz", AMDGPU::SRC_EXECZ) 2320 .Case("src_execz", AMDGPU::SRC_EXECZ) 2321 .Case("scc", AMDGPU::SRC_SCC) 2322 .Case("src_scc", AMDGPU::SRC_SCC) 2323 .Case("tba", AMDGPU::TBA) 2324 .Case("tma", AMDGPU::TMA) 2325 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2326 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2327 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2328 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2329 .Case("vcc_lo", AMDGPU::VCC_LO) 2330 .Case("vcc_hi", AMDGPU::VCC_HI) 2331 .Case("exec_lo", AMDGPU::EXEC_LO) 2332 .Case("exec_hi", AMDGPU::EXEC_HI) 2333 .Case("tma_lo", AMDGPU::TMA_LO) 2334 .Case("tma_hi", AMDGPU::TMA_HI) 2335 .Case("tba_lo", AMDGPU::TBA_LO) 2336 .Case("tba_hi", AMDGPU::TBA_HI) 2337 .Case("pc", AMDGPU::PC_REG) 2338 .Case("null", AMDGPU::SGPR_NULL) 2339 .Default(AMDGPU::NoRegister); 2340 } 2341 2342 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2343 SMLoc &EndLoc, bool RestoreOnFailure) { 2344 auto R = parseRegister(); 2345 if (!R) return true; 2346 assert(R->isReg()); 2347 RegNo = R->getReg(); 2348 StartLoc = R->getStartLoc(); 2349 EndLoc = R->getEndLoc(); 2350 return false; 2351 } 2352 2353 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2354 SMLoc &EndLoc) { 2355 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2356 } 2357 2358 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2359 SMLoc &StartLoc, 2360 SMLoc &EndLoc) { 2361 bool Result = 2362 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2363 bool PendingErrors = getParser().hasPendingError(); 2364 getParser().clearPendingErrors(); 2365 if (PendingErrors) 2366 return MatchOperand_ParseFail; 2367 if (Result) 2368 return MatchOperand_NoMatch; 2369 return MatchOperand_Success; 2370 } 2371 2372 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2373 RegisterKind RegKind, unsigned Reg1, 2374 SMLoc Loc) { 2375 switch (RegKind) { 2376 case IS_SPECIAL: 2377 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2378 Reg = AMDGPU::EXEC; 2379 RegWidth = 2; 2380 return true; 2381 } 2382 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2383 Reg = AMDGPU::FLAT_SCR; 2384 RegWidth = 2; 2385 return true; 2386 } 2387 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2388 Reg = AMDGPU::XNACK_MASK; 2389 RegWidth = 2; 2390 return true; 2391 } 2392 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2393 Reg = AMDGPU::VCC; 2394 RegWidth = 2; 2395 return true; 2396 } 2397 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2398 Reg = 
AMDGPU::TBA; 2399 RegWidth = 2; 2400 return true; 2401 } 2402 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2403 Reg = AMDGPU::TMA; 2404 RegWidth = 2; 2405 return true; 2406 } 2407 Error(Loc, "register does not fit in the list"); 2408 return false; 2409 case IS_VGPR: 2410 case IS_SGPR: 2411 case IS_AGPR: 2412 case IS_TTMP: 2413 if (Reg1 != Reg + RegWidth) { 2414 Error(Loc, "registers in a list must have consecutive indices"); 2415 return false; 2416 } 2417 RegWidth++; 2418 return true; 2419 default: 2420 llvm_unreachable("unexpected register kind"); 2421 } 2422 } 2423 2424 struct RegInfo { 2425 StringLiteral Name; 2426 RegisterKind Kind; 2427 }; 2428 2429 static constexpr RegInfo RegularRegisters[] = { 2430 {{"v"}, IS_VGPR}, 2431 {{"s"}, IS_SGPR}, 2432 {{"ttmp"}, IS_TTMP}, 2433 {{"acc"}, IS_AGPR}, 2434 {{"a"}, IS_AGPR}, 2435 }; 2436 2437 static bool isRegularReg(RegisterKind Kind) { 2438 return Kind == IS_VGPR || 2439 Kind == IS_SGPR || 2440 Kind == IS_TTMP || 2441 Kind == IS_AGPR; 2442 } 2443 2444 static const RegInfo* getRegularRegInfo(StringRef Str) { 2445 for (const RegInfo &Reg : RegularRegisters) 2446 if (Str.startswith(Reg.Name)) 2447 return &Reg; 2448 return nullptr; 2449 } 2450 2451 static bool getRegNum(StringRef Str, unsigned& Num) { 2452 return !Str.getAsInteger(10, Num); 2453 } 2454 2455 bool 2456 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2457 const AsmToken &NextToken) const { 2458 2459 // A list of consecutive registers: [s0,s1,s2,s3] 2460 if (Token.is(AsmToken::LBrac)) 2461 return true; 2462 2463 if (!Token.is(AsmToken::Identifier)) 2464 return false; 2465 2466 // A single register like s0 or a range of registers like s[0:1] 2467 2468 StringRef Str = Token.getString(); 2469 const RegInfo *Reg = getRegularRegInfo(Str); 2470 if (Reg) { 2471 StringRef RegName = Reg->Name; 2472 StringRef RegSuffix = Str.substr(RegName.size()); 2473 if (!RegSuffix.empty()) { 2474 unsigned Num; 2475 // A single register with an index: rXX 2476 if (getRegNum(RegSuffix, Num)) 2477 return true; 2478 } else { 2479 // A range of registers: r[XX:YY]. 2480 if (NextToken.is(AsmToken::LBrac)) 2481 return true; 2482 } 2483 } 2484 2485 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2486 } 2487 2488 bool 2489 AMDGPUAsmParser::isRegister() 2490 { 2491 return isRegister(getToken(), peekToken()); 2492 } 2493 2494 unsigned 2495 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2496 unsigned RegNum, 2497 unsigned RegWidth, 2498 SMLoc Loc) { 2499 2500 assert(isRegularReg(RegKind)); 2501 2502 unsigned AlignSize = 1; 2503 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2504 // SGPR and TTMP registers must be aligned. 2505 // Max required alignment is 4 dwords. 
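// For example, s[2:3] is a valid 64-bit pair while s[1:2] is rejected below
// with 'invalid register alignment'; 128-bit and wider ranges must start at a
// multiple of 4, e.g. s[4:7].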
2506 AlignSize = std::min(RegWidth, 4u); 2507 } 2508 2509 if (RegNum % AlignSize != 0) { 2510 Error(Loc, "invalid register alignment"); 2511 return AMDGPU::NoRegister; 2512 } 2513 2514 unsigned RegIdx = RegNum / AlignSize; 2515 int RCID = getRegClass(RegKind, RegWidth); 2516 if (RCID == -1) { 2517 Error(Loc, "invalid or unsupported register size"); 2518 return AMDGPU::NoRegister; 2519 } 2520 2521 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2522 const MCRegisterClass RC = TRI->getRegClass(RCID); 2523 if (RegIdx >= RC.getNumRegs()) { 2524 Error(Loc, "register index is out of range"); 2525 return AMDGPU::NoRegister; 2526 } 2527 2528 return RC.getRegister(RegIdx); 2529 } 2530 2531 bool 2532 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2533 int64_t RegLo, RegHi; 2534 if (!skipToken(AsmToken::LBrac, "missing register index")) 2535 return false; 2536 2537 SMLoc FirstIdxLoc = getLoc(); 2538 SMLoc SecondIdxLoc; 2539 2540 if (!parseExpr(RegLo)) 2541 return false; 2542 2543 if (trySkipToken(AsmToken::Colon)) { 2544 SecondIdxLoc = getLoc(); 2545 if (!parseExpr(RegHi)) 2546 return false; 2547 } else { 2548 RegHi = RegLo; 2549 } 2550 2551 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2552 return false; 2553 2554 if (!isUInt<32>(RegLo)) { 2555 Error(FirstIdxLoc, "invalid register index"); 2556 return false; 2557 } 2558 2559 if (!isUInt<32>(RegHi)) { 2560 Error(SecondIdxLoc, "invalid register index"); 2561 return false; 2562 } 2563 2564 if (RegLo > RegHi) { 2565 Error(FirstIdxLoc, "first register index should not exceed second index"); 2566 return false; 2567 } 2568 2569 Num = static_cast<unsigned>(RegLo); 2570 Width = (RegHi - RegLo) + 1; 2571 return true; 2572 } 2573 2574 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2575 unsigned &RegNum, unsigned &RegWidth, 2576 SmallVectorImpl<AsmToken> &Tokens) { 2577 assert(isToken(AsmToken::Identifier)); 2578 unsigned Reg = getSpecialRegForName(getTokenStr()); 2579 if (Reg) { 2580 RegNum = 0; 2581 RegWidth = 1; 2582 RegKind = IS_SPECIAL; 2583 Tokens.push_back(getToken()); 2584 lex(); // skip register name 2585 } 2586 return Reg; 2587 } 2588 2589 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2590 unsigned &RegNum, unsigned &RegWidth, 2591 SmallVectorImpl<AsmToken> &Tokens) { 2592 assert(isToken(AsmToken::Identifier)); 2593 StringRef RegName = getTokenStr(); 2594 auto Loc = getLoc(); 2595 2596 const RegInfo *RI = getRegularRegInfo(RegName); 2597 if (!RI) { 2598 Error(Loc, "invalid register name"); 2599 return AMDGPU::NoRegister; 2600 } 2601 2602 Tokens.push_back(getToken()); 2603 lex(); // skip register name 2604 2605 RegKind = RI->Kind; 2606 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2607 if (!RegSuffix.empty()) { 2608 // Single 32-bit register: vXX. 2609 if (!getRegNum(RegSuffix, RegNum)) { 2610 Error(Loc, "invalid register index"); 2611 return AMDGPU::NoRegister; 2612 } 2613 RegWidth = 1; 2614 } else { 2615 // Range of registers: v[XX:YY]. ":YY" is optional. 
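// For example, v[4:7] denotes a 128-bit VGPR tuple, while v[5] (with the
// ":YY" part omitted) is accepted as a single 32-bit register.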
2616 if (!ParseRegRange(RegNum, RegWidth)) 2617 return AMDGPU::NoRegister; 2618 } 2619 2620 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2621 } 2622 2623 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2624 unsigned &RegWidth, 2625 SmallVectorImpl<AsmToken> &Tokens) { 2626 unsigned Reg = AMDGPU::NoRegister; 2627 auto ListLoc = getLoc(); 2628 2629 if (!skipToken(AsmToken::LBrac, 2630 "expected a register or a list of registers")) { 2631 return AMDGPU::NoRegister; 2632 } 2633 2634 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2635 2636 auto Loc = getLoc(); 2637 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2638 return AMDGPU::NoRegister; 2639 if (RegWidth != 1) { 2640 Error(Loc, "expected a single 32-bit register"); 2641 return AMDGPU::NoRegister; 2642 } 2643 2644 for (; trySkipToken(AsmToken::Comma); ) { 2645 RegisterKind NextRegKind; 2646 unsigned NextReg, NextRegNum, NextRegWidth; 2647 Loc = getLoc(); 2648 2649 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2650 NextRegNum, NextRegWidth, 2651 Tokens)) { 2652 return AMDGPU::NoRegister; 2653 } 2654 if (NextRegWidth != 1) { 2655 Error(Loc, "expected a single 32-bit register"); 2656 return AMDGPU::NoRegister; 2657 } 2658 if (NextRegKind != RegKind) { 2659 Error(Loc, "registers in a list must be of the same kind"); 2660 return AMDGPU::NoRegister; 2661 } 2662 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2663 return AMDGPU::NoRegister; 2664 } 2665 2666 if (!skipToken(AsmToken::RBrac, 2667 "expected a comma or a closing square bracket")) { 2668 return AMDGPU::NoRegister; 2669 } 2670 2671 if (isRegularReg(RegKind)) 2672 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2673 2674 return Reg; 2675 } 2676 2677 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2678 unsigned &RegNum, unsigned &RegWidth, 2679 SmallVectorImpl<AsmToken> &Tokens) { 2680 auto Loc = getLoc(); 2681 Reg = AMDGPU::NoRegister; 2682 2683 if (isToken(AsmToken::Identifier)) { 2684 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2685 if (Reg == AMDGPU::NoRegister) 2686 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2687 } else { 2688 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2689 } 2690 2691 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2692 if (Reg == AMDGPU::NoRegister) { 2693 assert(Parser.hasPendingError()); 2694 return false; 2695 } 2696 2697 if (!subtargetHasRegister(*TRI, Reg)) { 2698 if (Reg == AMDGPU::SGPR_NULL) { 2699 Error(Loc, "'null' operand is not supported on this GPU"); 2700 } else { 2701 Error(Loc, "register not available on this GPU"); 2702 } 2703 return false; 2704 } 2705 2706 return true; 2707 } 2708 2709 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2710 unsigned &RegNum, unsigned &RegWidth, 2711 bool RestoreOnFailure /*=false*/) { 2712 Reg = AMDGPU::NoRegister; 2713 2714 SmallVector<AsmToken, 1> Tokens; 2715 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2716 if (RestoreOnFailure) { 2717 while (!Tokens.empty()) { 2718 getLexer().UnLex(Tokens.pop_back_val()); 2719 } 2720 } 2721 return true; 2722 } 2723 return false; 2724 } 2725 2726 Optional<StringRef> 2727 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2728 switch (RegKind) { 2729 case IS_VGPR: 2730 return StringRef(".amdgcn.next_free_vgpr"); 2731 case IS_SGPR: 2732 return StringRef(".amdgcn.next_free_sgpr"); 2733 default: 2734 return None; 2735 } 2736 } 2737 2738 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2739 auto SymbolName = getGprCountSymbolName(RegKind); 2740 assert(SymbolName && "initializing invalid register kind"); 2741 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2742 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2743 } 2744 2745 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2746 unsigned DwordRegIndex, 2747 unsigned RegWidth) { 2748 // Symbols are only defined for GCN targets 2749 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2750 return true; 2751 2752 auto SymbolName = getGprCountSymbolName(RegKind); 2753 if (!SymbolName) 2754 return true; 2755 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2756 2757 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2758 int64_t OldCount; 2759 2760 if (!Sym->isVariable()) 2761 return !Error(getLoc(), 2762 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2763 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2764 return !Error( 2765 getLoc(), 2766 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2767 2768 if (OldCount <= NewMax) 2769 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2770 2771 return true; 2772 } 2773 2774 std::unique_ptr<AMDGPUOperand> 2775 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2776 const auto &Tok = getToken(); 2777 SMLoc StartLoc = Tok.getLoc(); 2778 SMLoc EndLoc = Tok.getEndLoc(); 2779 RegisterKind RegKind; 2780 unsigned Reg, RegNum, RegWidth; 2781 2782 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2783 return nullptr; 2784 } 2785 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2786 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2787 return nullptr; 2788 } else 2789 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2790 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2791 } 2792 2793 OperandMatchResultTy 2794 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2795 // TODO: add syntactic sugar for 1/(2*PI) 2796 2797 assert(!isRegister()); 2798 assert(!isModifier()); 2799 2800 const auto& Tok = getToken(); 2801 const auto& NextTok = peekToken(); 2802 bool IsReal = Tok.is(AsmToken::Real); 2803 SMLoc S = getLoc(); 2804 bool Negate = false; 2805 2806 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2807 lex(); 2808 IsReal = true; 2809 Negate = true; 2810 } 2811 2812 if (IsReal) { 2813 // Floating-point expressions are not supported. 2814 // Can only allow floating-point literals with an 2815 // optional sign. 2816 2817 StringRef Num = getTokenStr(); 2818 lex(); 2819 2820 APFloat RealVal(APFloat::IEEEdouble()); 2821 auto roundMode = APFloat::rmNearestTiesToEven; 2822 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2823 return MatchOperand_ParseFail; 2824 } 2825 if (Negate) 2826 RealVal.changeSign(); 2827 2828 Operands.push_back( 2829 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2830 AMDGPUOperand::ImmTyNone, true)); 2831 2832 return MatchOperand_Success; 2833 2834 } else { 2835 int64_t IntVal; 2836 const MCExpr *Expr; 2837 SMLoc S = getLoc(); 2838 2839 if (HasSP3AbsModifier) { 2840 // This is a workaround for handling expressions 2841 // as arguments of SP3 'abs' modifier, for example: 2842 // |1.0| 2843 // |-1| 2844 // |1+x| 2845 // This syntax is not compatible with syntax of standard 2846 // MC expressions (due to the trailing '|'). 
2847 SMLoc EndLoc; 2848 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2849 return MatchOperand_ParseFail; 2850 } else { 2851 if (Parser.parseExpression(Expr)) 2852 return MatchOperand_ParseFail; 2853 } 2854 2855 if (Expr->evaluateAsAbsolute(IntVal)) { 2856 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2857 } else { 2858 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2859 } 2860 2861 return MatchOperand_Success; 2862 } 2863 2864 return MatchOperand_NoMatch; 2865 } 2866 2867 OperandMatchResultTy 2868 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2869 if (!isRegister()) 2870 return MatchOperand_NoMatch; 2871 2872 if (auto R = parseRegister()) { 2873 assert(R->isReg()); 2874 Operands.push_back(std::move(R)); 2875 return MatchOperand_Success; 2876 } 2877 return MatchOperand_ParseFail; 2878 } 2879 2880 OperandMatchResultTy 2881 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2882 auto res = parseReg(Operands); 2883 if (res != MatchOperand_NoMatch) { 2884 return res; 2885 } else if (isModifier()) { 2886 return MatchOperand_NoMatch; 2887 } else { 2888 return parseImm(Operands, HasSP3AbsMod); 2889 } 2890 } 2891 2892 bool 2893 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2894 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2895 const auto &str = Token.getString(); 2896 return str == "abs" || str == "neg" || str == "sext"; 2897 } 2898 return false; 2899 } 2900 2901 bool 2902 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2903 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2904 } 2905 2906 bool 2907 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2908 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2909 } 2910 2911 bool 2912 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2913 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2914 } 2915 2916 // Check if this is an operand modifier or an opcode modifier 2917 // which may look like an expression but it is not. We should 2918 // avoid parsing these modifiers as expressions. Currently 2919 // recognized sequences are: 2920 // |...| 2921 // abs(...) 2922 // neg(...) 2923 // sext(...) 2924 // -reg 2925 // -|...| 2926 // -abs(...) 2927 // name:... 2928 // Note that simple opcode modifiers like 'gds' may be parsed as 2929 // expressions; this is a special case. See getExpressionAsToken. 2930 // 2931 bool 2932 AMDGPUAsmParser::isModifier() { 2933 2934 AsmToken Tok = getToken(); 2935 AsmToken NextToken[2]; 2936 peekTokens(NextToken); 2937 2938 return isOperandModifier(Tok, NextToken[0]) || 2939 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2940 isOpcodeModifierWithVal(Tok, NextToken[0]); 2941 } 2942 2943 // Check if the current token is an SP3 'neg' modifier. 2944 // Currently this modifier is allowed in the following context: 2945 // 2946 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2947 // 2. Before an 'abs' modifier: -abs(...) 2948 // 3. Before an SP3 'abs' modifier: -|...| 2949 // 2950 // In all other cases "-" is handled as a part 2951 // of an expression that follows the sign. 
2952 // 2953 // Note: When "-" is followed by an integer literal, 2954 // this is interpreted as integer negation rather 2955 // than a floating-point NEG modifier applied to N. 2956 // Besides being counter-intuitive, such use of a floating-point 2957 // NEG modifier would have resulted in a different meaning 2958 // of integer literals used with VOP1/2/C and VOP3, 2959 // for example: 2960 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2961 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2962 // Negative fp literals with a preceding "-" are 2963 // handled likewise for uniformity. 2964 // 2965 bool 2966 AMDGPUAsmParser::parseSP3NegModifier() { 2967 2968 AsmToken NextToken[2]; 2969 peekTokens(NextToken); 2970 2971 if (isToken(AsmToken::Minus) && 2972 (isRegister(NextToken[0], NextToken[1]) || 2973 NextToken[0].is(AsmToken::Pipe) || 2974 isId(NextToken[0], "abs"))) { 2975 lex(); 2976 return true; 2977 } 2978 2979 return false; 2980 } 2981 2982 OperandMatchResultTy 2983 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2984 bool AllowImm) { 2985 bool Neg, SP3Neg; 2986 bool Abs, SP3Abs; 2987 SMLoc Loc; 2988 2989 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2990 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2991 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2992 return MatchOperand_ParseFail; 2993 } 2994 2995 SP3Neg = parseSP3NegModifier(); 2996 2997 Loc = getLoc(); 2998 Neg = trySkipId("neg"); 2999 if (Neg && SP3Neg) { 3000 Error(Loc, "expected register or immediate"); 3001 return MatchOperand_ParseFail; 3002 } 3003 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3004 return MatchOperand_ParseFail; 3005 3006 Abs = trySkipId("abs"); 3007 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3008 return MatchOperand_ParseFail; 3009 3010 Loc = getLoc(); 3011 SP3Abs = trySkipToken(AsmToken::Pipe); 3012 if (Abs && SP3Abs) { 3013 Error(Loc, "expected register or immediate"); 3014 return MatchOperand_ParseFail; 3015 } 3016 3017 OperandMatchResultTy Res; 3018 if (AllowImm) { 3019 Res = parseRegOrImm(Operands, SP3Abs); 3020 } else { 3021 Res = parseReg(Operands); 3022 } 3023 if (Res != MatchOperand_Success) { 3024 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 3025 } 3026 3027 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3028 return MatchOperand_ParseFail; 3029 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3030 return MatchOperand_ParseFail; 3031 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3032 return MatchOperand_ParseFail; 3033 3034 AMDGPUOperand::Modifiers Mods; 3035 Mods.Abs = Abs || SP3Abs; 3036 Mods.Neg = Neg || SP3Neg; 3037 3038 if (Mods.hasFPModifiers()) { 3039 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3040 if (Op.isExpr()) { 3041 Error(Op.getStartLoc(), "expected an absolute expression"); 3042 return MatchOperand_ParseFail; 3043 } 3044 Op.setModifiers(Mods); 3045 } 3046 return MatchOperand_Success; 3047 } 3048 3049 OperandMatchResultTy 3050 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3051 bool AllowImm) { 3052 bool Sext = trySkipId("sext"); 3053 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3054 return MatchOperand_ParseFail; 3055 3056 OperandMatchResultTy Res; 3057 if (AllowImm) { 3058 Res = parseRegOrImm(Operands); 3059 } else { 3060 Res = parseReg(Operands); 3061 } 3062 if (Res != MatchOperand_Success) { 3063 return Sext? MatchOperand_ParseFail : Res; 3064 } 3065 3066 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3067 return MatchOperand_ParseFail; 3068 3069 AMDGPUOperand::Modifiers Mods; 3070 Mods.Sext = Sext; 3071 3072 if (Mods.hasIntModifiers()) { 3073 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3074 if (Op.isExpr()) { 3075 Error(Op.getStartLoc(), "expected an absolute expression"); 3076 return MatchOperand_ParseFail; 3077 } 3078 Op.setModifiers(Mods); 3079 } 3080 3081 return MatchOperand_Success; 3082 } 3083 3084 OperandMatchResultTy 3085 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3086 return parseRegOrImmWithFPInputMods(Operands, false); 3087 } 3088 3089 OperandMatchResultTy 3090 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3091 return parseRegOrImmWithIntInputMods(Operands, false); 3092 } 3093 3094 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3095 auto Loc = getLoc(); 3096 if (trySkipId("off")) { 3097 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3098 AMDGPUOperand::ImmTyOff, false)); 3099 return MatchOperand_Success; 3100 } 3101 3102 if (!isRegister()) 3103 return MatchOperand_NoMatch; 3104 3105 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3106 if (Reg) { 3107 Operands.push_back(std::move(Reg)); 3108 return MatchOperand_Success; 3109 } 3110 3111 return MatchOperand_ParseFail; 3112 3113 } 3114 3115 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3116 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3117 3118 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3119 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3120 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3121 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3122 return Match_InvalidOperand; 3123 3124 if ((TSFlags & SIInstrFlags::VOP3) && 3125 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3126 getForcedEncodingSize() != 64) 3127 return Match_PreferE32; 3128 3129 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3130 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3131 // v_mac_f32/16 allow only dst_sel == DWORD; 3132 auto OpNum = 3133 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3134 const auto &Op = Inst.getOperand(OpNum); 3135 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3136 return Match_InvalidOperand; 3137 } 3138 } 3139 3140 return Match_Success; 3141 } 3142 3143 static ArrayRef<unsigned> getAllVariants() { 3144 static const unsigned Variants[] = { 3145 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3146 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3147 }; 3148 3149 return makeArrayRef(Variants); 3150 } 3151 3152 // Which asm variants we should check 3153 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3154 if (getForcedEncodingSize() == 32) { 3155 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3156 return makeArrayRef(Variants); 3157 } 3158 3159 if (isForcedVOP3()) { 3160 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3161 return makeArrayRef(Variants); 3162 } 3163 3164 if (isForcedSDWA()) { 3165 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3166 AMDGPUAsmVariants::SDWA9}; 3167 return makeArrayRef(Variants); 3168 } 3169 3170 if (isForcedDPP()) { 3171 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3172 return makeArrayRef(Variants); 3173 } 3174 3175 return getAllVariants(); 3176 } 3177 3178 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3179 if (getForcedEncodingSize() == 32) 3180 return "e32"; 3181 3182 if (isForcedVOP3()) 3183 return "e64"; 3184 3185 if (isForcedSDWA()) 3186 return "sdwa"; 3187 3188 if (isForcedDPP()) 3189 return "dpp"; 3190 3191 return ""; 3192 } 3193 3194 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3195 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3196 const unsigned Num = Desc.getNumImplicitUses(); 3197 for (unsigned i = 0; i < Num; ++i) { 3198 unsigned Reg = Desc.ImplicitUses[i]; 3199 switch (Reg) { 3200 case AMDGPU::FLAT_SCR: 3201 case AMDGPU::VCC: 3202 case AMDGPU::VCC_LO: 3203 case AMDGPU::VCC_HI: 3204 case AMDGPU::M0: 3205 return Reg; 3206 default: 3207 break; 3208 } 3209 } 3210 return AMDGPU::NoRegister; 3211 } 3212 3213 // NB: This code is correct only when used to check constant 3214 // bus limitations because GFX7 supports no f16 inline constants. 3215 // Note that there are no cases in which a GFX7 opcode violates 3216 // constant bus limitations due to the use of an f16 constant. 
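// For example, the integer value 64 is an inline constant (inline integers
// span [-16, 64]) and does not count as a literal, whereas 65 is not inlinable
// and must be encoded as a 32-bit literal (illustrative values).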
3217 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3218 unsigned OpIdx) const { 3219 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3220 3221 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3222 return false; 3223 } 3224 3225 const MCOperand &MO = Inst.getOperand(OpIdx); 3226 3227 int64_t Val = MO.getImm(); 3228 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3229 3230 switch (OpSize) { // expected operand size 3231 case 8: 3232 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3233 case 4: 3234 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3235 case 2: { 3236 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3237 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3238 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3239 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3240 return AMDGPU::isInlinableIntLiteral(Val); 3241 3242 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3243 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3244 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3245 return AMDGPU::isInlinableIntLiteralV216(Val); 3246 3247 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3248 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3249 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3250 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3251 3252 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3253 } 3254 default: 3255 llvm_unreachable("invalid operand size"); 3256 } 3257 } 3258 3259 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3260 if (!isGFX10Plus()) 3261 return 1; 3262 3263 switch (Opcode) { 3264 // 64-bit shift instructions can use only one scalar value input 3265 case AMDGPU::V_LSHLREV_B64_e64: 3266 case AMDGPU::V_LSHLREV_B64_gfx10: 3267 case AMDGPU::V_LSHRREV_B64_e64: 3268 case AMDGPU::V_LSHRREV_B64_gfx10: 3269 case AMDGPU::V_ASHRREV_I64_e64: 3270 case AMDGPU::V_ASHRREV_I64_gfx10: 3271 case AMDGPU::V_LSHL_B64_e64: 3272 case AMDGPU::V_LSHR_B64_e64: 3273 case AMDGPU::V_ASHR_I64_e64: 3274 return 1; 3275 default: 3276 return 2; 3277 } 3278 } 3279 3280 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3281 const MCOperand &MO = Inst.getOperand(OpIdx); 3282 if (MO.isImm()) { 3283 return !isInlineConstant(Inst, OpIdx); 3284 } else if (MO.isReg()) { 3285 auto Reg = MO.getReg(); 3286 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3287 auto PReg = mc2PseudoReg(Reg); 3288 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3289 } else { 3290 return true; 3291 } 3292 } 3293 3294 bool 3295 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3296 const OperandVector &Operands) { 3297 const unsigned Opcode = Inst.getOpcode(); 3298 const MCInstrDesc &Desc = MII.get(Opcode); 3299 unsigned LastSGPR = AMDGPU::NoRegister; 3300 unsigned ConstantBusUseCount = 0; 3301 unsigned NumLiterals = 0; 3302 unsigned LiteralSize; 3303 3304 if (Desc.TSFlags & 3305 (SIInstrFlags::VOPC | 3306 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3307 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3308 SIInstrFlags::SDWA)) { 3309 // Check special imm operands (used by madmk, etc) 3310 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3311 ++NumLiterals; 3312 LiteralSize = 4; 3313 } 3314 3315 SmallDenseSet<unsigned> SGPRsUsed; 3316 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3317 if (SGPRUsed != AMDGPU::NoRegister) { 3318 SGPRsUsed.insert(SGPRUsed); 3319 ++ConstantBusUseCount; 3320 } 3321 3322 
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3323 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3324 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3325 3326 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3327 3328 for (int OpIdx : OpIndices) { 3329 if (OpIdx == -1) break; 3330 3331 const MCOperand &MO = Inst.getOperand(OpIdx); 3332 if (usesConstantBus(Inst, OpIdx)) { 3333 if (MO.isReg()) { 3334 LastSGPR = mc2PseudoReg(MO.getReg()); 3335 // Pairs of registers with partial intersections like these 3336 // s0, s[0:1] 3337 // flat_scratch_lo, flat_scratch 3338 // flat_scratch_lo, flat_scratch_hi 3339 // are theoretically valid but they are disabled anyway. 3340 // Note that this code mimics SIInstrInfo::verifyInstruction 3341 if (!SGPRsUsed.count(LastSGPR)) { 3342 SGPRsUsed.insert(LastSGPR); 3343 ++ConstantBusUseCount; 3344 } 3345 } else { // Expression or a literal 3346 3347 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3348 continue; // special operand like VINTERP attr_chan 3349 3350 // An instruction may use only one literal. 3351 // This has been validated in the previous step. 3352 // See validateVOPLiteral. 3353 // This literal may be used by more than one operand. 3354 // If all these operands are of the same size, 3355 // this literal counts as one scalar value. 3356 // Otherwise it counts as 2 scalar values. 3357 // See "GFX10 Shader Programming", section 3.6.2.3. 3358 3359 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3360 if (Size < 4) Size = 4; 3361 3362 if (NumLiterals == 0) { 3363 NumLiterals = 1; 3364 LiteralSize = Size; 3365 } else if (LiteralSize != Size) { 3366 NumLiterals = 2; 3367 } 3368 } 3369 } 3370 } 3371 } 3372 ConstantBusUseCount += NumLiterals; 3373 3374 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3375 return true; 3376 3377 SMLoc LitLoc = getLitLoc(Operands); 3378 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3379 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3380 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3381 return false; 3382 } 3383 3384 bool 3385 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3386 const OperandVector &Operands) { 3387 const unsigned Opcode = Inst.getOpcode(); 3388 const MCInstrDesc &Desc = MII.get(Opcode); 3389 3390 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3391 if (DstIdx == -1 || 3392 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3393 return true; 3394 } 3395 3396 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3397 3398 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3399 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3400 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3401 3402 assert(DstIdx != -1); 3403 const MCOperand &Dst = Inst.getOperand(DstIdx); 3404 assert(Dst.isReg()); 3405 3406 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3407 3408 for (int SrcIdx : SrcIndices) { 3409 if (SrcIdx == -1) break; 3410 const MCOperand &Src = Inst.getOperand(SrcIdx); 3411 if (Src.isReg()) { 3412 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3413 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3414 Error(getRegLoc(SrcReg, Operands), 3415 "destination must be different than all sources"); 3416 return false; 3417 } 3418 } 3419 } 3420 3421 return true; 3422 } 3423 3424 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3425 3426 const unsigned Opc = Inst.getOpcode(); 3427 const MCInstrDesc &Desc = MII.get(Opc); 3428 3429 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3430 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3431 assert(ClampIdx != -1); 3432 return Inst.getOperand(ClampIdx).getImm() == 0; 3433 } 3434 3435 return true; 3436 } 3437 3438 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3439 3440 const unsigned Opc = Inst.getOpcode(); 3441 const MCInstrDesc &Desc = MII.get(Opc); 3442 3443 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3444 return true; 3445 3446 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3447 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3448 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3449 3450 assert(VDataIdx != -1); 3451 3452 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3453 return true; 3454 3455 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3456 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3457 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3458 if (DMask == 0) 3459 DMask = 1; 3460 3461 unsigned DataSize = 3462 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3463 if (hasPackedD16()) { 3464 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3465 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3466 DataSize = (DataSize + 1) / 2; 3467 } 3468 3469 return (VDataSize / 4) == DataSize + TFESize; 3470 } 3471 3472 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3473 const unsigned Opc = Inst.getOpcode(); 3474 const MCInstrDesc &Desc = MII.get(Opc); 3475 3476 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3477 return true; 3478 3479 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3480 3481 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3482 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3483 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3484 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3485 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3486 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3487 3488 assert(VAddr0Idx != -1); 3489 assert(SrsrcIdx != -1); 3490 assert(SrsrcIdx > VAddr0Idx); 3491 3492 if (DimIdx == -1) 3493 return true; // intersect_ray 3494 3495 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3496 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3497 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3498 unsigned ActualAddrSize = 3499 IsNSA ? SrsrcIdx - VAddr0Idx 3500 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3501 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3502 3503 unsigned ExpectedAddrSize = 3504 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3505 3506 if (!IsNSA) { 3507 if (ExpectedAddrSize > 8) 3508 ExpectedAddrSize = 16; 3509 3510 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3511 // This provides backward compatibility for assembly created 3512 // before 160b/192b/224b types were directly supported. 3513 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3514 return true; 3515 } 3516 3517 return ActualAddrSize == ExpectedAddrSize; 3518 } 3519 3520 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3521 3522 const unsigned Opc = Inst.getOpcode(); 3523 const MCInstrDesc &Desc = MII.get(Opc); 3524 3525 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3526 return true; 3527 if (!Desc.mayLoad() || !Desc.mayStore()) 3528 return true; // Not atomic 3529 3530 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3531 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3532 3533 // This is an incomplete check because image_atomic_cmpswap 3534 // may only use 0x3 and 0xf while other atomic operations 3535 // may use 0x1 and 0x3. However these limitations are 3536 // verified when we check that dmask matches dst size. 3537 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3538 } 3539 3540 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3541 3542 const unsigned Opc = Inst.getOpcode(); 3543 const MCInstrDesc &Desc = MII.get(Opc); 3544 3545 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3546 return true; 3547 3548 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3549 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3550 3551 // GATHER4 instructions use dmask in a different fashion compared to 3552 // other MIMG instructions. The only useful DMASK values are 3553 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3554 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3555 // this. 3556 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3557 } 3558 3559 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3560 const unsigned Opc = Inst.getOpcode(); 3561 const MCInstrDesc &Desc = MII.get(Opc); 3562 3563 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3564 return true; 3565 3566 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3567 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3568 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3569 3570 if (!BaseOpcode->MSAA) 3571 return true; 3572 3573 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3574 assert(DimIdx != -1); 3575 3576 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3577 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3578 3579 return DimInfo->MSAA; 3580 } 3581 3582 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3583 { 3584 switch (Opcode) { 3585 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3586 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3587 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3588 return true; 3589 default: 3590 return false; 3591 } 3592 } 3593 3594 // movrels* opcodes should only allow VGPRS as src0. 3595 // This is specified in .td description for vop1/vop3, 3596 // but sdwa is handled differently. See isSDWAOperand. 3597 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3598 const OperandVector &Operands) { 3599 3600 const unsigned Opc = Inst.getOpcode(); 3601 const MCInstrDesc &Desc = MII.get(Opc); 3602 3603 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3604 return true; 3605 3606 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3607 assert(Src0Idx != -1); 3608 3609 SMLoc ErrLoc; 3610 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3611 if (Src0.isReg()) { 3612 auto Reg = mc2PseudoReg(Src0.getReg()); 3613 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3614 if (!isSGPR(Reg, TRI)) 3615 return true; 3616 ErrLoc = getRegLoc(Reg, Operands); 3617 } else { 3618 ErrLoc = getConstLoc(Operands); 3619 } 3620 3621 Error(ErrLoc, "source operand must be a VGPR"); 3622 return false; 3623 } 3624 3625 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3626 const OperandVector &Operands) { 3627 3628 const unsigned Opc = Inst.getOpcode(); 3629 3630 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3631 return true; 3632 3633 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3634 assert(Src0Idx != -1); 3635 3636 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3637 if (!Src0.isReg()) 3638 return true; 3639 3640 auto Reg = mc2PseudoReg(Src0.getReg()); 3641 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3642 if (isSGPR(Reg, TRI)) { 3643 Error(getRegLoc(Reg, Operands), 3644 "source operand must be either a VGPR or an inline constant"); 3645 return false; 3646 } 3647 3648 return true; 3649 } 3650 3651 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3652 const OperandVector &Operands) { 3653 const unsigned Opc = Inst.getOpcode(); 3654 const MCInstrDesc &Desc = MII.get(Opc); 3655 3656 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3657 return true; 3658 3659 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3660 if (Src2Idx == -1) 3661 return true; 3662 3663 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3664 if (!Src2.isReg()) 3665 return true; 3666 3667 MCRegister Src2Reg = Src2.getReg(); 3668 MCRegister DstReg = Inst.getOperand(0).getReg(); 3669 if 
(Src2Reg == DstReg)
3670 return true;
3671
3672 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3673 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3674 return true;
3675
3676 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3677 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3678 "source 2 operand must not partially overlap with dst");
3679 return false;
3680 }
3681
3682 return true;
3683 }
3684
3685 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3686 switch (Inst.getOpcode()) {
3687 default:
3688 return true;
3689 case V_DIV_SCALE_F32_gfx6_gfx7:
3690 case V_DIV_SCALE_F32_vi:
3691 case V_DIV_SCALE_F32_gfx10:
3692 case V_DIV_SCALE_F64_gfx6_gfx7:
3693 case V_DIV_SCALE_F64_vi:
3694 case V_DIV_SCALE_F64_gfx10:
3695 break;
3696 }
3697
3698 // TODO: Check that src0 = src1 or src2.
3699
3700 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3701 AMDGPU::OpName::src1_modifiers,
3702 AMDGPU::OpName::src2_modifiers}) {
3703 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3704 .getImm() &
3705 SISrcMods::ABS) {
3706 return false;
3707 }
3708 }
3709
3710 return true;
3711 }
3712
3713 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3714
3715 const unsigned Opc = Inst.getOpcode();
3716 const MCInstrDesc &Desc = MII.get(Opc);
3717
3718 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3719 return true;
3720
3721 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3722 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3723 if (isCI() || isSI())
3724 return false;
3725 }
3726
3727 return true;
3728 }
3729
3730 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3731 const unsigned Opc = Inst.getOpcode();
3732 const MCInstrDesc &Desc = MII.get(Opc);
3733
3734 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3735 return true;
3736
3737 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3738 if (DimIdx < 0)
3739 return true;
3740
3741 long Imm = Inst.getOperand(DimIdx).getImm();
3742 if (Imm < 0 || Imm >= 8)
3743 return false;
3744
3745 return true;
3746 }
3747
3748 static bool IsRevOpcode(const unsigned Opcode)
3749 {
3750 switch (Opcode) {
3751 case AMDGPU::V_SUBREV_F32_e32:
3752 case AMDGPU::V_SUBREV_F32_e64:
3753 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3754 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3755 case AMDGPU::V_SUBREV_F32_e32_vi:
3756 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3757 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3758 case AMDGPU::V_SUBREV_F32_e64_vi:
3759
3760 case AMDGPU::V_SUBREV_CO_U32_e32:
3761 case AMDGPU::V_SUBREV_CO_U32_e64:
3762 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3763 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3764
3765 case AMDGPU::V_SUBBREV_U32_e32:
3766 case AMDGPU::V_SUBBREV_U32_e64:
3767 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3768 case AMDGPU::V_SUBBREV_U32_e32_vi:
3769 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3770 case AMDGPU::V_SUBBREV_U32_e64_vi:
3771
3772 case AMDGPU::V_SUBREV_U32_e32:
3773 case AMDGPU::V_SUBREV_U32_e64:
3774 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3775 case AMDGPU::V_SUBREV_U32_e32_vi:
3776 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3777 case AMDGPU::V_SUBREV_U32_e64_vi:
3778
3779 case AMDGPU::V_SUBREV_F16_e32:
3780 case AMDGPU::V_SUBREV_F16_e64:
3781 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3782 case AMDGPU::V_SUBREV_F16_e32_vi:
3783 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3784 case AMDGPU::V_SUBREV_F16_e64_vi:
3785
3786 case AMDGPU::V_SUBREV_U16_e32:
3787 case AMDGPU::V_SUBREV_U16_e64:
3788 case AMDGPU::V_SUBREV_U16_e32_vi:
3789 case
AMDGPU::V_SUBREV_U16_e64_vi: 3790 3791 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3792 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3793 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3794 3795 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3796 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3797 3798 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3799 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3800 3801 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3802 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3803 3804 case AMDGPU::V_LSHRREV_B32_e32: 3805 case AMDGPU::V_LSHRREV_B32_e64: 3806 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3807 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3808 case AMDGPU::V_LSHRREV_B32_e32_vi: 3809 case AMDGPU::V_LSHRREV_B32_e64_vi: 3810 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3811 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3812 3813 case AMDGPU::V_ASHRREV_I32_e32: 3814 case AMDGPU::V_ASHRREV_I32_e64: 3815 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3816 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3817 case AMDGPU::V_ASHRREV_I32_e32_vi: 3818 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3819 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3820 case AMDGPU::V_ASHRREV_I32_e64_vi: 3821 3822 case AMDGPU::V_LSHLREV_B32_e32: 3823 case AMDGPU::V_LSHLREV_B32_e64: 3824 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3825 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3826 case AMDGPU::V_LSHLREV_B32_e32_vi: 3827 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3828 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3829 case AMDGPU::V_LSHLREV_B32_e64_vi: 3830 3831 case AMDGPU::V_LSHLREV_B16_e32: 3832 case AMDGPU::V_LSHLREV_B16_e64: 3833 case AMDGPU::V_LSHLREV_B16_e32_vi: 3834 case AMDGPU::V_LSHLREV_B16_e64_vi: 3835 case AMDGPU::V_LSHLREV_B16_gfx10: 3836 3837 case AMDGPU::V_LSHRREV_B16_e32: 3838 case AMDGPU::V_LSHRREV_B16_e64: 3839 case AMDGPU::V_LSHRREV_B16_e32_vi: 3840 case AMDGPU::V_LSHRREV_B16_e64_vi: 3841 case AMDGPU::V_LSHRREV_B16_gfx10: 3842 3843 case AMDGPU::V_ASHRREV_I16_e32: 3844 case AMDGPU::V_ASHRREV_I16_e64: 3845 case AMDGPU::V_ASHRREV_I16_e32_vi: 3846 case AMDGPU::V_ASHRREV_I16_e64_vi: 3847 case AMDGPU::V_ASHRREV_I16_gfx10: 3848 3849 case AMDGPU::V_LSHLREV_B64_e64: 3850 case AMDGPU::V_LSHLREV_B64_gfx10: 3851 case AMDGPU::V_LSHLREV_B64_vi: 3852 3853 case AMDGPU::V_LSHRREV_B64_e64: 3854 case AMDGPU::V_LSHRREV_B64_gfx10: 3855 case AMDGPU::V_LSHRREV_B64_vi: 3856 3857 case AMDGPU::V_ASHRREV_I64_e64: 3858 case AMDGPU::V_ASHRREV_I64_gfx10: 3859 case AMDGPU::V_ASHRREV_I64_vi: 3860 3861 case AMDGPU::V_PK_LSHLREV_B16: 3862 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3863 case AMDGPU::V_PK_LSHLREV_B16_vi: 3864 3865 case AMDGPU::V_PK_LSHRREV_B16: 3866 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3867 case AMDGPU::V_PK_LSHRREV_B16_vi: 3868 case AMDGPU::V_PK_ASHRREV_I16: 3869 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3870 case AMDGPU::V_PK_ASHRREV_I16_vi: 3871 return true; 3872 default: 3873 return false; 3874 } 3875 } 3876 3877 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3878 3879 using namespace SIInstrFlags; 3880 const unsigned Opcode = Inst.getOpcode(); 3881 const MCInstrDesc &Desc = MII.get(Opcode); 3882 3883 // lds_direct register is defined so that it can be used 3884 // with 9-bit operands only. Ignore encodings which do not accept these. 
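  // A rough illustration (the mnemonics below are examples only, not an
  // exhaustive list):
  //   v_mov_b32 v1, lds_direct         accepted: lds_direct appears as src0
  //   v_add_f32 v1, v2, lds_direct     rejected: lds_direct is not src0
  // The checks below also reject lds_direct on gfx90a and in *rev/SDWA forms.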
3885 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3886 if ((Desc.TSFlags & Enc) == 0) 3887 return None; 3888 3889 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3890 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3891 if (SrcIdx == -1) 3892 break; 3893 const auto &Src = Inst.getOperand(SrcIdx); 3894 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3895 3896 if (isGFX90A()) 3897 return StringRef("lds_direct is not supported on this GPU"); 3898 3899 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3900 return StringRef("lds_direct cannot be used with this instruction"); 3901 3902 if (SrcName != OpName::src0) 3903 return StringRef("lds_direct may be used as src0 only"); 3904 } 3905 } 3906 3907 return None; 3908 } 3909 3910 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3911 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3912 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3913 if (Op.isFlatOffset()) 3914 return Op.getStartLoc(); 3915 } 3916 return getLoc(); 3917 } 3918 3919 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3920 const OperandVector &Operands) { 3921 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3922 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3923 return true; 3924 3925 auto Opcode = Inst.getOpcode(); 3926 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3927 assert(OpNum != -1); 3928 3929 const auto &Op = Inst.getOperand(OpNum); 3930 if (!hasFlatOffsets() && Op.getImm() != 0) { 3931 Error(getFlatOffsetLoc(Operands), 3932 "flat offset modifier is not supported on this GPU"); 3933 return false; 3934 } 3935 3936 // For FLAT segment the offset must be positive; 3937 // MSB is ignored and forced to zero. 3938 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3939 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3940 if (!isIntN(OffsetSize, Op.getImm())) { 3941 Error(getFlatOffsetLoc(Operands), 3942 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3943 return false; 3944 } 3945 } else { 3946 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3947 if (!isUIntN(OffsetSize, Op.getImm())) { 3948 Error(getFlatOffsetLoc(Operands), 3949 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3950 return false; 3951 } 3952 } 3953 3954 return true; 3955 } 3956 3957 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3958 // Start with second operand because SMEM Offset cannot be dst or src0. 
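  // For example, in something like "s_load_dword s5, s[2:3], 0x10" (purely
  // illustrative syntax) the offset is the last parsed operand, so scanning
  // from index 2 onwards is sufficient; if no SMEM offset operand is present,
  // the current location is returned below as a fallback.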
3959 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3960 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3961 if (Op.isSMEMOffset()) 3962 return Op.getStartLoc(); 3963 } 3964 return getLoc(); 3965 } 3966 3967 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3968 const OperandVector &Operands) { 3969 if (isCI() || isSI()) 3970 return true; 3971 3972 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3973 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3974 return true; 3975 3976 auto Opcode = Inst.getOpcode(); 3977 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3978 if (OpNum == -1) 3979 return true; 3980 3981 const auto &Op = Inst.getOperand(OpNum); 3982 if (!Op.isImm()) 3983 return true; 3984 3985 uint64_t Offset = Op.getImm(); 3986 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3987 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3988 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3989 return true; 3990 3991 Error(getSMEMOffsetLoc(Operands), 3992 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3993 "expected a 21-bit signed offset"); 3994 3995 return false; 3996 } 3997 3998 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3999 unsigned Opcode = Inst.getOpcode(); 4000 const MCInstrDesc &Desc = MII.get(Opcode); 4001 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4002 return true; 4003 4004 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4005 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4006 4007 const int OpIndices[] = { Src0Idx, Src1Idx }; 4008 4009 unsigned NumExprs = 0; 4010 unsigned NumLiterals = 0; 4011 uint32_t LiteralValue; 4012 4013 for (int OpIdx : OpIndices) { 4014 if (OpIdx == -1) break; 4015 4016 const MCOperand &MO = Inst.getOperand(OpIdx); 4017 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4018 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4019 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4020 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4021 if (NumLiterals == 0 || LiteralValue != Value) { 4022 LiteralValue = Value; 4023 ++NumLiterals; 4024 } 4025 } else if (MO.isExpr()) { 4026 ++NumExprs; 4027 } 4028 } 4029 } 4030 4031 return NumLiterals + NumExprs <= 1; 4032 } 4033 4034 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4035 const unsigned Opc = Inst.getOpcode(); 4036 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4037 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4038 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4039 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4040 4041 if (OpSel & ~3) 4042 return false; 4043 } 4044 return true; 4045 } 4046 4047 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4048 const OperandVector &Operands) { 4049 const unsigned Opc = Inst.getOpcode(); 4050 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4051 if (DppCtrlIdx < 0) 4052 return true; 4053 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4054 4055 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4056 // DPP64 is supported for row_newbcast only. 
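    // For instance (operand choices arbitrary): a 64-bit operation whose
    // dpp_ctrl is row_newbcast:1 passes this check, while controls such as
    // quad_perm or row_shl on a 64-bit src0 are rejected with the
    // diagnostic below.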
4057 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4058 if (Src0Idx >= 0 && 4059 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4060 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4061 Error(S, "64 bit dpp only supports row_newbcast"); 4062 return false; 4063 } 4064 } 4065 4066 return true; 4067 } 4068 4069 // Check if VCC register matches wavefront size 4070 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4071 auto FB = getFeatureBits(); 4072 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4073 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4074 } 4075 4076 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4077 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4078 const OperandVector &Operands) { 4079 unsigned Opcode = Inst.getOpcode(); 4080 const MCInstrDesc &Desc = MII.get(Opcode); 4081 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4082 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4083 ImmIdx == -1) 4084 return true; 4085 4086 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4087 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4088 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4089 4090 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4091 4092 unsigned NumExprs = 0; 4093 unsigned NumLiterals = 0; 4094 uint32_t LiteralValue; 4095 4096 for (int OpIdx : OpIndices) { 4097 if (OpIdx == -1) 4098 continue; 4099 4100 const MCOperand &MO = Inst.getOperand(OpIdx); 4101 if (!MO.isImm() && !MO.isExpr()) 4102 continue; 4103 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4104 continue; 4105 4106 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4107 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4108 Error(getConstLoc(Operands), 4109 "inline constants are not allowed for this operand"); 4110 return false; 4111 } 4112 4113 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4114 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4115 if (NumLiterals == 0 || LiteralValue != Value) { 4116 LiteralValue = Value; 4117 ++NumLiterals; 4118 } 4119 } else if (MO.isExpr()) { 4120 ++NumExprs; 4121 } 4122 } 4123 NumLiterals += NumExprs; 4124 4125 if (!NumLiterals) 4126 return true; 4127 4128 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4129 Error(getLitLoc(Operands), "literal operands are not supported"); 4130 return false; 4131 } 4132 4133 if (NumLiterals > 1) { 4134 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4135 return false; 4136 } 4137 4138 return true; 4139 } 4140 4141 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4142 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4143 const MCRegisterInfo *MRI) { 4144 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4145 if (OpIdx < 0) 4146 return -1; 4147 4148 const MCOperand &Op = Inst.getOperand(OpIdx); 4149 if (!Op.isReg()) 4150 return -1; 4151 4152 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4153 auto Reg = Sub ? Sub : Op.getReg(); 4154 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4155 return AGPR32.contains(Reg) ? 
1 : 0; 4156 } 4157 4158 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4159 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4160 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4161 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4162 SIInstrFlags::DS)) == 0) 4163 return true; 4164 4165 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4166 : AMDGPU::OpName::vdata; 4167 4168 const MCRegisterInfo *MRI = getMRI(); 4169 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4170 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4171 4172 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4173 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4174 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4175 return false; 4176 } 4177 4178 auto FB = getFeatureBits(); 4179 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4180 if (DataAreg < 0 || DstAreg < 0) 4181 return true; 4182 return DstAreg == DataAreg; 4183 } 4184 4185 return DstAreg < 1 && DataAreg < 1; 4186 } 4187 4188 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4189 auto FB = getFeatureBits(); 4190 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4191 return true; 4192 4193 const MCRegisterInfo *MRI = getMRI(); 4194 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4195 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4196 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4197 const MCOperand &Op = Inst.getOperand(I); 4198 if (!Op.isReg()) 4199 continue; 4200 4201 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4202 if (!Sub) 4203 continue; 4204 4205 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4206 return false; 4207 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4208 return false; 4209 } 4210 4211 return true; 4212 } 4213 4214 // gfx90a has an undocumented limitation: 4215 // DS_GWS opcodes must use even aligned registers. 4216 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4217 const OperandVector &Operands) { 4218 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4219 return true; 4220 4221 int Opc = Inst.getOpcode(); 4222 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4223 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4224 return true; 4225 4226 const MCRegisterInfo *MRI = getMRI(); 4227 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4228 int Data0Pos = 4229 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4230 assert(Data0Pos != -1); 4231 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4232 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4233 if (RegIdx & 1) { 4234 SMLoc RegLoc = getRegLoc(Reg, Operands); 4235 Error(RegLoc, "vgpr must be even aligned"); 4236 return false; 4237 } 4238 4239 return true; 4240 } 4241 4242 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4243 const OperandVector &Operands, 4244 const SMLoc &IDLoc) { 4245 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4246 AMDGPU::OpName::cpol); 4247 if (CPolPos == -1) 4248 return true; 4249 4250 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4251 4252 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4253 if ((TSFlags & (SIInstrFlags::SMRD)) && 4254 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4255 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4256 return false; 4257 } 4258 4259 if (isGFX90A() && (CPol & CPol::SCC)) { 4260 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4261 StringRef CStr(S.getPointer()); 4262 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4263 Error(S, "scc is not supported on this GPU"); 4264 return false; 4265 } 4266 4267 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4268 return true; 4269 4270 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4271 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4272 Error(IDLoc, "instruction must use glc"); 4273 return false; 4274 } 4275 } else { 4276 if (CPol & CPol::GLC) { 4277 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4278 StringRef CStr(S.getPointer()); 4279 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4280 Error(S, "instruction must not use glc"); 4281 return false; 4282 } 4283 } 4284 4285 return true; 4286 } 4287 4288 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4289 const SMLoc &IDLoc, 4290 const OperandVector &Operands) { 4291 if (auto ErrMsg = validateLdsDirect(Inst)) { 4292 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4293 return false; 4294 } 4295 if (!validateSOPLiteral(Inst)) { 4296 Error(getLitLoc(Operands), 4297 "only one literal operand is allowed"); 4298 return false; 4299 } 4300 if (!validateVOPLiteral(Inst, Operands)) { 4301 return false; 4302 } 4303 if (!validateConstantBusLimitations(Inst, Operands)) { 4304 return false; 4305 } 4306 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4307 return false; 4308 } 4309 if (!validateIntClampSupported(Inst)) { 4310 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4311 "integer clamping is not supported on this GPU"); 4312 return false; 4313 } 4314 if (!validateOpSel(Inst)) { 4315 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4316 "invalid op_sel operand"); 4317 return false; 4318 } 4319 if (!validateDPP(Inst, Operands)) { 4320 return false; 4321 } 4322 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
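  // For MIMG, d16 is an explicit modifier, e.g. something like
  // "image_load v[0:1], v[2:5], s[0:7] dmask:0xf d16" (illustrative syntax
  // only); validateMIMGD16 rejects the modifier on SI/CI below.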
4323 if (!validateMIMGD16(Inst)) { 4324 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4325 "d16 modifier is not supported on this GPU"); 4326 return false; 4327 } 4328 if (!validateMIMGDim(Inst)) { 4329 Error(IDLoc, "dim modifier is required on this GPU"); 4330 return false; 4331 } 4332 if (!validateMIMGMSAA(Inst)) { 4333 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4334 "invalid dim; must be MSAA type"); 4335 return false; 4336 } 4337 if (!validateMIMGDataSize(Inst)) { 4338 Error(IDLoc, 4339 "image data size does not match dmask and tfe"); 4340 return false; 4341 } 4342 if (!validateMIMGAddrSize(Inst)) { 4343 Error(IDLoc, 4344 "image address size does not match dim and a16"); 4345 return false; 4346 } 4347 if (!validateMIMGAtomicDMask(Inst)) { 4348 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4349 "invalid atomic image dmask"); 4350 return false; 4351 } 4352 if (!validateMIMGGatherDMask(Inst)) { 4353 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4354 "invalid image_gather dmask: only one bit must be set"); 4355 return false; 4356 } 4357 if (!validateMovrels(Inst, Operands)) { 4358 return false; 4359 } 4360 if (!validateFlatOffset(Inst, Operands)) { 4361 return false; 4362 } 4363 if (!validateSMEMOffset(Inst, Operands)) { 4364 return false; 4365 } 4366 if (!validateMAIAccWrite(Inst, Operands)) { 4367 return false; 4368 } 4369 if (!validateMFMA(Inst, Operands)) { 4370 return false; 4371 } 4372 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4373 return false; 4374 } 4375 4376 if (!validateAGPRLdSt(Inst)) { 4377 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4378 ? "invalid register class: data and dst should be all VGPR or AGPR" 4379 : "invalid register class: agpr loads and stores not supported on this GPU" 4380 ); 4381 return false; 4382 } 4383 if (!validateVGPRAlign(Inst)) { 4384 Error(IDLoc, 4385 "invalid register class: vgpr tuples must be 64 bit aligned"); 4386 return false; 4387 } 4388 if (!validateGWS(Inst, Operands)) { 4389 return false; 4390 } 4391 4392 if (!validateDivScale(Inst)) { 4393 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4394 return false; 4395 } 4396 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4397 return false; 4398 } 4399 4400 return true; 4401 } 4402 4403 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4404 const FeatureBitset &FBS, 4405 unsigned VariantID = 0); 4406 4407 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4408 const FeatureBitset &AvailableFeatures, 4409 unsigned VariantID); 4410 4411 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4412 const FeatureBitset &FBS) { 4413 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4414 } 4415 4416 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4417 const FeatureBitset &FBS, 4418 ArrayRef<unsigned> Variants) { 4419 for (auto Variant : Variants) { 4420 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4421 return true; 4422 } 4423 4424 return false; 4425 } 4426 4427 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4428 const SMLoc &IDLoc) { 4429 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4430 4431 // Check if requested instruction variant is supported. 4432 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4433 return false; 4434 4435 // This instruction is not supported. 4436 // Clear any other pending errors because they are no longer relevant. 4437 getParser().clearPendingErrors(); 4438 4439 // Requested instruction variant is not supported. 
4440 // Check if any other variants are supported. 4441 StringRef VariantName = getMatchedVariantName(); 4442 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4443 return Error(IDLoc, 4444 Twine(VariantName, 4445 " variant of this instruction is not supported")); 4446 } 4447 4448 // Finally check if this instruction is supported on any other GPU. 4449 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4450 return Error(IDLoc, "instruction not supported on this GPU"); 4451 } 4452 4453 // Instruction not supported on any GPU. Probably a typo. 4454 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4455 return Error(IDLoc, "invalid instruction" + Suggestion); 4456 } 4457 4458 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4459 OperandVector &Operands, 4460 MCStreamer &Out, 4461 uint64_t &ErrorInfo, 4462 bool MatchingInlineAsm) { 4463 MCInst Inst; 4464 unsigned Result = Match_Success; 4465 for (auto Variant : getMatchedVariants()) { 4466 uint64_t EI; 4467 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4468 Variant); 4469 // We order match statuses from least to most specific. We use most specific 4470 // status as resulting 4471 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4472 if ((R == Match_Success) || 4473 (R == Match_PreferE32) || 4474 (R == Match_MissingFeature && Result != Match_PreferE32) || 4475 (R == Match_InvalidOperand && Result != Match_MissingFeature 4476 && Result != Match_PreferE32) || 4477 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4478 && Result != Match_MissingFeature 4479 && Result != Match_PreferE32)) { 4480 Result = R; 4481 ErrorInfo = EI; 4482 } 4483 if (R == Match_Success) 4484 break; 4485 } 4486 4487 if (Result == Match_Success) { 4488 if (!validateInstruction(Inst, IDLoc, Operands)) { 4489 return true; 4490 } 4491 Inst.setLoc(IDLoc); 4492 Out.emitInstruction(Inst, getSTI()); 4493 return false; 4494 } 4495 4496 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4497 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4498 return true; 4499 } 4500 4501 switch (Result) { 4502 default: break; 4503 case Match_MissingFeature: 4504 // It has been verified that the specified instruction 4505 // mnemonic is valid. A match was found but it requires 4506 // features which are not supported on this GPU. 
4507 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4508 4509 case Match_InvalidOperand: { 4510 SMLoc ErrorLoc = IDLoc; 4511 if (ErrorInfo != ~0ULL) { 4512 if (ErrorInfo >= Operands.size()) { 4513 return Error(IDLoc, "too few operands for instruction"); 4514 } 4515 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4516 if (ErrorLoc == SMLoc()) 4517 ErrorLoc = IDLoc; 4518 } 4519 return Error(ErrorLoc, "invalid operand for instruction"); 4520 } 4521 4522 case Match_PreferE32: 4523 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4524 "should be encoded as e32"); 4525 case Match_MnemonicFail: 4526 llvm_unreachable("Invalid instructions should have been handled already"); 4527 } 4528 llvm_unreachable("Implement any new match types added!"); 4529 } 4530 4531 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4532 int64_t Tmp = -1; 4533 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4534 return true; 4535 } 4536 if (getParser().parseAbsoluteExpression(Tmp)) { 4537 return true; 4538 } 4539 Ret = static_cast<uint32_t>(Tmp); 4540 return false; 4541 } 4542 4543 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4544 uint32_t &Minor) { 4545 if (ParseAsAbsoluteExpression(Major)) 4546 return TokError("invalid major version"); 4547 4548 if (!trySkipToken(AsmToken::Comma)) 4549 return TokError("minor version number required, comma expected"); 4550 4551 if (ParseAsAbsoluteExpression(Minor)) 4552 return TokError("invalid minor version"); 4553 4554 return false; 4555 } 4556 4557 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4558 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4559 return TokError("directive only supported for amdgcn architecture"); 4560 4561 std::string TargetIDDirective; 4562 SMLoc TargetStart = getTok().getLoc(); 4563 if (getParser().parseEscapedString(TargetIDDirective)) 4564 return true; 4565 4566 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4567 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4568 return getParser().Error(TargetRange.Start, 4569 (Twine(".amdgcn_target directive's target id ") + 4570 Twine(TargetIDDirective) + 4571 Twine(" does not match the specified target id ") + 4572 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4573 4574 return false; 4575 } 4576 4577 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4578 return Error(Range.Start, "value out of range", Range); 4579 } 4580 4581 bool AMDGPUAsmParser::calculateGPRBlocks( 4582 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4583 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4584 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4585 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4586 // TODO(scott.linder): These calculations are duplicated from 4587 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
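  // Sketch of the granulated encoding computed at the end of this function
  // (granule sizes are target dependent; the numbers are an example only):
  // with a 4-register granule, NextFreeVGPR = 10 rounds up to 12, i.e. 3
  // granules, and the encoded block count is 3 - 1 = 2.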
4588 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4589 4590 unsigned NumVGPRs = NextFreeVGPR; 4591 unsigned NumSGPRs = NextFreeSGPR; 4592 4593 if (Version.Major >= 10) 4594 NumSGPRs = 0; 4595 else { 4596 unsigned MaxAddressableNumSGPRs = 4597 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4598 4599 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4600 NumSGPRs > MaxAddressableNumSGPRs) 4601 return OutOfRangeError(SGPRRange); 4602 4603 NumSGPRs += 4604 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4605 4606 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4607 NumSGPRs > MaxAddressableNumSGPRs) 4608 return OutOfRangeError(SGPRRange); 4609 4610 if (Features.test(FeatureSGPRInitBug)) 4611 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4612 } 4613 4614 VGPRBlocks = 4615 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4616 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4617 4618 return false; 4619 } 4620 4621 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4622 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4623 return TokError("directive only supported for amdgcn architecture"); 4624 4625 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4626 return TokError("directive only supported for amdhsa OS"); 4627 4628 StringRef KernelName; 4629 if (getParser().parseIdentifier(KernelName)) 4630 return true; 4631 4632 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4633 4634 StringSet<> Seen; 4635 4636 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4637 4638 SMRange VGPRRange; 4639 uint64_t NextFreeVGPR = 0; 4640 uint64_t AccumOffset = 0; 4641 SMRange SGPRRange; 4642 uint64_t NextFreeSGPR = 0; 4643 4644 // Count the number of user SGPRs implied from the enabled feature bits. 4645 unsigned ImpliedUserSGPRCount = 0; 4646 4647 // Track if the asm explicitly contains the directive for the user SGPR 4648 // count. 
4649 Optional<unsigned> ExplicitUserSGPRCount; 4650 bool ReserveVCC = true; 4651 bool ReserveFlatScr = true; 4652 Optional<bool> EnableWavefrontSize32; 4653 4654 while (true) { 4655 while (trySkipToken(AsmToken::EndOfStatement)); 4656 4657 StringRef ID; 4658 SMRange IDRange = getTok().getLocRange(); 4659 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4660 return true; 4661 4662 if (ID == ".end_amdhsa_kernel") 4663 break; 4664 4665 if (Seen.find(ID) != Seen.end()) 4666 return TokError(".amdhsa_ directives cannot be repeated"); 4667 Seen.insert(ID); 4668 4669 SMLoc ValStart = getLoc(); 4670 int64_t IVal; 4671 if (getParser().parseAbsoluteExpression(IVal)) 4672 return true; 4673 SMLoc ValEnd = getLoc(); 4674 SMRange ValRange = SMRange(ValStart, ValEnd); 4675 4676 if (IVal < 0) 4677 return OutOfRangeError(ValRange); 4678 4679 uint64_t Val = IVal; 4680 4681 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4682 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4683 return OutOfRangeError(RANGE); \ 4684 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4685 4686 if (ID == ".amdhsa_group_segment_fixed_size") { 4687 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4688 return OutOfRangeError(ValRange); 4689 KD.group_segment_fixed_size = Val; 4690 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4691 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4692 return OutOfRangeError(ValRange); 4693 KD.private_segment_fixed_size = Val; 4694 } else if (ID == ".amdhsa_kernarg_size") { 4695 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4696 return OutOfRangeError(ValRange); 4697 KD.kernarg_size = Val; 4698 } else if (ID == ".amdhsa_user_sgpr_count") { 4699 ExplicitUserSGPRCount = Val; 4700 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4701 if (hasArchitectedFlatScratch()) 4702 return Error(IDRange.Start, 4703 "directive is not supported with architected flat scratch", 4704 IDRange); 4705 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4706 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4707 Val, ValRange); 4708 if (Val) 4709 ImpliedUserSGPRCount += 4; 4710 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4711 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4712 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4713 ValRange); 4714 if (Val) 4715 ImpliedUserSGPRCount += 2; 4716 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4717 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4718 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4719 ValRange); 4720 if (Val) 4721 ImpliedUserSGPRCount += 2; 4722 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4723 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4724 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4725 Val, ValRange); 4726 if (Val) 4727 ImpliedUserSGPRCount += 2; 4728 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4729 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4730 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4731 ValRange); 4732 if (Val) 4733 ImpliedUserSGPRCount += 2; 4734 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4735 if (hasArchitectedFlatScratch()) 4736 return Error(IDRange.Start, 4737 "directive is not supported with architected flat scratch", 4738 IDRange); 4739 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4740 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4741 ValRange); 4742 if (Val) 4743 ImpliedUserSGPRCount += 2; 4744 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4745 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4746 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4747 Val, ValRange); 4748 if (Val) 4749 ImpliedUserSGPRCount += 1; 4750 } else if (ID == ".amdhsa_wavefront_size32") { 4751 if (IVersion.Major < 10) 4752 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4753 EnableWavefrontSize32 = Val; 4754 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4755 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4756 Val, ValRange); 4757 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4758 if (hasArchitectedFlatScratch()) 4759 return Error(IDRange.Start, 4760 "directive is not supported with architected flat scratch", 4761 IDRange); 4762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4763 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4764 } else if (ID == ".amdhsa_enable_private_segment") { 4765 if (!hasArchitectedFlatScratch()) 4766 return Error( 4767 IDRange.Start, 4768 "directive is not supported without architected flat scratch", 4769 IDRange); 4770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4771 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4772 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4773 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4774 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4775 ValRange); 4776 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4777 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4778 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4779 ValRange); 4780 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4781 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4782 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4783 ValRange); 4784 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4786 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4787 ValRange); 4788 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4790 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4791 ValRange); 4792 } else if (ID == ".amdhsa_next_free_vgpr") { 4793 VGPRRange = ValRange; 4794 NextFreeVGPR = Val; 4795 } else if (ID == ".amdhsa_next_free_sgpr") { 4796 SGPRRange = ValRange; 4797 NextFreeSGPR = Val; 4798 } else if (ID == ".amdhsa_accum_offset") { 4799 if (!isGFX90A()) 4800 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4801 AccumOffset = Val; 4802 } else if (ID == ".amdhsa_reserve_vcc") { 4803 if (!isUInt<1>(Val)) 4804 return OutOfRangeError(ValRange); 4805 ReserveVCC = Val; 4806 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4807 if (IVersion.Major < 7) 4808 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4809 if (hasArchitectedFlatScratch()) 4810 return Error(IDRange.Start, 4811 "directive is not supported with architected flat scratch", 4812 IDRange); 4813 if (!isUInt<1>(Val)) 4814 return OutOfRangeError(ValRange); 4815 ReserveFlatScr = Val; 4816 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4817 if (IVersion.Major < 8) 4818 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4819 if (!isUInt<1>(Val)) 4820 return OutOfRangeError(ValRange); 4821 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4822 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4823 IDRange); 4824 } else if (ID == ".amdhsa_float_round_mode_32") { 4825 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4826 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4827 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4828 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4829 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4830 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4831 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4832 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4833 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4834 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4835 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4836 ValRange); 4837 } else if (ID == ".amdhsa_dx10_clamp") { 4838 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4839 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4840 } else if (ID == ".amdhsa_ieee_mode") { 4841 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4842 Val, ValRange); 4843 } else if (ID == ".amdhsa_fp16_overflow") { 4844 if (IVersion.Major < 9) 4845 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4846 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4847 ValRange); 4848 } else if (ID == ".amdhsa_tg_split") { 4849 if (!isGFX90A()) 4850 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4852 ValRange); 4853 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4854 if (IVersion.Major < 10) 4855 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4856 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4857 ValRange); 4858 } else if (ID == ".amdhsa_memory_ordered") { 4859 if (IVersion.Major < 10) 4860 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4862 ValRange); 4863 } else if (ID == ".amdhsa_forward_progress") { 4864 if (IVersion.Major < 10) 4865 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4866 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4867 ValRange); 4868 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4869 PARSE_BITS_ENTRY( 4870 KD.compute_pgm_rsrc2, 4871 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4872 ValRange); 4873 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4874 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4875 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4876 Val, ValRange); 4877 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4878 PARSE_BITS_ENTRY( 4879 KD.compute_pgm_rsrc2, 4880 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4881 ValRange); 4882 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4883 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4884 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4885 Val, ValRange); 4886 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4887 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4888 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4889 Val, ValRange); 4890 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4891 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4892 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4893 Val, ValRange); 4894 } else if (ID == ".amdhsa_exception_int_div_zero") { 4895 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4896 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4897 Val, ValRange); 4898 } else { 4899 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4900 } 4901 4902 #undef PARSE_BITS_ENTRY 4903 } 4904 4905 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4906 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4907 4908 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4909 return TokError(".amdhsa_next_free_sgpr directive is required"); 4910 4911 unsigned VGPRBlocks; 4912 unsigned SGPRBlocks; 4913 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4914 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4915 EnableWavefrontSize32, NextFreeVGPR, 4916 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4917 SGPRBlocks)) 4918 return true; 4919 4920 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4921 VGPRBlocks)) 4922 return OutOfRangeError(VGPRRange); 4923 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4924 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4925 4926 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4927 SGPRBlocks)) 4928 return OutOfRangeError(SGPRRange); 4929 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4930 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4931 SGPRBlocks); 4932 4933 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4934 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4935 "enabled user SGPRs"); 4936 4937 unsigned UserSGPRCount = 4938 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4939 4940 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4941 return TokError("too many user SGPRs enabled"); 4942 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4943 UserSGPRCount); 4944 4945 if (isGFX90A()) { 4946 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4947 return TokError(".amdhsa_accum_offset directive is required"); 4948 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4949 return TokError("accum_offset should be in range [4..256] in " 4950 "increments of 4"); 4951 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4952 return TokError("accum_offset exceeds total VGPR allocation"); 4953 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4954 (AccumOffset / 4 - 1)); 4955 } 4956 4957 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4958 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4959 ReserveFlatScr); 4960 return false; 4961 } 4962 4963 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4964 uint32_t Major; 4965 uint32_t Minor; 4966 4967 if (ParseDirectiveMajorMinor(Major, Minor)) 4968 return true; 4969 4970 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4971 return false; 4972 } 4973 4974 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4975 uint32_t Major; 4976 uint32_t Minor; 4977 uint32_t Stepping; 4978 StringRef VendorName; 4979 StringRef ArchName; 4980 4981 // If this directive has no arguments, then use the ISA version for the 4982 // targeted GPU. 
4983 if (isToken(AsmToken::EndOfStatement)) { 4984 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4985 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4986 ISA.Stepping, 4987 "AMD", "AMDGPU"); 4988 return false; 4989 } 4990 4991 if (ParseDirectiveMajorMinor(Major, Minor)) 4992 return true; 4993 4994 if (!trySkipToken(AsmToken::Comma)) 4995 return TokError("stepping version number required, comma expected"); 4996 4997 if (ParseAsAbsoluteExpression(Stepping)) 4998 return TokError("invalid stepping version"); 4999 5000 if (!trySkipToken(AsmToken::Comma)) 5001 return TokError("vendor name required, comma expected"); 5002 5003 if (!parseString(VendorName, "invalid vendor name")) 5004 return true; 5005 5006 if (!trySkipToken(AsmToken::Comma)) 5007 return TokError("arch name required, comma expected"); 5008 5009 if (!parseString(ArchName, "invalid arch name")) 5010 return true; 5011 5012 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5013 VendorName, ArchName); 5014 return false; 5015 } 5016 5017 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5018 amd_kernel_code_t &Header) { 5019 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5020 // assembly for backwards compatibility. 5021 if (ID == "max_scratch_backing_memory_byte_size") { 5022 Parser.eatToEndOfStatement(); 5023 return false; 5024 } 5025 5026 SmallString<40> ErrStr; 5027 raw_svector_ostream Err(ErrStr); 5028 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5029 return TokError(Err.str()); 5030 } 5031 Lex(); 5032 5033 if (ID == "enable_wavefront_size32") { 5034 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5035 if (!isGFX10Plus()) 5036 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5037 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5038 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5039 } else { 5040 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5041 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5042 } 5043 } 5044 5045 if (ID == "wavefront_size") { 5046 if (Header.wavefront_size == 5) { 5047 if (!isGFX10Plus()) 5048 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5049 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5050 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5051 } else if (Header.wavefront_size == 6) { 5052 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5053 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5054 } 5055 } 5056 5057 if (ID == "enable_wgp_mode") { 5058 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5059 !isGFX10Plus()) 5060 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5061 } 5062 5063 if (ID == "enable_mem_ordered") { 5064 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5065 !isGFX10Plus()) 5066 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5067 } 5068 5069 if (ID == "enable_fwd_progress") { 5070 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5071 !isGFX10Plus()) 5072 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5073 } 5074 5075 return false; 5076 } 5077 5078 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5079 amd_kernel_code_t Header; 5080 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5081 5082 while (true) { 5083 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5084 // will set the current token to EndOfStatement. 5085 while(trySkipToken(AsmToken::EndOfStatement)); 5086 5087 StringRef ID; 5088 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5089 return true; 5090 5091 if (ID == ".end_amd_kernel_code_t") 5092 break; 5093 5094 if (ParseAMDKernelCodeTValue(ID, Header)) 5095 return true; 5096 } 5097 5098 getTargetStreamer().EmitAMDKernelCodeT(Header); 5099 5100 return false; 5101 } 5102 5103 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5104 StringRef KernelName; 5105 if (!parseId(KernelName, "expected symbol name")) 5106 return true; 5107 5108 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5109 ELF::STT_AMDGPU_HSA_KERNEL); 5110 5111 KernelScope.initialize(getContext()); 5112 return false; 5113 } 5114 5115 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5116 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5117 return Error(getLoc(), 5118 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5119 "architectures"); 5120 } 5121 5122 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5123 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5124 return Error(getParser().getTok().getLoc(), "target id must match options"); 5125 5126 getTargetStreamer().EmitISAVersion(); 5127 Lex(); 5128 5129 return false; 5130 } 5131 5132 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5133 const char *AssemblerDirectiveBegin; 5134 const char *AssemblerDirectiveEnd; 5135 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5136 isHsaAbiVersion3AndAbove(&getSTI()) 5137 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5138 HSAMD::V3::AssemblerDirectiveEnd) 5139 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5140 HSAMD::AssemblerDirectiveEnd); 5141 5142 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5143 return Error(getLoc(), 5144 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5145 "not available on non-amdhsa OSes")).str()); 5146 } 5147 5148 std::string HSAMetadataString; 5149 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5150 HSAMetadataString)) 5151 return true; 5152 5153 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5154 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5155 return Error(getLoc(), "invalid HSA metadata"); 5156 } else { 5157 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5158 return Error(getLoc(), "invalid HSA metadata"); 5159 } 5160 5161 return false; 5162 } 5163 5164 /// Common code to parse out a block of text (typically YAML) between start and 5165 /// end directives. 
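/// For example, assuming the V3 metadata directive names, input of the form
///   .amdgpu_metadata
///   amdhsa.version: [ 1, 0 ]
///   .end_amdgpu_metadata
/// is collected verbatim into the output string (the directive spelling and
/// YAML body above are illustrative only).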
5166 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5167 const char *AssemblerDirectiveEnd, 5168 std::string &CollectString) { 5169 5170 raw_string_ostream CollectStream(CollectString); 5171 5172 getLexer().setSkipSpace(false); 5173 5174 bool FoundEnd = false; 5175 while (!isToken(AsmToken::Eof)) { 5176 while (isToken(AsmToken::Space)) { 5177 CollectStream << getTokenStr(); 5178 Lex(); 5179 } 5180 5181 if (trySkipId(AssemblerDirectiveEnd)) { 5182 FoundEnd = true; 5183 break; 5184 } 5185 5186 CollectStream << Parser.parseStringToEndOfStatement() 5187 << getContext().getAsmInfo()->getSeparatorString(); 5188 5189 Parser.eatToEndOfStatement(); 5190 } 5191 5192 getLexer().setSkipSpace(true); 5193 5194 if (isToken(AsmToken::Eof) && !FoundEnd) { 5195 return TokError(Twine("expected directive ") + 5196 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5197 } 5198 5199 CollectStream.flush(); 5200 return false; 5201 } 5202 5203 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5204 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5205 std::string String; 5206 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5207 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5208 return true; 5209 5210 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5211 if (!PALMetadata->setFromString(String)) 5212 return Error(getLoc(), "invalid PAL metadata"); 5213 return false; 5214 } 5215 5216 /// Parse the assembler directive for old linear-format PAL metadata. 5217 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5218 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5219 return Error(getLoc(), 5220 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5221 "not available on non-amdpal OSes")).str()); 5222 } 5223 5224 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5225 PALMetadata->setLegacy(); 5226 for (;;) { 5227 uint32_t Key, Value; 5228 if (ParseAsAbsoluteExpression(Key)) { 5229 return TokError(Twine("invalid value in ") + 5230 Twine(PALMD::AssemblerDirective)); 5231 } 5232 if (!trySkipToken(AsmToken::Comma)) { 5233 return TokError(Twine("expected an even number of values in ") + 5234 Twine(PALMD::AssemblerDirective)); 5235 } 5236 if (ParseAsAbsoluteExpression(Value)) { 5237 return TokError(Twine("invalid value in ") + 5238 Twine(PALMD::AssemblerDirective)); 5239 } 5240 PALMetadata->setRegister(Key, Value); 5241 if (!trySkipToken(AsmToken::Comma)) 5242 break; 5243 } 5244 return false; 5245 } 5246 5247 /// ParseDirectiveAMDGPULDS 5248 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5249 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5250 if (getParser().checkForValidSection()) 5251 return true; 5252 5253 StringRef Name; 5254 SMLoc NameLoc = getLoc(); 5255 if (getParser().parseIdentifier(Name)) 5256 return TokError("expected identifier in directive"); 5257 5258 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5259 if (parseToken(AsmToken::Comma, "expected ','")) 5260 return true; 5261 5262 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5263 5264 int64_t Size; 5265 SMLoc SizeLoc = getLoc(); 5266 if (getParser().parseAbsoluteExpression(Size)) 5267 return true; 5268 if (Size < 0) 5269 return Error(SizeLoc, "size must be non-negative"); 5270 if (Size > LocalMemorySize) 5271 return Error(SizeLoc, "size is too large"); 5272 5273 int64_t Alignment = 4; 5274 if (trySkipToken(AsmToken::Comma)) { 5275 SMLoc AlignLoc = getLoc(); 5276 if 
(getParser().parseAbsoluteExpression(Alignment)) 5277 return true; 5278 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5279 return Error(AlignLoc, "alignment must be a power of two"); 5280 5281 // Alignment larger than the size of LDS is possible in theory, as long 5282 // as the linker manages to place the symbol at address 0, but we do want 5283 // to make sure the alignment fits nicely into a 32-bit integer. 5284 if (Alignment >= 1u << 31) 5285 return Error(AlignLoc, "alignment is too large"); 5286 } 5287 5288 if (parseToken(AsmToken::EndOfStatement, 5289 "unexpected token in '.amdgpu_lds' directive")) 5290 return true; 5291 5292 Symbol->redefineIfPossible(); 5293 if (!Symbol->isUndefined()) 5294 return Error(NameLoc, "invalid symbol redefinition"); 5295 5296 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5297 return false; 5298 } 5299 5300 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5301 StringRef IDVal = DirectiveID.getString(); 5302 5303 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5304 if (IDVal == ".amdhsa_kernel") 5305 return ParseDirectiveAMDHSAKernel(); 5306 5307 // TODO: Restructure/combine with PAL metadata directive. 5308 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5309 return ParseDirectiveHSAMetadata(); 5310 } else { 5311 if (IDVal == ".hsa_code_object_version") 5312 return ParseDirectiveHSACodeObjectVersion(); 5313 5314 if (IDVal == ".hsa_code_object_isa") 5315 return ParseDirectiveHSACodeObjectISA(); 5316 5317 if (IDVal == ".amd_kernel_code_t") 5318 return ParseDirectiveAMDKernelCodeT(); 5319 5320 if (IDVal == ".amdgpu_hsa_kernel") 5321 return ParseDirectiveAMDGPUHsaKernel(); 5322 5323 if (IDVal == ".amd_amdgpu_isa") 5324 return ParseDirectiveISAVersion(); 5325 5326 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5327 return ParseDirectiveHSAMetadata(); 5328 } 5329 5330 if (IDVal == ".amdgcn_target") 5331 return ParseDirectiveAMDGCNTarget(); 5332 5333 if (IDVal == ".amdgpu_lds") 5334 return ParseDirectiveAMDGPULDS(); 5335 5336 if (IDVal == PALMD::AssemblerDirectiveBegin) 5337 return ParseDirectivePALMetadataBegin(); 5338 5339 if (IDVal == PALMD::AssemblerDirective) 5340 return ParseDirectivePALMetadata(); 5341 5342 return true; 5343 } 5344 5345 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5346 unsigned RegNo) { 5347 5348 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5349 R.isValid(); ++R) { 5350 if (*R == RegNo) 5351 return isGFX9Plus(); 5352 } 5353 5354 // GFX10 has 2 more SGPRs 104 and 105. 5355 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5356 R.isValid(); ++R) { 5357 if (*R == RegNo) 5358 return hasSGPR104_SGPR105(); 5359 } 5360 5361 switch (RegNo) { 5362 case AMDGPU::SRC_SHARED_BASE: 5363 case AMDGPU::SRC_SHARED_LIMIT: 5364 case AMDGPU::SRC_PRIVATE_BASE: 5365 case AMDGPU::SRC_PRIVATE_LIMIT: 5366 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5367 return isGFX9Plus(); 5368 case AMDGPU::TBA: 5369 case AMDGPU::TBA_LO: 5370 case AMDGPU::TBA_HI: 5371 case AMDGPU::TMA: 5372 case AMDGPU::TMA_LO: 5373 case AMDGPU::TMA_HI: 5374 return !isGFX9Plus(); 5375 case AMDGPU::XNACK_MASK: 5376 case AMDGPU::XNACK_MASK_LO: 5377 case AMDGPU::XNACK_MASK_HI: 5378 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5379 case AMDGPU::SGPR_NULL: 5380 return isGFX10Plus(); 5381 default: 5382 break; 5383 } 5384 5385 if (isCI()) 5386 return true; 5387 5388 if (isSI() || isGFX10Plus()) { 5389 // No flat_scr on SI.
5390 // On GFX10 flat scratch is not a valid register operand and can only be 5391 // accessed with s_setreg/s_getreg. 5392 switch (RegNo) { 5393 case AMDGPU::FLAT_SCR: 5394 case AMDGPU::FLAT_SCR_LO: 5395 case AMDGPU::FLAT_SCR_HI: 5396 return false; 5397 default: 5398 return true; 5399 } 5400 } 5401 5402 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5403 // SI/CI have. 5404 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5405 R.isValid(); ++R) { 5406 if (*R == RegNo) 5407 return hasSGPR102_SGPR103(); 5408 } 5409 5410 return true; 5411 } 5412 5413 OperandMatchResultTy 5414 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5415 OperandMode Mode) { 5416 // Try to parse with a custom parser 5417 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5418 5419 // If we successfully parsed the operand or if there was an error parsing, 5420 // we are done. 5421 // 5422 // If we are parsing after we reach EndOfStatement then this means we 5423 // are appending default values to the Operands list. This is only done 5424 // by a custom parser, so we shouldn't continue on to the generic parsing. 5425 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5426 isToken(AsmToken::EndOfStatement)) 5427 return ResTy; 5428 5429 SMLoc RBraceLoc; 5430 SMLoc LBraceLoc = getLoc(); 5431 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5432 unsigned Prefix = Operands.size(); 5433 5434 for (;;) { 5435 auto Loc = getLoc(); 5436 ResTy = parseReg(Operands); 5437 if (ResTy == MatchOperand_NoMatch) 5438 Error(Loc, "expected a register"); 5439 if (ResTy != MatchOperand_Success) 5440 return MatchOperand_ParseFail; 5441 5442 RBraceLoc = getLoc(); 5443 if (trySkipToken(AsmToken::RBrac)) 5444 break; 5445 5446 if (!skipToken(AsmToken::Comma, 5447 "expected a comma or a closing square bracket")) { 5448 return MatchOperand_ParseFail; 5449 } 5450 } 5451 5452 if (Operands.size() - Prefix > 1) { 5453 Operands.insert(Operands.begin() + Prefix, 5454 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5455 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5456 } 5457 5458 return MatchOperand_Success; 5459 } 5460 5461 return parseRegOrImm(Operands); 5462 } 5463 5464 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5465 // Clear any forced encodings from the previous instruction.
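// A suffix parsed below re-establishes them: "_e64" forces the 64-bit
// encoding, "_e32" the 32-bit encoding, "_dpp" DPP, and "_sdwa" SDWA; the
// suffix itself is stripped from the returned mnemonic.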
5466 setForcedEncodingSize(0); 5467 setForcedDPP(false); 5468 setForcedSDWA(false); 5469 5470 if (Name.endswith("_e64")) { 5471 setForcedEncodingSize(64); 5472 return Name.substr(0, Name.size() - 4); 5473 } else if (Name.endswith("_e32")) { 5474 setForcedEncodingSize(32); 5475 return Name.substr(0, Name.size() - 4); 5476 } else if (Name.endswith("_dpp")) { 5477 setForcedDPP(true); 5478 return Name.substr(0, Name.size() - 4); 5479 } else if (Name.endswith("_sdwa")) { 5480 setForcedSDWA(true); 5481 return Name.substr(0, Name.size() - 5); 5482 } 5483 return Name; 5484 } 5485 5486 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5487 StringRef Name, 5488 SMLoc NameLoc, OperandVector &Operands) { 5489 // Add the instruction mnemonic 5490 Name = parseMnemonicSuffix(Name); 5491 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5492 5493 bool IsMIMG = Name.startswith("image_"); 5494 5495 while (!trySkipToken(AsmToken::EndOfStatement)) { 5496 OperandMode Mode = OperandMode_Default; 5497 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5498 Mode = OperandMode_NSA; 5499 CPolSeen = 0; 5500 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5501 5502 if (Res != MatchOperand_Success) { 5503 checkUnsupportedInstruction(Name, NameLoc); 5504 if (!Parser.hasPendingError()) { 5505 // FIXME: use real operand location rather than the current location. 5506 StringRef Msg = 5507 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5508 "not a valid operand."; 5509 Error(getLoc(), Msg); 5510 } 5511 while (!trySkipToken(AsmToken::EndOfStatement)) { 5512 lex(); 5513 } 5514 return true; 5515 } 5516 5517 // Eat the comma or space if there is one. 5518 trySkipToken(AsmToken::Comma); 5519 } 5520 5521 return false; 5522 } 5523 5524 //===----------------------------------------------------------------------===// 5525 // Utility functions 5526 //===----------------------------------------------------------------------===// 5527 5528 OperandMatchResultTy 5529 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5530 5531 if (!trySkipId(Prefix, AsmToken::Colon)) 5532 return MatchOperand_NoMatch; 5533 5534 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5535 } 5536 5537 OperandMatchResultTy 5538 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5539 AMDGPUOperand::ImmTy ImmTy, 5540 bool (*ConvertResult)(int64_t&)) { 5541 SMLoc S = getLoc(); 5542 int64_t Value = 0; 5543 5544 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5545 if (Res != MatchOperand_Success) 5546 return Res; 5547 5548 if (ConvertResult && !ConvertResult(Value)) { 5549 Error(S, "invalid " + StringRef(Prefix) + " value."); 5550 } 5551 5552 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5553 return MatchOperand_Success; 5554 } 5555 5556 OperandMatchResultTy 5557 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5558 OperandVector &Operands, 5559 AMDGPUOperand::ImmTy ImmTy, 5560 bool (*ConvertResult)(int64_t&)) { 5561 SMLoc S = getLoc(); 5562 if (!trySkipId(Prefix, AsmToken::Colon)) 5563 return MatchOperand_NoMatch; 5564 5565 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5566 return MatchOperand_ParseFail; 5567 5568 unsigned Val = 0; 5569 const unsigned MaxSize = 4; 5570 5571 // FIXME: How to verify the number of elements matches the number of src 5572 // operands? 
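// Each array element must be 0 or 1; up to MaxSize elements are accepted
// and packed into Val as a bitmask, least significant bit first.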
5573 for (int I = 0; ; ++I) { 5574 int64_t Op; 5575 SMLoc Loc = getLoc(); 5576 if (!parseExpr(Op)) 5577 return MatchOperand_ParseFail; 5578 5579 if (Op != 0 && Op != 1) { 5580 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5581 return MatchOperand_ParseFail; 5582 } 5583 5584 Val |= (Op << I); 5585 5586 if (trySkipToken(AsmToken::RBrac)) 5587 break; 5588 5589 if (I + 1 == MaxSize) { 5590 Error(getLoc(), "expected a closing square bracket"); 5591 return MatchOperand_ParseFail; 5592 } 5593 5594 if (!skipToken(AsmToken::Comma, "expected a comma")) 5595 return MatchOperand_ParseFail; 5596 } 5597 5598 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5599 return MatchOperand_Success; 5600 } 5601 5602 OperandMatchResultTy 5603 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5604 AMDGPUOperand::ImmTy ImmTy) { 5605 int64_t Bit; 5606 SMLoc S = getLoc(); 5607 5608 if (trySkipId(Name)) { 5609 Bit = 1; 5610 } else if (trySkipId("no", Name)) { 5611 Bit = 0; 5612 } else { 5613 return MatchOperand_NoMatch; 5614 } 5615 5616 if (Name == "r128" && !hasMIMG_R128()) { 5617 Error(S, "r128 modifier is not supported on this GPU"); 5618 return MatchOperand_ParseFail; 5619 } 5620 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5621 Error(S, "a16 modifier is not supported on this GPU"); 5622 return MatchOperand_ParseFail; 5623 } 5624 5625 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5626 ImmTy = AMDGPUOperand::ImmTyR128A16; 5627 5628 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5629 return MatchOperand_Success; 5630 } 5631 5632 OperandMatchResultTy 5633 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5634 unsigned CPolOn = 0; 5635 unsigned CPolOff = 0; 5636 SMLoc S = getLoc(); 5637 5638 if (trySkipId("glc")) 5639 CPolOn = AMDGPU::CPol::GLC; 5640 else if (trySkipId("noglc")) 5641 CPolOff = AMDGPU::CPol::GLC; 5642 else if (trySkipId("slc")) 5643 CPolOn = AMDGPU::CPol::SLC; 5644 else if (trySkipId("noslc")) 5645 CPolOff = AMDGPU::CPol::SLC; 5646 else if (trySkipId("dlc")) 5647 CPolOn = AMDGPU::CPol::DLC; 5648 else if (trySkipId("nodlc")) 5649 CPolOff = AMDGPU::CPol::DLC; 5650 else if (trySkipId("scc")) 5651 CPolOn = AMDGPU::CPol::SCC; 5652 else if (trySkipId("noscc")) 5653 CPolOff = AMDGPU::CPol::SCC; 5654 else 5655 return MatchOperand_NoMatch; 5656 5657 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5658 Error(S, "dlc modifier is not supported on this GPU"); 5659 return MatchOperand_ParseFail; 5660 } 5661 5662 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5663 Error(S, "scc modifier is not supported on this GPU"); 5664 return MatchOperand_ParseFail; 5665 } 5666 5667 if (CPolSeen & (CPolOn | CPolOff)) { 5668 Error(S, "duplicate cache policy modifier"); 5669 return MatchOperand_ParseFail; 5670 } 5671 5672 CPolSeen |= (CPolOn | CPolOff); 5673 5674 for (unsigned I = 1; I != Operands.size(); ++I) { 5675 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5676 if (Op.isCPol()) { 5677 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5678 return MatchOperand_Success; 5679 } 5680 } 5681 5682 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5683 AMDGPUOperand::ImmTyCPol)); 5684 5685 return MatchOperand_Success; 5686 } 5687 5688 static void addOptionalImmOperand( 5689 MCInst& Inst, const OperandVector& Operands, 5690 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5691 AMDGPUOperand::ImmTy ImmT, 5692 int64_t Default = 0) { 5693 auto i = OptionalIdx.find(ImmT); 5694 if (i != OptionalIdx.end()) { 
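// The operand was specified explicitly; forward its immediate to the MCInst.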
5695 unsigned Idx = i->second; 5696 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5697 } else { 5698 Inst.addOperand(MCOperand::createImm(Default)); 5699 } 5700 } 5701 5702 OperandMatchResultTy 5703 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5704 StringRef &Value, 5705 SMLoc &StringLoc) { 5706 if (!trySkipId(Prefix, AsmToken::Colon)) 5707 return MatchOperand_NoMatch; 5708 5709 StringLoc = getLoc(); 5710 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5711 : MatchOperand_ParseFail; 5712 } 5713 5714 //===----------------------------------------------------------------------===// 5715 // MTBUF format 5716 //===----------------------------------------------------------------------===// 5717 5718 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5719 int64_t MaxVal, 5720 int64_t &Fmt) { 5721 int64_t Val; 5722 SMLoc Loc = getLoc(); 5723 5724 auto Res = parseIntWithPrefix(Pref, Val); 5725 if (Res == MatchOperand_ParseFail) 5726 return false; 5727 if (Res == MatchOperand_NoMatch) 5728 return true; 5729 5730 if (Val < 0 || Val > MaxVal) { 5731 Error(Loc, Twine("out of range ", StringRef(Pref))); 5732 return false; 5733 } 5734 5735 Fmt = Val; 5736 return true; 5737 } 5738 5739 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5740 // values to live in a joint format operand in the MCInst encoding. 5741 OperandMatchResultTy 5742 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5743 using namespace llvm::AMDGPU::MTBUFFormat; 5744 5745 int64_t Dfmt = DFMT_UNDEF; 5746 int64_t Nfmt = NFMT_UNDEF; 5747 5748 // dfmt and nfmt can appear in either order, and each is optional. 5749 for (int I = 0; I < 2; ++I) { 5750 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5751 return MatchOperand_ParseFail; 5752 5753 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5754 return MatchOperand_ParseFail; 5755 } 5756 // Skip optional comma between dfmt/nfmt 5757 // but guard against 2 commas following each other. 5758 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5759 !peekToken().is(AsmToken::Comma)) { 5760 trySkipToken(AsmToken::Comma); 5761 } 5762 } 5763 5764 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5765 return MatchOperand_NoMatch; 5766 5767 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5768 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5769 5770 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5771 return MatchOperand_Success; 5772 } 5773 5774 OperandMatchResultTy 5775 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5776 using namespace llvm::AMDGPU::MTBUFFormat; 5777 5778 int64_t Fmt = UFMT_UNDEF; 5779 5780 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5781 return MatchOperand_ParseFail; 5782 5783 if (Fmt == UFMT_UNDEF) 5784 return MatchOperand_NoMatch; 5785 5786 Format = Fmt; 5787 return MatchOperand_Success; 5788 } 5789 5790 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5791 int64_t &Nfmt, 5792 StringRef FormatStr, 5793 SMLoc Loc) { 5794 using namespace llvm::AMDGPU::MTBUFFormat; 5795 int64_t Format; 5796 5797 Format = getDfmt(FormatStr); 5798 if (Format != DFMT_UNDEF) { 5799 Dfmt = Format; 5800 return true; 5801 } 5802 5803 Format = getNfmt(FormatStr, getSTI()); 5804 if (Format != NFMT_UNDEF) { 5805 Nfmt = Format; 5806 return true; 5807 } 5808 5809 Error(Loc, "unsupported format"); 5810 return false; 5811 } 5812 5813 OperandMatchResultTy 5814 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5815 SMLoc FormatLoc, 5816 int64_t &Format) { 5817 using namespace llvm::AMDGPU::MTBUFFormat; 5818 5819 int64_t Dfmt = DFMT_UNDEF; 5820 int64_t Nfmt = NFMT_UNDEF; 5821 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5822 return MatchOperand_ParseFail; 5823 5824 if (trySkipToken(AsmToken::Comma)) { 5825 StringRef Str; 5826 SMLoc Loc = getLoc(); 5827 if (!parseId(Str, "expected a format string") || 5828 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5829 return MatchOperand_ParseFail; 5830 } 5831 if (Dfmt == DFMT_UNDEF) { 5832 Error(Loc, "duplicate numeric format"); 5833 return MatchOperand_ParseFail; 5834 } else if (Nfmt == NFMT_UNDEF) { 5835 Error(Loc, "duplicate data format"); 5836 return MatchOperand_ParseFail; 5837 } 5838 } 5839 5840 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5841 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5842 5843 if (isGFX10Plus()) { 5844 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5845 if (Ufmt == UFMT_UNDEF) { 5846 Error(FormatLoc, "unsupported format"); 5847 return MatchOperand_ParseFail; 5848 } 5849 Format = Ufmt; 5850 } else { 5851 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5852 } 5853 5854 return MatchOperand_Success; 5855 } 5856 5857 OperandMatchResultTy 5858 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5859 SMLoc Loc, 5860 int64_t &Format) { 5861 using namespace llvm::AMDGPU::MTBUFFormat; 5862 5863 auto Id = getUnifiedFormat(FormatStr); 5864 if (Id == UFMT_UNDEF) 5865 return MatchOperand_NoMatch; 5866 5867 if (!isGFX10Plus()) { 5868 Error(Loc, "unified format is not supported on this GPU"); 5869 return MatchOperand_ParseFail; 5870 } 5871 5872 Format = Id; 5873 return MatchOperand_Success; 5874 } 5875 5876 OperandMatchResultTy 5877 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5878 using namespace llvm::AMDGPU::MTBUFFormat; 5879 SMLoc Loc = getLoc(); 5880 5881 if (!parseExpr(Format)) 5882 return MatchOperand_ParseFail; 5883 if (!isValidFormatEncoding(Format, getSTI())) { 5884 Error(Loc, "out of range format"); 5885 return MatchOperand_ParseFail; 5886 } 5887 5888 return MatchOperand_Success; 5889 } 5890 5891 OperandMatchResultTy 5892 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5893 using namespace llvm::AMDGPU::MTBUFFormat; 5894 5895 if (!trySkipId("format", AsmToken::Colon)) 5896 return MatchOperand_NoMatch; 5897 5898 if (trySkipToken(AsmToken::LBrac)) { 5899 StringRef FormatStr; 5900 SMLoc Loc = getLoc(); 5901 if (!parseId(FormatStr, "expected a format string")) 5902 return MatchOperand_ParseFail; 5903 5904 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5905 if (Res == MatchOperand_NoMatch) 5906 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5907 if (Res != MatchOperand_Success) 5908 return Res; 5909 5910 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5911 return MatchOperand_ParseFail; 5912 5913 return MatchOperand_Success; 5914 } 5915 5916 return parseNumericFormat(Format); 5917 } 5918 5919 OperandMatchResultTy 5920 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5921 using namespace llvm::AMDGPU::MTBUFFormat; 5922 5923 int64_t Format = getDefaultFormatEncoding(getSTI()); 5924 OperandMatchResultTy Res; 5925 SMLoc Loc = getLoc(); 5926 5927 // Parse legacy format syntax. 5928 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5929 if (Res == MatchOperand_ParseFail) 5930 return Res; 5931 5932 bool FormatFound = (Res == MatchOperand_Success); 5933 5934 Operands.push_back( 5935 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5936 5937 if (FormatFound) 5938 trySkipToken(AsmToken::Comma); 5939 5940 if (isToken(AsmToken::EndOfStatement)) { 5941 // We are expecting an soffset operand, 5942 // but let matcher handle the error. 5943 return MatchOperand_Success; 5944 } 5945 5946 // Parse soffset. 
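// soffset is parsed before checking for a trailing format specifier so that
// a format found after it (handled below) can still update the placeholder
// format operand that was pushed above.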
5947 Res = parseRegOrImm(Operands); 5948 if (Res != MatchOperand_Success) 5949 return Res; 5950 5951 trySkipToken(AsmToken::Comma); 5952 5953 if (!FormatFound) { 5954 Res = parseSymbolicOrNumericFormat(Format); 5955 if (Res == MatchOperand_ParseFail) 5956 return Res; 5957 if (Res == MatchOperand_Success) { 5958 auto Size = Operands.size(); 5959 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5960 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5961 Op.setImm(Format); 5962 } 5963 return MatchOperand_Success; 5964 } 5965 5966 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5967 Error(getLoc(), "duplicate format"); 5968 return MatchOperand_ParseFail; 5969 } 5970 return MatchOperand_Success; 5971 } 5972 5973 //===----------------------------------------------------------------------===// 5974 // ds 5975 //===----------------------------------------------------------------------===// 5976 5977 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5978 const OperandVector &Operands) { 5979 OptionalImmIndexMap OptionalIdx; 5980 5981 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5982 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5983 5984 // Add the register arguments 5985 if (Op.isReg()) { 5986 Op.addRegOperands(Inst, 1); 5987 continue; 5988 } 5989 5990 // Handle optional arguments 5991 OptionalIdx[Op.getImmTy()] = i; 5992 } 5993 5994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5995 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5997 5998 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5999 } 6000 6001 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6002 bool IsGdsHardcoded) { 6003 OptionalImmIndexMap OptionalIdx; 6004 6005 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6006 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6007 6008 // Add the register arguments 6009 if (Op.isReg()) { 6010 Op.addRegOperands(Inst, 1); 6011 continue; 6012 } 6013 6014 if (Op.isToken() && Op.getToken() == "gds") { 6015 IsGdsHardcoded = true; 6016 continue; 6017 } 6018 6019 // Handle optional arguments 6020 OptionalIdx[Op.getImmTy()] = i; 6021 } 6022 6023 AMDGPUOperand::ImmTy OffsetType = 6024 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6025 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6026 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6027 AMDGPUOperand::ImmTyOffset; 6028 6029 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6030 6031 if (!IsGdsHardcoded) { 6032 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6033 } 6034 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6035 } 6036 6037 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6038 OptionalImmIndexMap OptionalIdx; 6039 6040 unsigned OperandIdx[4]; 6041 unsigned EnMask = 0; 6042 int SrcIdx = 0; 6043 6044 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6045 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6046 6047 // Add the register arguments 6048 if (Op.isReg()) { 6049 assert(SrcIdx < 4); 6050 OperandIdx[SrcIdx] = Inst.size(); 6051 Op.addRegOperands(Inst, 1); 6052 ++SrcIdx; 6053 continue; 6054 } 6055 6056 if (Op.isOff()) { 6057 assert(SrcIdx < 4); 6058 OperandIdx[SrcIdx] = Inst.size(); 6059 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6060 ++SrcIdx; 6061 continue; 6062 } 6063 6064 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6065 Op.addImmOperands(Inst, 1); 6066 continue; 6067 } 6068 6069 if (Op.isToken() && Op.getToken() == "done") 6070 continue; 6071 6072 // Handle optional arguments 6073 OptionalIdx[Op.getImmTy()] = i; 6074 } 6075 6076 assert(SrcIdx == 4); 6077 6078 bool Compr = false; 6079 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6080 Compr = true; 6081 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6082 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6083 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6084 } 6085 6086 for (auto i = 0; i < SrcIdx; ++i) { 6087 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6088 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6089 } 6090 } 6091 6092 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6093 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6094 6095 Inst.addOperand(MCOperand::createImm(EnMask)); 6096 } 6097 6098 //===----------------------------------------------------------------------===// 6099 // s_waitcnt 6100 //===----------------------------------------------------------------------===// 6101 6102 static bool 6103 encodeCnt( 6104 const AMDGPU::IsaVersion ISA, 6105 int64_t &IntVal, 6106 int64_t CntVal, 6107 bool Saturate, 6108 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6109 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6110 { 6111 bool Failed = false; 6112 6113 IntVal = encode(ISA, IntVal, CntVal); 6114 if (CntVal != decode(ISA, IntVal)) { 6115 if (Saturate) { 6116 IntVal = encode(ISA, IntVal, -1); 6117 } else { 6118 Failed = true; 6119 } 6120 } 6121 return Failed; 6122 } 6123 6124 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6125 6126 SMLoc CntLoc = getLoc(); 6127 StringRef CntName = getTokenStr(); 6128 6129 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6130 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6131 return false; 6132 6133 int64_t CntVal; 6134 SMLoc ValLoc = getLoc(); 6135 if (!parseExpr(CntVal)) 6136 return false; 6137 6138 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6139 6140 bool Failed = true; 6141 bool Sat = CntName.endswith("_sat"); 6142 6143 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6144 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6145 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6146 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6147 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6148 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6149 } else { 6150 Error(CntLoc, "invalid counter name " + CntName); 6151 return false; 6152 } 6153 6154 if (Failed) { 6155 Error(ValLoc, "too large value for " + CntName); 6156 return false; 6157 } 6158 6159 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6160 return false; 6161 6162 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6163 if (isToken(AsmToken::EndOfStatement)) { 6164 Error(getLoc(), "expected a counter name"); 6165 return false; 6166 } 6167 } 6168 6169 return true; 6170 } 6171 6172 OperandMatchResultTy 6173 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6174 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6175 int64_t Waitcnt = getWaitcntBitMask(ISA); 6176 SMLoc S = getLoc(); 6177 6178 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6179 while (!isToken(AsmToken::EndOfStatement)) { 6180 if (!parseCnt(Waitcnt)) 6181 return MatchOperand_ParseFail; 6182 } 6183 } else { 6184 if (!parseExpr(Waitcnt)) 6185 return MatchOperand_ParseFail; 6186 } 6187 6188 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6189 return MatchOperand_Success; 6190 } 6191 6192 bool 6193 AMDGPUOperand::isSWaitCnt() const { 6194 return isImm(); 6195 } 6196 6197 //===----------------------------------------------------------------------===// 6198 // hwreg 6199 //===----------------------------------------------------------------------===// 6200 6201 bool 6202 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6203 OperandInfoTy &Offset, 6204 
OperandInfoTy &Width) { 6205 using namespace llvm::AMDGPU::Hwreg; 6206 6207 // The register may be specified by name or using a numeric code 6208 HwReg.Loc = getLoc(); 6209 if (isToken(AsmToken::Identifier) && 6210 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) { 6211 HwReg.IsSymbolic = true; 6212 lex(); // skip register name 6213 } else if (!parseExpr(HwReg.Id, "a register name")) { 6214 return false; 6215 } 6216 6217 if (trySkipToken(AsmToken::RParen)) 6218 return true; 6219 6220 // parse optional params 6221 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6222 return false; 6223 6224 Offset.Loc = getLoc(); 6225 if (!parseExpr(Offset.Id)) 6226 return false; 6227 6228 if (!skipToken(AsmToken::Comma, "expected a comma")) 6229 return false; 6230 6231 Width.Loc = getLoc(); 6232 return parseExpr(Width.Id) && 6233 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6234 } 6235 6236 bool 6237 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6238 const OperandInfoTy &Offset, 6239 const OperandInfoTy &Width) { 6240 6241 using namespace llvm::AMDGPU::Hwreg; 6242 6243 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6244 Error(HwReg.Loc, 6245 "specified hardware register is not supported on this GPU"); 6246 return false; 6247 } 6248 if (!isValidHwreg(HwReg.Id)) { 6249 Error(HwReg.Loc, 6250 "invalid code of hardware register: only 6-bit values are legal"); 6251 return false; 6252 } 6253 if (!isValidHwregOffset(Offset.Id)) { 6254 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6255 return false; 6256 } 6257 if (!isValidHwregWidth(Width.Id)) { 6258 Error(Width.Loc, 6259 "invalid bitfield width: only values from 1 to 32 are legal"); 6260 return false; 6261 } 6262 return true; 6263 } 6264 6265 OperandMatchResultTy 6266 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6267 using namespace llvm::AMDGPU::Hwreg; 6268 6269 int64_t ImmVal = 0; 6270 SMLoc Loc = getLoc(); 6271 6272 if (trySkipId("hwreg", AsmToken::LParen)) { 6273 OperandInfoTy HwReg(ID_UNKNOWN_); 6274 OperandInfoTy Offset(OFFSET_DEFAULT_); 6275 OperandInfoTy Width(WIDTH_DEFAULT_); 6276 if (parseHwregBody(HwReg, Offset, Width) && 6277 validateHwreg(HwReg, Offset, Width)) { 6278 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6279 } else { 6280 return MatchOperand_ParseFail; 6281 } 6282 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6283 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6284 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6285 return MatchOperand_ParseFail; 6286 } 6287 } else { 6288 return MatchOperand_ParseFail; 6289 } 6290 6291 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6292 return MatchOperand_Success; 6293 } 6294 6295 bool AMDGPUOperand::isHwreg() const { 6296 return isImmTy(ImmTyHwreg); 6297 } 6298 6299 //===----------------------------------------------------------------------===// 6300 // sendmsg 6301 //===----------------------------------------------------------------------===// 6302 6303 bool 6304 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6305 OperandInfoTy &Op, 6306 OperandInfoTy &Stream) { 6307 using namespace llvm::AMDGPU::SendMsg; 6308 6309 Msg.Loc = getLoc(); 6310 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6311 Msg.IsSymbolic = true; 6312 lex(); // skip message name 6313 } else if (!parseExpr(Msg.Id, "a message name")) { 6314 return false; 6315 } 6316 6317 if (trySkipToken(AsmToken::Comma)) { 6318 
Op.IsDefined = true; 6319 Op.Loc = getLoc(); 6320 if (isToken(AsmToken::Identifier) && 6321 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6322 lex(); // skip operation name 6323 } else if (!parseExpr(Op.Id, "an operation name")) { 6324 return false; 6325 } 6326 6327 if (trySkipToken(AsmToken::Comma)) { 6328 Stream.IsDefined = true; 6329 Stream.Loc = getLoc(); 6330 if (!parseExpr(Stream.Id)) 6331 return false; 6332 } 6333 } 6334 6335 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6336 } 6337 6338 bool 6339 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6340 const OperandInfoTy &Op, 6341 const OperandInfoTy &Stream) { 6342 using namespace llvm::AMDGPU::SendMsg; 6343 6344 // Validation strictness depends on whether message is specified 6345 // in a symbolic or in a numeric form. In the latter case 6346 // only encoding possibility is checked. 6347 bool Strict = Msg.IsSymbolic; 6348 6349 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6350 Error(Msg.Loc, "invalid message id"); 6351 return false; 6352 } 6353 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6354 if (Op.IsDefined) { 6355 Error(Op.Loc, "message does not support operations"); 6356 } else { 6357 Error(Msg.Loc, "missing message operation"); 6358 } 6359 return false; 6360 } 6361 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6362 Error(Op.Loc, "invalid operation id"); 6363 return false; 6364 } 6365 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6366 Error(Stream.Loc, "message operation does not support streams"); 6367 return false; 6368 } 6369 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6370 Error(Stream.Loc, "invalid message stream id"); 6371 return false; 6372 } 6373 return true; 6374 } 6375 6376 OperandMatchResultTy 6377 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6378 using namespace llvm::AMDGPU::SendMsg; 6379 6380 int64_t ImmVal = 0; 6381 SMLoc Loc = getLoc(); 6382 6383 if (trySkipId("sendmsg", AsmToken::LParen)) { 6384 OperandInfoTy Msg(ID_UNKNOWN_); 6385 OperandInfoTy Op(OP_NONE_); 6386 OperandInfoTy Stream(STREAM_ID_NONE_); 6387 if (parseSendMsgBody(Msg, Op, Stream) && 6388 validateSendMsg(Msg, Op, Stream)) { 6389 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6390 } else { 6391 return MatchOperand_ParseFail; 6392 } 6393 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6394 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6395 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6396 return MatchOperand_ParseFail; 6397 } 6398 } else { 6399 return MatchOperand_ParseFail; 6400 } 6401 6402 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6403 return MatchOperand_Success; 6404 } 6405 6406 bool AMDGPUOperand::isSendMsg() const { 6407 return isImmTy(ImmTySendMsg); 6408 } 6409 6410 //===----------------------------------------------------------------------===// 6411 // v_interp 6412 //===----------------------------------------------------------------------===// 6413 6414 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6415 StringRef Str; 6416 SMLoc S = getLoc(); 6417 6418 if (!parseId(Str)) 6419 return MatchOperand_NoMatch; 6420 6421 int Slot = StringSwitch<int>(Str) 6422 .Case("p10", 0) 6423 .Case("p20", 1) 6424 .Case("p0", 2) 6425 .Default(-1); 6426 6427 if (Slot == -1) { 6428 Error(S, "invalid interpolation slot"); 6429 return MatchOperand_ParseFail; 6430 } 6431 6432 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6433 
AMDGPUOperand::ImmTyInterpSlot)); 6434 return MatchOperand_Success; 6435 } 6436 6437 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6438 StringRef Str; 6439 SMLoc S = getLoc(); 6440 6441 if (!parseId(Str)) 6442 return MatchOperand_NoMatch; 6443 6444 if (!Str.startswith("attr")) { 6445 Error(S, "invalid interpolation attribute"); 6446 return MatchOperand_ParseFail; 6447 } 6448 6449 StringRef Chan = Str.take_back(2); 6450 int AttrChan = StringSwitch<int>(Chan) 6451 .Case(".x", 0) 6452 .Case(".y", 1) 6453 .Case(".z", 2) 6454 .Case(".w", 3) 6455 .Default(-1); 6456 if (AttrChan == -1) { 6457 Error(S, "invalid or missing interpolation attribute channel"); 6458 return MatchOperand_ParseFail; 6459 } 6460 6461 Str = Str.drop_back(2).drop_front(4); 6462 6463 uint8_t Attr; 6464 if (Str.getAsInteger(10, Attr)) { 6465 Error(S, "invalid or missing interpolation attribute number"); 6466 return MatchOperand_ParseFail; 6467 } 6468 6469 if (Attr > 63) { 6470 Error(S, "out of bounds interpolation attribute number"); 6471 return MatchOperand_ParseFail; 6472 } 6473 6474 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6475 6476 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6477 AMDGPUOperand::ImmTyInterpAttr)); 6478 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6479 AMDGPUOperand::ImmTyAttrChan)); 6480 return MatchOperand_Success; 6481 } 6482 6483 //===----------------------------------------------------------------------===// 6484 // exp 6485 //===----------------------------------------------------------------------===// 6486 6487 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6488 using namespace llvm::AMDGPU::Exp; 6489 6490 StringRef Str; 6491 SMLoc S = getLoc(); 6492 6493 if (!parseId(Str)) 6494 return MatchOperand_NoMatch; 6495 6496 unsigned Id = getTgtId(Str); 6497 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6498 Error(S, (Id == ET_INVALID) ? 
6499 "invalid exp target" : 6500 "exp target is not supported on this GPU"); 6501 return MatchOperand_ParseFail; 6502 } 6503 6504 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6505 AMDGPUOperand::ImmTyExpTgt)); 6506 return MatchOperand_Success; 6507 } 6508 6509 //===----------------------------------------------------------------------===// 6510 // parser helpers 6511 //===----------------------------------------------------------------------===// 6512 6513 bool 6514 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6515 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6516 } 6517 6518 bool 6519 AMDGPUAsmParser::isId(const StringRef Id) const { 6520 return isId(getToken(), Id); 6521 } 6522 6523 bool 6524 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6525 return getTokenKind() == Kind; 6526 } 6527 6528 bool 6529 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6530 if (isId(Id)) { 6531 lex(); 6532 return true; 6533 } 6534 return false; 6535 } 6536 6537 bool 6538 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6539 if (isToken(AsmToken::Identifier)) { 6540 StringRef Tok = getTokenStr(); 6541 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6542 lex(); 6543 return true; 6544 } 6545 } 6546 return false; 6547 } 6548 6549 bool 6550 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6551 if (isId(Id) && peekToken().is(Kind)) { 6552 lex(); 6553 lex(); 6554 return true; 6555 } 6556 return false; 6557 } 6558 6559 bool 6560 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6561 if (isToken(Kind)) { 6562 lex(); 6563 return true; 6564 } 6565 return false; 6566 } 6567 6568 bool 6569 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6570 const StringRef ErrMsg) { 6571 if (!trySkipToken(Kind)) { 6572 Error(getLoc(), ErrMsg); 6573 return false; 6574 } 6575 return true; 6576 } 6577 6578 bool 6579 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6580 SMLoc S = getLoc(); 6581 6582 const MCExpr *Expr; 6583 if (Parser.parseExpression(Expr)) 6584 return false; 6585 6586 if (Expr->evaluateAsAbsolute(Imm)) 6587 return true; 6588 6589 if (Expected.empty()) { 6590 Error(S, "expected absolute expression"); 6591 } else { 6592 Error(S, Twine("expected ", Expected) + 6593 Twine(" or an absolute expression")); 6594 } 6595 return false; 6596 } 6597 6598 bool 6599 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6600 SMLoc S = getLoc(); 6601 6602 const MCExpr *Expr; 6603 if (Parser.parseExpression(Expr)) 6604 return false; 6605 6606 int64_t IntVal; 6607 if (Expr->evaluateAsAbsolute(IntVal)) { 6608 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6609 } else { 6610 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6611 } 6612 return true; 6613 } 6614 6615 bool 6616 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6617 if (isToken(AsmToken::String)) { 6618 Val = getToken().getStringContents(); 6619 lex(); 6620 return true; 6621 } else { 6622 Error(getLoc(), ErrMsg); 6623 return false; 6624 } 6625 } 6626 6627 bool 6628 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6629 if (isToken(AsmToken::Identifier)) { 6630 Val = getTokenStr(); 6631 lex(); 6632 return true; 6633 } else { 6634 if (!ErrMsg.empty()) 6635 Error(getLoc(), ErrMsg); 6636 return false; 6637 } 6638 } 6639 6640 AsmToken 6641 AMDGPUAsmParser::getToken() const { 6642 return Parser.getTok(); 6643 } 6644 6645 AsmToken 6646 
AMDGPUAsmParser::peekToken() { 6647 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6648 } 6649 6650 void 6651 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6652 auto TokCount = getLexer().peekTokens(Tokens); 6653 6654 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6655 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6656 } 6657 6658 AsmToken::TokenKind 6659 AMDGPUAsmParser::getTokenKind() const { 6660 return getLexer().getKind(); 6661 } 6662 6663 SMLoc 6664 AMDGPUAsmParser::getLoc() const { 6665 return getToken().getLoc(); 6666 } 6667 6668 StringRef 6669 AMDGPUAsmParser::getTokenStr() const { 6670 return getToken().getString(); 6671 } 6672 6673 void 6674 AMDGPUAsmParser::lex() { 6675 Parser.Lex(); 6676 } 6677 6678 SMLoc 6679 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6680 const OperandVector &Operands) const { 6681 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6682 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6683 if (Test(Op)) 6684 return Op.getStartLoc(); 6685 } 6686 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6687 } 6688 6689 SMLoc 6690 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6691 const OperandVector &Operands) const { 6692 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6693 return getOperandLoc(Test, Operands); 6694 } 6695 6696 SMLoc 6697 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6698 const OperandVector &Operands) const { 6699 auto Test = [=](const AMDGPUOperand& Op) { 6700 return Op.isRegKind() && Op.getReg() == Reg; 6701 }; 6702 return getOperandLoc(Test, Operands); 6703 } 6704 6705 SMLoc 6706 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6707 auto Test = [](const AMDGPUOperand& Op) { 6708 return Op.IsImmKindLiteral() || Op.isExpr(); 6709 }; 6710 return getOperandLoc(Test, Operands); 6711 } 6712 6713 SMLoc 6714 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6715 auto Test = [](const AMDGPUOperand& Op) { 6716 return Op.isImmKindConst(); 6717 }; 6718 return getOperandLoc(Test, Operands); 6719 } 6720 6721 //===----------------------------------------------------------------------===// 6722 // swizzle 6723 //===----------------------------------------------------------------------===// 6724 6725 LLVM_READNONE 6726 static unsigned 6727 encodeBitmaskPerm(const unsigned AndMask, 6728 const unsigned OrMask, 6729 const unsigned XorMask) { 6730 using namespace llvm::AMDGPU::Swizzle; 6731 6732 return BITMASK_PERM_ENC | 6733 (AndMask << BITMASK_AND_SHIFT) | 6734 (OrMask << BITMASK_OR_SHIFT) | 6735 (XorMask << BITMASK_XOR_SHIFT); 6736 } 6737 6738 bool 6739 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6740 const unsigned MinVal, 6741 const unsigned MaxVal, 6742 const StringRef ErrMsg, 6743 SMLoc &Loc) { 6744 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6745 return false; 6746 } 6747 Loc = getLoc(); 6748 if (!parseExpr(Op)) { 6749 return false; 6750 } 6751 if (Op < MinVal || Op > MaxVal) { 6752 Error(Loc, ErrMsg); 6753 return false; 6754 } 6755 6756 return true; 6757 } 6758 6759 bool 6760 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6761 const unsigned MinVal, 6762 const unsigned MaxVal, 6763 const StringRef ErrMsg) { 6764 SMLoc Loc; 6765 for (unsigned i = 0; i < OpNum; ++i) { 6766 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6767 return false; 6768 } 6769 6770 return true; 6771 } 6772 6773 bool 6774 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6775 using namespace llvm::AMDGPU::Swizzle; 6776 6777 int64_t Lane[LANE_NUM]; 6778 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6779 "expected a 2-bit lane id")) { 6780 Imm = QUAD_PERM_ENC; 6781 for (unsigned I = 0; I < LANE_NUM; ++I) { 6782 Imm |= Lane[I] << (LANE_SHIFT * I); 6783 } 6784 return true; 6785 } 6786 return false; 6787 } 6788 6789 bool 6790 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6791 using namespace llvm::AMDGPU::Swizzle; 6792 6793 SMLoc Loc; 6794 int64_t GroupSize; 6795 int64_t LaneIdx; 6796 6797 if (!parseSwizzleOperand(GroupSize, 6798 2, 32, 6799 "group size must be in the interval [2,32]", 6800 Loc)) { 6801 return false; 6802 } 6803 if (!isPowerOf2_64(GroupSize)) { 6804 Error(Loc, "group size must be a power of two"); 6805 return false; 6806 } 6807 if (parseSwizzleOperand(LaneIdx, 6808 0, GroupSize - 1, 6809 "lane id must be in the interval [0,group size - 1]", 6810 Loc)) { 6811 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6812 return true; 6813 } 6814 return false; 6815 } 6816 6817 bool 6818 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6819 using namespace llvm::AMDGPU::Swizzle; 6820 6821 SMLoc Loc; 6822 int64_t GroupSize; 6823 6824 if (!parseSwizzleOperand(GroupSize, 6825 2, 32, 6826 "group size must be in the interval [2,32]", 6827 Loc)) { 6828 return false; 6829 } 6830 if (!isPowerOf2_64(GroupSize)) { 6831 Error(Loc, "group size must be a power of two"); 6832 return false; 6833 } 6834 6835 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6836 return true; 6837 } 6838 6839 bool 6840 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6841 using namespace llvm::AMDGPU::Swizzle; 6842 6843 SMLoc Loc; 6844 int64_t GroupSize; 6845 6846 if (!parseSwizzleOperand(GroupSize, 6847 1, 16, 6848 "group size must be in the interval [1,16]", 6849 Loc)) { 6850 return false; 6851 } 6852 if (!isPowerOf2_64(GroupSize)) { 6853 Error(Loc, "group size must be a power of two"); 6854 return false; 6855 } 6856 6857 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6858 return true; 6859 } 6860 6861 bool 6862 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6863 using namespace llvm::AMDGPU::Swizzle; 6864 6865 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6866 return false; 6867 } 6868 6869 StringRef Ctl; 6870 SMLoc StrLoc = getLoc(); 6871 if (!parseString(Ctl)) { 6872 return false; 6873 } 6874 if (Ctl.size() != BITMASK_WIDTH) { 6875 Error(StrLoc, "expected a 5-character mask"); 6876 return false; 6877 } 6878 6879 unsigned AndMask = 0; 6880 unsigned OrMask = 0; 6881 unsigned XorMask = 0; 6882 6883 for (size_t i = 0; i < Ctl.size(); ++i) { 6884 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6885 switch(Ctl[i]) { 6886 default: 6887 Error(StrLoc, "invalid mask"); 6888 return false; 6889 case '0': 6890 break; 6891 case '1': 6892 OrMask |= Mask; 6893 break; 6894 case 'p': 6895 AndMask |= Mask; 6896 break; 6897 case 'i': 6898 AndMask |= Mask; 6899 XorMask |= Mask; 6900 break; 6901 } 6902 } 6903 6904 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6905 return true; 6906 } 6907 6908 bool 6909 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6910 6911 SMLoc OffsetLoc = getLoc(); 6912 6913 if (!parseExpr(Imm, "a swizzle macro")) { 6914 return false; 6915 } 6916 if (!isUInt<16>(Imm)) { 6917 Error(OffsetLoc, "expected a 16-bit offset"); 6918 return false; 6919 } 6920 return true; 6921 } 6922 6923 bool 6924 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6925 using namespace llvm::AMDGPU::Swizzle; 6926 6927 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6928 6929 SMLoc ModeLoc = getLoc(); 6930 bool Ok = false; 6931 6932 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6933 Ok = parseSwizzleQuadPerm(Imm); 6934 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6935 Ok = parseSwizzleBitmaskPerm(Imm); 6936 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6937 Ok = parseSwizzleBroadcast(Imm); 6938 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6939 Ok = parseSwizzleSwap(Imm); 6940 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6941 Ok = parseSwizzleReverse(Imm); 6942 } else { 6943 Error(ModeLoc, "expected a swizzle mode"); 6944 } 6945 6946 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6947 } 6948 6949 return false; 6950 } 6951 6952 OperandMatchResultTy 6953 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6954 SMLoc S = getLoc(); 6955 int64_t Imm = 0; 6956 6957 if (trySkipId("offset")) { 6958 6959 bool Ok = false; 6960 if (skipToken(AsmToken::Colon, "expected a colon")) { 6961 if (trySkipId("swizzle")) { 6962 Ok = parseSwizzleMacro(Imm); 6963 } else { 6964 Ok = parseSwizzleOffset(Imm); 6965 } 6966 } 6967 6968 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6969 6970 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6971 } else { 6972 // Swizzle "offset" operand is optional. 6973 // If it is omitted, try parsing other optional operands. 6974 return parseOptionalOpr(Operands); 6975 } 6976 } 6977 6978 bool 6979 AMDGPUOperand::isSwizzle() const { 6980 return isImmTy(ImmTySwizzle); 6981 } 6982 6983 //===----------------------------------------------------------------------===// 6984 // VGPR Index Mode 6985 //===----------------------------------------------------------------------===// 6986 6987 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6988 6989 using namespace llvm::AMDGPU::VGPRIndexMode; 6990 6991 if (trySkipToken(AsmToken::RParen)) { 6992 return OFF; 6993 } 6994 6995 int64_t Imm = 0; 6996 6997 while (true) { 6998 unsigned Mode = 0; 6999 SMLoc S = getLoc(); 7000 7001 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7002 if (trySkipId(IdSymbolic[ModeId])) { 7003 Mode = 1 << ModeId; 7004 break; 7005 } 7006 } 7007 7008 if (Mode == 0) { 7009 Error(S, (Imm == 0)?
7010 "expected a VGPR index mode or a closing parenthesis" : 7011 "expected a VGPR index mode"); 7012 return UNDEF; 7013 } 7014 7015 if (Imm & Mode) { 7016 Error(S, "duplicate VGPR index mode"); 7017 return UNDEF; 7018 } 7019 Imm |= Mode; 7020 7021 if (trySkipToken(AsmToken::RParen)) 7022 break; 7023 if (!skipToken(AsmToken::Comma, 7024 "expected a comma or a closing parenthesis")) 7025 return UNDEF; 7026 } 7027 7028 return Imm; 7029 } 7030 7031 OperandMatchResultTy 7032 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7033 7034 using namespace llvm::AMDGPU::VGPRIndexMode; 7035 7036 int64_t Imm = 0; 7037 SMLoc S = getLoc(); 7038 7039 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7040 Imm = parseGPRIdxMacro(); 7041 if (Imm == UNDEF) 7042 return MatchOperand_ParseFail; 7043 } else { 7044 if (getParser().parseAbsoluteExpression(Imm)) 7045 return MatchOperand_ParseFail; 7046 if (Imm < 0 || !isUInt<4>(Imm)) { 7047 Error(S, "invalid immediate: only 4-bit values are legal"); 7048 return MatchOperand_ParseFail; 7049 } 7050 } 7051 7052 Operands.push_back( 7053 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7054 return MatchOperand_Success; 7055 } 7056 7057 bool AMDGPUOperand::isGPRIdxMode() const { 7058 return isImmTy(ImmTyGprIdxMode); 7059 } 7060 7061 //===----------------------------------------------------------------------===// 7062 // sopp branch targets 7063 //===----------------------------------------------------------------------===// 7064 7065 OperandMatchResultTy 7066 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7067 7068 // Make sure we are not parsing something 7069 // that looks like a label or an expression but is not. 7070 // This will improve error messages. 7071 if (isRegister() || isModifier()) 7072 return MatchOperand_NoMatch; 7073 7074 if (!parseExpr(Operands)) 7075 return MatchOperand_ParseFail; 7076 7077 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7078 assert(Opr.isImm() || Opr.isExpr()); 7079 SMLoc Loc = Opr.getStartLoc(); 7080 7081 // Currently we do not support arbitrary expressions as branch targets. 7082 // Only labels and absolute expressions are accepted. 
7083 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7084 Error(Loc, "expected an absolute expression or a label"); 7085 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7086 Error(Loc, "expected a 16-bit signed jump offset"); 7087 } 7088 7089 return MatchOperand_Success; 7090 } 7091 7092 //===----------------------------------------------------------------------===// 7093 // Boolean holding registers 7094 //===----------------------------------------------------------------------===// 7095 7096 OperandMatchResultTy 7097 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7098 return parseReg(Operands); 7099 } 7100 7101 //===----------------------------------------------------------------------===// 7102 // mubuf 7103 //===----------------------------------------------------------------------===// 7104 7105 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7106 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7107 } 7108 7109 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7110 const OperandVector &Operands, 7111 bool IsAtomic, 7112 bool IsLds) { 7113 bool IsLdsOpcode = IsLds; 7114 bool HasLdsModifier = false; 7115 OptionalImmIndexMap OptionalIdx; 7116 unsigned FirstOperandIdx = 1; 7117 bool IsAtomicReturn = false; 7118 7119 if (IsAtomic) { 7120 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7121 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7122 if (!Op.isCPol()) 7123 continue; 7124 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7125 break; 7126 } 7127 7128 if (!IsAtomicReturn) { 7129 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7130 if (NewOpc != -1) 7131 Inst.setOpcode(NewOpc); 7132 } 7133 7134 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7135 SIInstrFlags::IsAtomicRet; 7136 } 7137 7138 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7139 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7140 7141 // Add the register arguments 7142 if (Op.isReg()) { 7143 Op.addRegOperands(Inst, 1); 7144 // Insert a tied src for atomic return dst. 7145 // This cannot be postponed as subsequent calls to 7146 // addImmOperands rely on correct number of MC operands. 7147 if (IsAtomicReturn && i == FirstOperandIdx) 7148 Op.addRegOperands(Inst, 1); 7149 continue; 7150 } 7151 7152 // Handle the case where soffset is an immediate 7153 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7154 Op.addImmOperands(Inst, 1); 7155 continue; 7156 } 7157 7158 HasLdsModifier |= Op.isLDS(); 7159 7160 // Handle tokens like 'offen' which are sometimes hard-coded into the 7161 // asm string. There are no MCInst operands for these. 7162 if (Op.isToken()) { 7163 continue; 7164 } 7165 assert(Op.isImm()); 7166 7167 // Handle optional arguments 7168 OptionalIdx[Op.getImmTy()] = i; 7169 } 7170 7171 // This is a workaround for an llvm quirk which may result in an 7172 // incorrect instruction selection. Lds and non-lds versions of 7173 // MUBUF instructions are identical except that lds versions 7174 // have mandatory 'lds' modifier. However this modifier follows 7175 // optional modifiers and llvm asm matcher regards this 'lds' 7176 // modifier as an optional one. As a result, an lds version 7177 // of opcode may be selected even if it has no 'lds' modifier. 7178 if (IsLdsOpcode && !HasLdsModifier) { 7179 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7180 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7181 Inst.setOpcode(NoLdsOpcode); 7182 IsLdsOpcode = false; 7183 } 7184 } 7185 7186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7187 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7188 7189 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7190 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7191 } 7192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7193 } 7194 7195 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7196 OptionalImmIndexMap OptionalIdx; 7197 7198 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7199 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7200 7201 // Add the register arguments 7202 if (Op.isReg()) { 7203 Op.addRegOperands(Inst, 1); 7204 continue; 7205 } 7206 7207 // Handle the case where soffset is an immediate 7208 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7209 Op.addImmOperands(Inst, 1); 7210 continue; 7211 } 7212 7213 // Handle tokens like 'offen' which are sometimes hard-coded into the 7214 // asm string. There are no MCInst operands for these. 7215 if (Op.isToken()) { 7216 continue; 7217 } 7218 assert(Op.isImm()); 7219 7220 // Handle optional arguments 7221 OptionalIdx[Op.getImmTy()] = i; 7222 } 7223 7224 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7225 AMDGPUOperand::ImmTyOffset); 7226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7227 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7228 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7229 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7230 } 7231 7232 //===----------------------------------------------------------------------===// 7233 // mimg 7234 //===----------------------------------------------------------------------===// 7235 7236 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7237 bool IsAtomic) { 7238 unsigned I = 1; 7239 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7240 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7241 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7242 } 7243 7244 if (IsAtomic) { 7245 // Add src, same as dst 7246 assert(Desc.getNumDefs() == 1); 7247 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7248 } 7249 7250 OptionalImmIndexMap OptionalIdx; 7251 7252 for (unsigned E = Operands.size(); I != E; ++I) { 7253 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7254 7255 // Add the register arguments 7256 if (Op.isReg()) { 7257 Op.addRegOperands(Inst, 1); 7258 } else if (Op.isImmModifier()) { 7259 OptionalIdx[Op.getImmTy()] = I; 7260 } else if (!Op.isToken()) { 7261 llvm_unreachable("unexpected operand type"); 7262 } 7263 } 7264 7265 bool IsGFX10Plus = isGFX10Plus(); 7266 7267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7268 if (IsGFX10Plus) 7269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7270 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7271 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7272 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7273 if (IsGFX10Plus) 7274 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7275 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7276 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7277 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7278 if (!IsGFX10Plus) 7279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7281 } 7282 7283 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7284 cvtMIMG(Inst, Operands, true); 7285 } 7286 7287 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7288 OptionalImmIndexMap OptionalIdx; 7289 bool IsAtomicReturn = false; 7290 7291 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7292 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7293 if (!Op.isCPol()) 7294 continue; 7295 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7296 break; 7297 } 7298 7299 if (!IsAtomicReturn) { 7300 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7301 if (NewOpc != -1) 7302 Inst.setOpcode(NewOpc); 7303 } 7304 7305 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7306 SIInstrFlags::IsAtomicRet; 7307 7308 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7309 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7310 7311 // Add the register arguments 7312 if (Op.isReg()) { 7313 Op.addRegOperands(Inst, 1); 7314 if (IsAtomicReturn && i == 1) 7315 Op.addRegOperands(Inst, 1); 7316 continue; 7317 } 7318 7319 // Handle the case where soffset is an immediate 7320 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7321 Op.addImmOperands(Inst, 1); 7322 continue; 7323 } 7324 7325 // Handle tokens like 'offen' which are sometimes hard-coded into the 7326 // asm string. There are no MCInst operands for these. 7327 if (Op.isToken()) { 7328 continue; 7329 } 7330 assert(Op.isImm()); 7331 7332 // Handle optional arguments 7333 OptionalIdx[Op.getImmTy()] = i; 7334 } 7335 7336 if ((int)Inst.getNumOperands() <= 7337 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7338 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7339 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7340 } 7341 7342 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7343 const OperandVector &Operands) { 7344 for (unsigned I = 1; I < Operands.size(); ++I) { 7345 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7346 if (Operand.isReg()) 7347 Operand.addRegOperands(Inst, 1); 7348 } 7349 7350 Inst.addOperand(MCOperand::createImm(1)); // a16 7351 } 7352 7353 //===----------------------------------------------------------------------===// 7354 // smrd 7355 //===----------------------------------------------------------------------===// 7356 7357 bool AMDGPUOperand::isSMRDOffset8() const { 7358 return isImm() && isUInt<8>(getImm()); 7359 } 7360 7361 bool AMDGPUOperand::isSMEMOffset() const { 7362 return isImm(); // Offset range is checked later by validator. 7363 } 7364 7365 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7366 // 32-bit literals are only supported on CI and we only want to use them 7367 // when the offset is > 8-bits. 
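// For illustration (assumed syntax, not verified assembler output): on CI an
// offset such as 0x100 does not fit in the 8-bit SMRD field, so an operand in
//   s_load_dword s0, s[0:1], 0x100
// would match this predicate and use the 32-bit literal encoding instead.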
7368 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7369 } 7370 7371 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7372 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7373 } 7374 7375 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7376 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7377 } 7378 7379 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7380 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7381 } 7382 7383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7384 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7385 } 7386 7387 //===----------------------------------------------------------------------===// 7388 // vop3 7389 //===----------------------------------------------------------------------===// 7390 7391 static bool ConvertOmodMul(int64_t &Mul) { 7392 if (Mul != 1 && Mul != 2 && Mul != 4) 7393 return false; 7394 7395 Mul >>= 1; 7396 return true; 7397 } 7398 7399 static bool ConvertOmodDiv(int64_t &Div) { 7400 if (Div == 1) { 7401 Div = 0; 7402 return true; 7403 } 7404 7405 if (Div == 2) { 7406 Div = 3; 7407 return true; 7408 } 7409 7410 return false; 7411 } 7412 7413 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7414 // This is intentional and ensures compatibility with sp3. 7415 // See bug 35397 for details. 7416 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7417 if (BoundCtrl == 0 || BoundCtrl == 1) { 7418 BoundCtrl = 1; 7419 return true; 7420 } 7421 return false; 7422 } 7423 7424 // Note: the order in this table matches the order of operands in AsmString. 7425 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7426 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7427 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7428 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7429 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7430 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7431 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7432 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7433 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7434 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7435 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7436 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7437 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7438 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7439 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7440 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7441 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7442 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7443 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7444 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7445 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7446 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7447 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7448 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7449 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7450 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7451 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7452 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7453 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7454 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7455 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7456 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7457 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7458 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7459 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7460 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7461 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7462 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7463 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7464 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7465 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7466 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7467 }; 7468 7469 void AMDGPUAsmParser::onBeginOfFile() { 7470 if (!getParser().getStreamer().getTargetStreamer() || 7471 getSTI().getTargetTriple().getArch() == Triple::r600) 7472 return; 7473 7474 if (!getTargetStreamer().getTargetID()) 7475 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7476 7477 if (isHsaAbiVersion3AndAbove(&getSTI())) 7478 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7479 } 7480 7481 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7482 7483 OperandMatchResultTy res = parseOptionalOpr(Operands); 7484 7485 // This is a hack to enable hardcoded mandatory operands which follow 7486 // optional operands. 7487 // 7488 // The current design assumes that all operands after the first optional operand 7489 // are also optional. However, the implementation of some instructions violates 7490 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands). 7491 // 7492 // To alleviate this problem, we have to (implicitly) parse extra operands 7493 // to make sure the autogenerated parser of custom operands never hits hardcoded 7494 // mandatory operands.
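// For example (illustrative, based on the 'glc' case mentioned above): a
// flat/global atomic with return hard-codes 'glc' in its AsmString after
// optional operands such as 'offset', so the lookahead loop below keeps
// consuming optional operands until that hardcoded token can be matched.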
7495 7496 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7497 if (res != MatchOperand_Success || 7498 isToken(AsmToken::EndOfStatement)) 7499 break; 7500 7501 trySkipToken(AsmToken::Comma); 7502 res = parseOptionalOpr(Operands); 7503 } 7504 7505 return res; 7506 } 7507 7508 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7509 OperandMatchResultTy res; 7510 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7511 // try to parse any optional operand here 7512 if (Op.IsBit) { 7513 res = parseNamedBit(Op.Name, Operands, Op.Type); 7514 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7515 res = parseOModOperand(Operands); 7516 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7517 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7518 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7519 res = parseSDWASel(Operands, Op.Name, Op.Type); 7520 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7521 res = parseSDWADstUnused(Operands); 7522 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7523 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7524 Op.Type == AMDGPUOperand::ImmTyNegLo || 7525 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7526 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7527 Op.ConvertResult); 7528 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7529 res = parseDim(Operands); 7530 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7531 res = parseCPol(Operands); 7532 } else { 7533 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7534 } 7535 if (res != MatchOperand_NoMatch) { 7536 return res; 7537 } 7538 } 7539 return MatchOperand_NoMatch; 7540 } 7541 7542 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7543 StringRef Name = getTokenStr(); 7544 if (Name == "mul") { 7545 return parseIntWithPrefix("mul", Operands, 7546 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7547 } 7548 7549 if (Name == "div") { 7550 return parseIntWithPrefix("div", Operands, 7551 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7552 } 7553 7554 return MatchOperand_NoMatch; 7555 } 7556 7557 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7558 cvtVOP3P(Inst, Operands); 7559 7560 int Opc = Inst.getOpcode(); 7561 7562 int SrcNum; 7563 const int Ops[] = { AMDGPU::OpName::src0, 7564 AMDGPU::OpName::src1, 7565 AMDGPU::OpName::src2 }; 7566 for (SrcNum = 0; 7567 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7568 ++SrcNum); 7569 assert(SrcNum > 0); 7570 7571 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7572 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7573 7574 if ((OpSel & (1 << SrcNum)) != 0) { 7575 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7576 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7577 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7578 } 7579 } 7580 7581 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7582 // 1. This operand is input modifiers 7583 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7584 // 2. This is not last operand 7585 && Desc.NumOperands > (OpNum + 1) 7586 // 3. Next operand is register class 7587 && Desc.OpInfo[OpNum + 1].RegClass != -1 7588 // 4. 
Next register is not tied to any other operand 7589 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7590 } 7591 7592 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7593 { 7594 OptionalImmIndexMap OptionalIdx; 7595 unsigned Opc = Inst.getOpcode(); 7596 7597 unsigned I = 1; 7598 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7599 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7600 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7601 } 7602 7603 for (unsigned E = Operands.size(); I != E; ++I) { 7604 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7605 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7606 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7607 } else if (Op.isInterpSlot() || 7608 Op.isInterpAttr() || 7609 Op.isAttrChan()) { 7610 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7611 } else if (Op.isImmModifier()) { 7612 OptionalIdx[Op.getImmTy()] = I; 7613 } else { 7614 llvm_unreachable("unhandled operand type"); 7615 } 7616 } 7617 7618 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7619 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7620 } 7621 7622 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7623 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7624 } 7625 7626 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7628 } 7629 } 7630 7631 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7632 OptionalImmIndexMap &OptionalIdx) { 7633 unsigned Opc = Inst.getOpcode(); 7634 7635 unsigned I = 1; 7636 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7637 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7638 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7639 } 7640 7641 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7642 // This instruction has src modifiers 7643 for (unsigned E = Operands.size(); I != E; ++I) { 7644 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7645 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7646 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7647 } else if (Op.isImmModifier()) { 7648 OptionalIdx[Op.getImmTy()] = I; 7649 } else if (Op.isRegOrImm()) { 7650 Op.addRegOrImmOperands(Inst, 1); 7651 } else { 7652 llvm_unreachable("unhandled operand type"); 7653 } 7654 } 7655 } else { 7656 // No src modifiers 7657 for (unsigned E = Operands.size(); I != E; ++I) { 7658 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7659 if (Op.isMod()) { 7660 OptionalIdx[Op.getImmTy()] = I; 7661 } else { 7662 Op.addRegOrImmOperands(Inst, 1); 7663 } 7664 } 7665 } 7666 7667 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7669 } 7670 7671 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7673 } 7674 7675 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7676 // it has src2 register operand that is tied to dst operand 7677 // we don't allow modifiers for this operand in assembler so src2_modifiers 7678 // should be 0. 
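// For example (assumed syntax): in "v_mac_f32_e64 v0, v1, v2" the src2
// operand is tied to the destination v0, so the code below inserts
// src2_modifiers = 0 and duplicates the dst register operand into src2.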
7679 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7680 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7681 Opc == AMDGPU::V_MAC_F32_e64_vi || 7682 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7683 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7684 Opc == AMDGPU::V_MAC_F16_e64_vi || 7685 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7686 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7687 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7688 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7689 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7690 auto it = Inst.begin(); 7691 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7692 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7693 ++it; 7694 // Copy the operand to ensure it's not invalidated when Inst grows. 7695 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7696 } 7697 } 7698 7699 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7700 OptionalImmIndexMap OptionalIdx; 7701 cvtVOP3(Inst, Operands, OptionalIdx); 7702 } 7703 7704 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7705 OptionalImmIndexMap &OptIdx) { 7706 const int Opc = Inst.getOpcode(); 7707 const MCInstrDesc &Desc = MII.get(Opc); 7708 7709 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7710 7711 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7712 assert(!IsPacked); 7713 Inst.addOperand(Inst.getOperand(0)); 7714 } 7715 7716 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7717 // instruction, and then figure out where to actually put the modifiers 7718 7719 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7720 if (OpSelIdx != -1) { 7721 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7722 } 7723 7724 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7725 if (OpSelHiIdx != -1) { 7726 int DefaultVal = IsPacked ? 
-1 : 0; 7727 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7728 DefaultVal); 7729 } 7730 7731 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7732 if (NegLoIdx != -1) { 7733 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7734 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7735 } 7736 7737 const int Ops[] = { AMDGPU::OpName::src0, 7738 AMDGPU::OpName::src1, 7739 AMDGPU::OpName::src2 }; 7740 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7741 AMDGPU::OpName::src1_modifiers, 7742 AMDGPU::OpName::src2_modifiers }; 7743 7744 unsigned OpSel = 0; 7745 unsigned OpSelHi = 0; 7746 unsigned NegLo = 0; 7747 unsigned NegHi = 0; 7748 7749 if (OpSelIdx != -1) 7750 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7751 7752 if (OpSelHiIdx != -1) 7753 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7754 7755 if (NegLoIdx != -1) { 7756 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7757 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7758 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7759 } 7760 7761 for (int J = 0; J < 3; ++J) { 7762 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7763 if (OpIdx == -1) 7764 break; 7765 7766 uint32_t ModVal = 0; 7767 7768 if ((OpSel & (1 << J)) != 0) 7769 ModVal |= SISrcMods::OP_SEL_0; 7770 7771 if ((OpSelHi & (1 << J)) != 0) 7772 ModVal |= SISrcMods::OP_SEL_1; 7773 7774 if ((NegLo & (1 << J)) != 0) 7775 ModVal |= SISrcMods::NEG; 7776 7777 if ((NegHi & (1 << J)) != 0) 7778 ModVal |= SISrcMods::NEG_HI; 7779 7780 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7781 7782 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7783 } 7784 } 7785 7786 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7787 OptionalImmIndexMap OptIdx; 7788 cvtVOP3(Inst, Operands, OptIdx); 7789 cvtVOP3P(Inst, Operands, OptIdx); 7790 } 7791 7792 //===----------------------------------------------------------------------===// 7793 // dpp 7794 //===----------------------------------------------------------------------===// 7795 7796 bool AMDGPUOperand::isDPP8() const { 7797 return isImmTy(ImmTyDPP8); 7798 } 7799 7800 bool AMDGPUOperand::isDPPCtrl() const { 7801 using namespace AMDGPU::DPP; 7802 7803 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7804 if (result) { 7805 int64_t Imm = getImm(); 7806 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7807 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7808 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7809 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7810 (Imm == DppCtrl::WAVE_SHL1) || 7811 (Imm == DppCtrl::WAVE_ROL1) || 7812 (Imm == DppCtrl::WAVE_SHR1) || 7813 (Imm == DppCtrl::WAVE_ROR1) || 7814 (Imm == DppCtrl::ROW_MIRROR) || 7815 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7816 (Imm == DppCtrl::BCAST15) || 7817 (Imm == DppCtrl::BCAST31) || 7818 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7819 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7820 } 7821 return false; 7822 } 7823 7824 //===----------------------------------------------------------------------===// 7825 // mAI 7826 //===----------------------------------------------------------------------===// 7827 7828 bool AMDGPUOperand::isBLGP() const { 7829 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7830 } 7831 7832 bool 
AMDGPUOperand::isCBSZ() const { 7833 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7834 } 7835 7836 bool AMDGPUOperand::isABID() const { 7837 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7838 } 7839 7840 bool AMDGPUOperand::isS16Imm() const { 7841 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7842 } 7843 7844 bool AMDGPUOperand::isU16Imm() const { 7845 return isImm() && isUInt<16>(getImm()); 7846 } 7847 7848 //===----------------------------------------------------------------------===// 7849 // dim 7850 //===----------------------------------------------------------------------===// 7851 7852 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7853 // We want to allow "dim:1D" etc., 7854 // but the initial 1 is tokenized as an integer. 7855 std::string Token; 7856 if (isToken(AsmToken::Integer)) { 7857 SMLoc Loc = getToken().getEndLoc(); 7858 Token = std::string(getTokenStr()); 7859 lex(); 7860 if (getLoc() != Loc) 7861 return false; 7862 } 7863 7864 StringRef Suffix; 7865 if (!parseId(Suffix)) 7866 return false; 7867 Token += Suffix; 7868 7869 StringRef DimId = Token; 7870 if (DimId.startswith("SQ_RSRC_IMG_")) 7871 DimId = DimId.drop_front(12); 7872 7873 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7874 if (!DimInfo) 7875 return false; 7876 7877 Encoding = DimInfo->Encoding; 7878 return true; 7879 } 7880 7881 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7882 if (!isGFX10Plus()) 7883 return MatchOperand_NoMatch; 7884 7885 SMLoc S = getLoc(); 7886 7887 if (!trySkipId("dim", AsmToken::Colon)) 7888 return MatchOperand_NoMatch; 7889 7890 unsigned Encoding; 7891 SMLoc Loc = getLoc(); 7892 if (!parseDimId(Encoding)) { 7893 Error(Loc, "invalid dim value"); 7894 return MatchOperand_ParseFail; 7895 } 7896 7897 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7898 AMDGPUOperand::ImmTyDim)); 7899 return MatchOperand_Success; 7900 } 7901 7902 //===----------------------------------------------------------------------===// 7903 // dpp 7904 //===----------------------------------------------------------------------===// 7905 7906 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7907 SMLoc S = getLoc(); 7908 7909 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7910 return MatchOperand_NoMatch; 7911 7912 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7913 7914 int64_t Sels[8]; 7915 7916 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7917 return MatchOperand_ParseFail; 7918 7919 for (size_t i = 0; i < 8; ++i) { 7920 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7921 return MatchOperand_ParseFail; 7922 7923 SMLoc Loc = getLoc(); 7924 if (getParser().parseAbsoluteExpression(Sels[i])) 7925 return MatchOperand_ParseFail; 7926 if (0 > Sels[i] || 7 < Sels[i]) { 7927 Error(Loc, "expected a 3-bit value"); 7928 return MatchOperand_ParseFail; 7929 } 7930 } 7931 7932 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7933 return MatchOperand_ParseFail; 7934 7935 unsigned DPP8 = 0; 7936 for (size_t i = 0; i < 8; ++i) 7937 DPP8 |= (Sels[i] << (i * 3)); 7938 7939 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7940 return MatchOperand_Success; 7941 } 7942 7943 bool 7944 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7945 const OperandVector &Operands) { 7946 if (Ctrl == "row_newbcast") 7947 return isGFX90A(); 7948 7949 if (Ctrl == "row_share" || 7950 Ctrl 
== "row_xmask") 7951 return isGFX10Plus(); 7952 7953 if (Ctrl == "wave_shl" || 7954 Ctrl == "wave_shr" || 7955 Ctrl == "wave_rol" || 7956 Ctrl == "wave_ror" || 7957 Ctrl == "row_bcast") 7958 return isVI() || isGFX9(); 7959 7960 return Ctrl == "row_mirror" || 7961 Ctrl == "row_half_mirror" || 7962 Ctrl == "quad_perm" || 7963 Ctrl == "row_shl" || 7964 Ctrl == "row_shr" || 7965 Ctrl == "row_ror"; 7966 } 7967 7968 int64_t 7969 AMDGPUAsmParser::parseDPPCtrlPerm() { 7970 // quad_perm:[%d,%d,%d,%d] 7971 7972 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7973 return -1; 7974 7975 int64_t Val = 0; 7976 for (int i = 0; i < 4; ++i) { 7977 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7978 return -1; 7979 7980 int64_t Temp; 7981 SMLoc Loc = getLoc(); 7982 if (getParser().parseAbsoluteExpression(Temp)) 7983 return -1; 7984 if (Temp < 0 || Temp > 3) { 7985 Error(Loc, "expected a 2-bit value"); 7986 return -1; 7987 } 7988 7989 Val += (Temp << i * 2); 7990 } 7991 7992 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7993 return -1; 7994 7995 return Val; 7996 } 7997 7998 int64_t 7999 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8000 using namespace AMDGPU::DPP; 8001 8002 // sel:%d 8003 8004 int64_t Val; 8005 SMLoc Loc = getLoc(); 8006 8007 if (getParser().parseAbsoluteExpression(Val)) 8008 return -1; 8009 8010 struct DppCtrlCheck { 8011 int64_t Ctrl; 8012 int Lo; 8013 int Hi; 8014 }; 8015 8016 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8017 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8018 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8019 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8020 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8021 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8022 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8023 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8024 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8025 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8026 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8027 .Default({-1, 0, 0}); 8028 8029 bool Valid; 8030 if (Check.Ctrl == -1) { 8031 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8032 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8033 } else { 8034 Valid = Check.Lo <= Val && Val <= Check.Hi; 8035 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8036 } 8037 8038 if (!Valid) { 8039 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8040 return -1; 8041 } 8042 8043 return Val; 8044 } 8045 8046 OperandMatchResultTy 8047 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8048 using namespace AMDGPU::DPP; 8049 8050 if (!isToken(AsmToken::Identifier) || 8051 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8052 return MatchOperand_NoMatch; 8053 8054 SMLoc S = getLoc(); 8055 int64_t Val = -1; 8056 StringRef Ctrl; 8057 8058 parseId(Ctrl); 8059 8060 if (Ctrl == "row_mirror") { 8061 Val = DppCtrl::ROW_MIRROR; 8062 } else if (Ctrl == "row_half_mirror") { 8063 Val = DppCtrl::ROW_HALF_MIRROR; 8064 } else { 8065 if (skipToken(AsmToken::Colon, "expected a colon")) { 8066 if (Ctrl == "quad_perm") { 8067 Val = parseDPPCtrlPerm(); 8068 } else { 8069 Val = parseDPPCtrlSel(Ctrl); 8070 } 8071 } 8072 } 8073 8074 if (Val == -1) 8075 return MatchOperand_ParseFail; 8076 8077 Operands.push_back( 8078 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8079 return MatchOperand_Success; 8080 } 8081 8082 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8083 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8084 } 8085 8086 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8087 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8088 } 8089 8090 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8091 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8092 } 8093 8094 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8095 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8096 } 8097 8098 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8099 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8100 } 8101 8102 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8103 OptionalImmIndexMap OptionalIdx; 8104 8105 unsigned Opc = Inst.getOpcode(); 8106 bool HasModifiers = 8107 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8108 unsigned I = 1; 8109 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8110 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8111 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8112 } 8113 8114 int Fi = 0; 8115 for (unsigned E = Operands.size(); I != E; ++I) { 8116 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8117 MCOI::TIED_TO); 8118 if (TiedTo != -1) { 8119 assert((unsigned)TiedTo < Inst.getNumOperands()); 8120 // handle tied old or src2 for MAC instructions 8121 Inst.addOperand(Inst.getOperand(TiedTo)); 8122 } 8123 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8124 // Add the register arguments 8125 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8126 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8127 // Skip it. 
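// For example (assumed syntax): in "v_add_u32_dpp v0, vcc, v1, v2 row_shl:1
// row_mask:0xf bank_mask:0xf" the "vcc" token has no corresponding MCInst
// operand, so it is simply dropped here.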
8128 continue; 8129 } 8130 8131 if (IsDPP8) { 8132 if (Op.isDPP8()) { 8133 Op.addImmOperands(Inst, 1); 8134 } else if (HasModifiers && 8135 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8136 Op.addRegWithFPInputModsOperands(Inst, 2); 8137 } else if (Op.isFI()) { 8138 Fi = Op.getImm(); 8139 } else if (Op.isReg()) { 8140 Op.addRegOperands(Inst, 1); 8141 } else { 8142 llvm_unreachable("Invalid operand type"); 8143 } 8144 } else { 8145 if (HasModifiers && 8146 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8147 Op.addRegWithFPInputModsOperands(Inst, 2); 8148 } else if (Op.isReg()) { 8149 Op.addRegOperands(Inst, 1); 8150 } else if (Op.isDPPCtrl()) { 8151 Op.addImmOperands(Inst, 1); 8152 } else if (Op.isImm()) { 8153 // Handle optional arguments 8154 OptionalIdx[Op.getImmTy()] = I; 8155 } else { 8156 llvm_unreachable("Invalid operand type"); 8157 } 8158 } 8159 } 8160 8161 if (IsDPP8) { 8162 using namespace llvm::AMDGPU::DPP; 8163 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8164 } else { 8165 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8166 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8168 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8169 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8170 } 8171 } 8172 } 8173 8174 //===----------------------------------------------------------------------===// 8175 // sdwa 8176 //===----------------------------------------------------------------------===// 8177 8178 OperandMatchResultTy 8179 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8180 AMDGPUOperand::ImmTy Type) { 8181 using namespace llvm::AMDGPU::SDWA; 8182 8183 SMLoc S = getLoc(); 8184 StringRef Value; 8185 OperandMatchResultTy res; 8186 8187 SMLoc StringLoc; 8188 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8189 if (res != MatchOperand_Success) { 8190 return res; 8191 } 8192 8193 int64_t Int; 8194 Int = StringSwitch<int64_t>(Value) 8195 .Case("BYTE_0", SdwaSel::BYTE_0) 8196 .Case("BYTE_1", SdwaSel::BYTE_1) 8197 .Case("BYTE_2", SdwaSel::BYTE_2) 8198 .Case("BYTE_3", SdwaSel::BYTE_3) 8199 .Case("WORD_0", SdwaSel::WORD_0) 8200 .Case("WORD_1", SdwaSel::WORD_1) 8201 .Case("DWORD", SdwaSel::DWORD) 8202 .Default(0xffffffff); 8203 8204 if (Int == 0xffffffff) { 8205 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8206 return MatchOperand_ParseFail; 8207 } 8208 8209 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8210 return MatchOperand_Success; 8211 } 8212 8213 OperandMatchResultTy 8214 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8215 using namespace llvm::AMDGPU::SDWA; 8216 8217 SMLoc S = getLoc(); 8218 StringRef Value; 8219 OperandMatchResultTy res; 8220 8221 SMLoc StringLoc; 8222 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8223 if (res != MatchOperand_Success) { 8224 return res; 8225 } 8226 8227 int64_t Int; 8228 Int = StringSwitch<int64_t>(Value) 8229 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8230 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8231 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8232 .Default(0xffffffff); 8233 8234 if (Int == 0xffffffff) { 8235 Error(StringLoc, "invalid dst_unused value"); 8236 return MatchOperand_ParseFail; 8237 } 8238 8239 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8240 return MatchOperand_Success; 8241 } 8242 8243 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8244 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8245 } 8246 8247 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8248 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8249 } 8250 8251 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8252 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8253 } 8254 8255 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8256 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8257 } 8258 8259 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8260 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8261 } 8262 8263 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8264 uint64_t BasicInstType, 8265 bool SkipDstVcc, 8266 bool SkipSrcVcc) { 8267 using namespace llvm::AMDGPU::SDWA; 8268 8269 OptionalImmIndexMap OptionalIdx; 8270 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8271 bool SkippedVcc = false; 8272 8273 unsigned I = 1; 8274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8275 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8276 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8277 } 8278 8279 for (unsigned E = Operands.size(); I != E; ++I) { 8280 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8281 if (SkipVcc && !SkippedVcc && Op.isReg() && 8282 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8283 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8284 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8285 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8286 // Skip VCC only if we didn't skip it on previous iteration. 8287 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
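// In other words, for VOP2 the dst "vcc" is encountered while only the vdst
// def has been added (1 MCInst operand so far), and the src "vcc" after vdst
// plus the two modifier/register pairs for src0 and src1 (5 operands), which
// is exactly what the checks below test.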
8288 if (BasicInstType == SIInstrFlags::VOP2 && 8289 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8290 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8291 SkippedVcc = true; 8292 continue; 8293 } else if (BasicInstType == SIInstrFlags::VOPC && 8294 Inst.getNumOperands() == 0) { 8295 SkippedVcc = true; 8296 continue; 8297 } 8298 } 8299 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8300 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8301 } else if (Op.isImm()) { 8302 // Handle optional arguments 8303 OptionalIdx[Op.getImmTy()] = I; 8304 } else { 8305 llvm_unreachable("Invalid operand type"); 8306 } 8307 SkippedVcc = false; 8308 } 8309 8310 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8311 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8312 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8313 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8314 switch (BasicInstType) { 8315 case SIInstrFlags::VOP1: 8316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8317 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8318 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8319 } 8320 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8321 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8323 break; 8324 8325 case SIInstrFlags::VOP2: 8326 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8327 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8328 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8329 } 8330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8331 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8332 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8333 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8334 break; 8335 8336 case SIInstrFlags::VOPC: 8337 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8338 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8339 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8340 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8341 break; 8342 8343 default: 8344 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8345 } 8346 } 8347 8348 // Special case v_mac_{f16, f32}: 8349 // they have a src2 register operand that is tied to the dst operand. 8350 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8351 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8352 auto it = Inst.begin(); 8353 std::advance( 8354 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8355 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8356 } 8357 } 8358 8359 //===----------------------------------------------------------------------===// 8360 // mAI 8361 //===----------------------------------------------------------------------===// 8362 8363 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8364 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8365 } 8366 8367 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8368 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8369 } 8370 8371 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8372 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8373 } 8374 8375 /// Force static initialization. 8376 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8377 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8378 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8379 } 8380 8381 #define GET_REGISTER_MATCHER 8382 #define GET_MATCHER_IMPLEMENTATION 8383 #define GET_MNEMONIC_SPELL_CHECKER 8384 #define GET_MNEMONIC_CHECKER 8385 #include "AMDGPUGenAsmMatcher.inc" 8386 8387 // This function should be defined after the auto-generated include so that the 8388 // MatchClassKind enum is defined. 8389 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8390 unsigned Kind) { 8391 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8392 // But MatchInstructionImpl() expects to see a token and fails to validate the 8393 // operand. This method checks if we were given an immediate operand but expected 8394 // to get the corresponding token. 8395 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8396 switch (Kind) { 8397 case MCK_addr64: 8398 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8399 case MCK_gds: 8400 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8401 case MCK_lds: 8402 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8403 case MCK_idxen: 8404 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8405 case MCK_offen: 8406 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8407 case MCK_SSrcB32: 8408 // When operands have expression values, they will return true for isToken, 8409 // because it is not possible to distinguish between a token and an 8410 // expression at parse time. MatchInstructionImpl() will always try to 8411 // match an operand as a token when isToken returns true, and when the 8412 // name of the expression is not a valid token the match will fail, 8413 // so we need to handle it here. 8414 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8415 case MCK_SSrcF32: 8416 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8417 case MCK_SoppBrTarget: 8418 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8419 case MCK_VReg32OrOff: 8420 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8421 case MCK_InterpSlot: 8422 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8423 case MCK_Attr: 8424 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8425 case MCK_AttrChan: 8426 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8427 case MCK_ImmSMEMOffset: 8428 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8429 case MCK_SReg_64: 8430 case MCK_SReg_64_XEXEC: 8431 // Null is defined as a 32-bit register but 8432 // it should also be enabled with 64-bit operands. 8433 // The following code enables it for SReg_64 operands 8434 // used as source and destination. Remaining source 8435 // operands are handled in isInlinableImm. 8436 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8437 default: 8438 return Match_InvalidOperand; 8439 } 8440 } 8441 8442 //===----------------------------------------------------------------------===// 8443 // endpgm 8444 //===----------------------------------------------------------------------===// 8445 8446 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8447 SMLoc S = getLoc(); 8448 int64_t Imm = 0; 8449 8450 if (!parseExpr(Imm)) { 8451 // The operand is optional, if not present default to 0 8452 Imm = 0; 8453 } 8454 8455 if (!isUInt<16>(Imm)) { 8456 Error(S, "expected a 16-bit value"); 8457 return MatchOperand_ParseFail; 8458 } 8459 8460 Operands.push_back( 8461 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8462 return MatchOperand_Success; 8463 } 8464 8465 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8466
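// Usage note (illustrative): "s_endpgm" is accepted with the default immediate
// of 0, while "s_endpgm 1" supplies the optional 16-bit immediate parsed by
// parseEndpgmOp above.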