//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Coarse classification of a parsed register reference, produced by the
// register parser and consumed e.g. by KernelScopeInfo::usesRegister below.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand.
///
/// An operand is one of four kinds (token, immediate, register, or general
/// MC expression); the kind selects which member of the union below is
/// active. The very large family of is*() predicates is the query surface
/// used by the tablegen-generated asm matcher (AMDGPUGenAsmMatcher.inc) to
/// decide which operand class a parsed operand can satisfy, and the
/// add*Operands() helpers render the operand into an MCInst once an
/// instruction has been matched. Do not rename or change the result of any
/// predicate without auditing the matcher tables that reference it.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the anonymous union below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers (abs/neg for FP, sext for integer) attached to a
  /// register or immediate operand. FP and integer modifiers are mutually
  /// exclusive; getModifiersOperand() asserts that invariant.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers as SISrcMods bits for the MCInst src-modifier
    // operand.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers as SISrcMods bits.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier family is present (or 0 for none).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Fine-grained tag distinguishing the many named immediate operand
  /// flavors (DS/MUBUF offsets, DPP controls, SDWA selects, MIMG flags,
  /// export targets, etc.). ImmTyNone marks a plain untyped immediate.
  /// printImmTy() below must stay in sync with this list.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  /// How an immediate ends up encoded: as a literal constant or as an
  /// inline constant. Recorded during operand emission (see the mutable
  /// ImmOp::Kind below), not during parsing.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  // Borrowed view into the source buffer; not owned.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    // mutable so the const setImmKind*() helpers can record the encoding
    // decision on an otherwise-const operand during matching.
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Payload selected by Kind above.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalized 'Is' is inconsistent with the sibling
  // isImmKindConst() below; renaming would break external callers, so it is
  // only flagged here.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register: register kind and no abs/neg/sext modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  // --- Reg-or-imm predicates for operands that accept input modifiers. ---

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for any VGPR tuple size (1..32 dwords, excluding some widths).
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // --- One predicate per named-immediate flavor; some also range-check. ---
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  // --- Scalar (SGPR) source operand classes: SCSrc* = reg or inline
  // constant only; SSrc* additionally accepts literals/expressions. ---

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // The following *V2* scalar predicates are never expected to be queried by
  // the matcher; the return after llvm_unreachable only documents the
  // nominal delegation and keeps the signature well-formed.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // --- VALU source operand classes: VCSrc* = VGPR/SGPR reg or inline
  // constant; VSrc* additionally accepts literals/expressions. ---

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // NOTE(review): delegates to isVCSrcF32 (not isVCSrcB32); the register
  // class is the same, only the inline-constant type check differs —
  // confirm the FP inline-constant acceptance here is intended.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // --- VISrc*: VGPR-only (no SGPR) reg or inline constant, by tuple width. ---

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  // --- AISrc*: AGPR (accumulator, MAI) reg or inline constant, by width. ---

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  // KImm: literal-only operands (e.g. s_movk-style encodings).
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU assembly has no memory-operand syntax handled through this path.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // SOPP branch targets may be either a resolved immediate or a relocatable
  // expression.
  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  // Predicates whose logic needs parser/subtarget state; defined out of line.
  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Precondition: the expression is a plain symbol reference (asserted via
  // the cast<>); see isToken() for why expressions may stand in for tokens.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live on the register or on an untyped immediate only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  // --- MCInst emission helpers; N is the number of operands expected. ---

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the src-modifier immediate first, then the value operand.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug-printing name for each ImmTy; keep in sync with the enum above.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // --- Factory functions; each fully initializes the matching union arm. ---

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is not used in this body.
  // The token keeps a pointer into Str's underlying storage, which must
  // outlive the operand.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Debug-print helper for operand modifiers (used by AMDGPUOperand::print).
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  // First-unused register index per file (-1 means "none seen yet"); a
  // register use at index i bumps the minimum to i + 1.
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  // Record that SGPR index 'i' was referenced; keeps the assembler-visible
  // symbol .kernel.sgpr_count equal to the high-water mark.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Record that VGPR index 'i' was referenced. The published
  // .kernel.vgpr_count folds in the AGPR high-water mark via
  // getTotalNumVGPRs (VGPRs and AGPRs share a budget on gfx90a).
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  // Record that AGPR index 'i' was referenced; no-op on subtargets without
  // MAI instructions.
  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Bind to the MC context and reset all counters. Passing -1 through the
  // uses*At helpers (re)creates each .kernel.*_count symbol with value 0.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  // Record a use of the register tuple starting at DwordRegIndex and
  // spanning RegWidth dwords; only S/V/A GPRs are tracked.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  // State for .amdgpu_* mnemonic-suffix forcing (e.g. _e32/_e64/_dpp/_sdwa).
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  // NOTE(review): CPolSeen has no initializer here — presumably reset
  // elsewhere before use; confirm.
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);

  // Target-specific assembler directive handlers.
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register-parsing helpers.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      // Pre-define ISA version symbols; the symbol names differ between the
      // HSA v3+ ABI and older flows.
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      // GPR usage tracking: symbol-based for HSA v3+, KernelScope otherwise.
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget capability / generation predicates: thin wrappers over
  // AMDGPUBaseInfo queries and feature-bit tests.
  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Accessors for the "forced encoding" state (set from mnemonic suffixes).
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  // MCTargetAsmParser overrides and top-level parse entry points.
  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo
                        &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Prefixed / named-bit operand parsing helpers.
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  // Modifier (abs/neg/sext, SP3 syntax) recognition and operand parsing.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);

  // MTBUF dfmt/nfmt/ufmt format-operand parsing.
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Scratch record for a sendmsg/hwreg sub-operand: value, source location,
  // and whether it was written symbolically / explicitly at all.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  // Source-location lookup helpers used to point diagnostics at the right
  // parsed operand.
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  // Post-match instruction validation (constraints the matcher cannot
  // express); validateInstruction is the umbrella entry point.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  // Low-level lexer convenience wrappers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy
  parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  // ds_swizzle operand / macro parsing.
  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF conversion entry points used by the generated matcher.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default (omitted-operand) factories.
  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  // VOP3 / VOP3P / MIMG / SMEM conversion entry points.
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP parsing / conversion.
  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  // SDWA parsing / conversion.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

// Table entry describing an optional instruction operand and how to convert
// its parsed value.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Map an SI operand-type encoding to the FP semantics of its literal slot.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit operand kinds use single precision.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  // 64-bit operand kinds use double precision.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case
  AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit and packed-16-bit operand kinds use half precision.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

// Return true if \p FPLiteral can be converted to the FP type of \p VT
// without overflow or underflow (precision loss alone is tolerated).
// \p FPLiteral is converted in place as a side effect.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// True if \p Val fits into \p Size bits either zero- or sign-extended.
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

// 16-bit inline-literal check; i16 operands only accept integer inline
// constants (see comment below), f16/v2f16 accept the full 16-bit set.
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

// Return true if this immediate operand can be encoded as an inline constant
// for an operand of machine value type \p type.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrow the double-precision literal to the operand's type first; a
    // literal that overflows/underflows the target type is never inlinable.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

// Return true if this immediate can be encoded as a (non-inline) literal
// constant for an operand of machine value type \p type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero.
  // We also require that the literal may be losslessly converted to f16.
  // Reduce packed vector types to their scalar element type for the check.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

// True if this operand is a register belonging to register class \p RCID.
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}

// SDWA source operands: VGPR-only on VI; VGPR/SGPR or inline constant on
// GFX9+; SDWA does not exist on earlier subtargets.
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

// A "bool" register: 64-bit scalar source in wave64 mode, 32-bit in wave32.
bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}

// Apply abs/neg FP modifiers directly to the raw bit pattern of a literal:
// abs clears the sign bit of the \p Size-byte value, neg flips it.
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

// Append this immediate to \p Inst; literal-capable SI source operands go
// through addLiteralImmOperand (which handles inline vs. literal encoding),
// everything else is added verbatim.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP literal tokens are stored as a 64-bit double bit pattern, so the
    // sign bit to flip/clear is the double's regardless of operand size.
    const unsigned Size = Imm.IsFPImm ?
sizeof(double) : getOperandSize(InstDesc, OpNum); 2028 Val = applyInputFPModifiers(Val, Size); 2029 } 2030 2031 APInt Literal(64, Val); 2032 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2033 2034 if (Imm.IsFPImm) { // We got fp literal token 2035 switch (OpTy) { 2036 case AMDGPU::OPERAND_REG_IMM_INT64: 2037 case AMDGPU::OPERAND_REG_IMM_FP64: 2038 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2039 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2040 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2041 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2042 AsmParser->hasInv2PiInlineImm())) { 2043 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2044 setImmKindConst(); 2045 return; 2046 } 2047 2048 // Non-inlineable 2049 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2050 // For fp operands we check if low 32 bits are zeros 2051 if (Literal.getLoBits(32) != 0) { 2052 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2053 "Can't encode literal as exact 64-bit floating-point operand. " 2054 "Low 32-bits will be set to zero"); 2055 } 2056 2057 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2058 setImmKindLiteral(); 2059 return; 2060 } 2061 2062 // We don't allow fp literals in 64-bit integer instructions. It is 2063 // unclear how we should encode them. 
This case should be checked earlier 2064 // in predicate methods (isLiteralImm()) 2065 llvm_unreachable("fp literal in 64-bit integer instruction."); 2066 2067 case AMDGPU::OPERAND_REG_IMM_INT32: 2068 case AMDGPU::OPERAND_REG_IMM_FP32: 2069 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2070 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2071 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2072 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2073 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2074 case AMDGPU::OPERAND_REG_IMM_INT16: 2075 case AMDGPU::OPERAND_REG_IMM_FP16: 2076 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2077 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2078 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2079 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2080 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2082 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2083 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2085 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2086 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2087 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2088 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2089 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2090 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2091 case AMDGPU::OPERAND_KIMM32: 2092 case AMDGPU::OPERAND_KIMM16: { 2093 bool lost; 2094 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2095 // Convert literal to single precision 2096 FPLiteral.convert(*getOpFltSemantics(OpTy), 2097 APFloat::rmNearestTiesToEven, &lost); 2098 // We allow precision lost but not overflow or underflow. This should be 2099 // checked earlier in isLiteralImm() 2100 2101 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2102 Inst.addOperand(MCOperand::createImm(ImmVal)); 2103 setImmKindLiteral(); 2104 return; 2105 } 2106 default: 2107 llvm_unreachable("invalid operand size"); 2108 } 2109 2110 return; 2111 } 2112 2113 // We got int literal token. 2114 // Only sign extend inline immediates. 
2115 switch (OpTy) { 2116 case AMDGPU::OPERAND_REG_IMM_INT32: 2117 case AMDGPU::OPERAND_REG_IMM_FP32: 2118 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2119 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2120 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2123 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2124 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2125 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2126 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2127 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2128 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2129 if (isSafeTruncation(Val, 32) && 2130 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2131 AsmParser->hasInv2PiInlineImm())) { 2132 Inst.addOperand(MCOperand::createImm(Val)); 2133 setImmKindConst(); 2134 return; 2135 } 2136 2137 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2138 setImmKindLiteral(); 2139 return; 2140 2141 case AMDGPU::OPERAND_REG_IMM_INT64: 2142 case AMDGPU::OPERAND_REG_IMM_FP64: 2143 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2144 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2145 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2146 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2147 Inst.addOperand(MCOperand::createImm(Val)); 2148 setImmKindConst(); 2149 return; 2150 } 2151 2152 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2153 setImmKindLiteral(); 2154 return; 2155 2156 case AMDGPU::OPERAND_REG_IMM_INT16: 2157 case AMDGPU::OPERAND_REG_IMM_FP16: 2158 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2159 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2160 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2161 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2163 if (isSafeTruncation(Val, 16) && 2164 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2165 AsmParser->hasInv2PiInlineImm())) { 2166 Inst.addOperand(MCOperand::createImm(Val)); 2167 setImmKindConst(); 2168 return; 2169 } 2170 2171 
Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2172 setImmKindLiteral(); 2173 return; 2174 2175 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2176 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2177 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2179 assert(isSafeTruncation(Val, 16)); 2180 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2181 AsmParser->hasInv2PiInlineImm())); 2182 2183 Inst.addOperand(MCOperand::createImm(Val)); 2184 return; 2185 } 2186 case AMDGPU::OPERAND_KIMM32: 2187 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2188 setImmKindNone(); 2189 return; 2190 case AMDGPU::OPERAND_KIMM16: 2191 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2192 setImmKindNone(); 2193 return; 2194 default: 2195 llvm_unreachable("invalid operand size"); 2196 } 2197 } 2198 2199 template <unsigned Bitwidth> 2200 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2201 APInt Literal(64, Imm.Val); 2202 setImmKindNone(); 2203 2204 if (!Imm.IsFPImm) { 2205 // We got int literal token. 
2206 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2207 return; 2208 } 2209 2210 bool Lost; 2211 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2212 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2213 APFloat::rmNearestTiesToEven, &Lost); 2214 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2215 } 2216 2217 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2218 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2219 } 2220 2221 static bool isInlineValue(unsigned Reg) { 2222 switch (Reg) { 2223 case AMDGPU::SRC_SHARED_BASE: 2224 case AMDGPU::SRC_SHARED_LIMIT: 2225 case AMDGPU::SRC_PRIVATE_BASE: 2226 case AMDGPU::SRC_PRIVATE_LIMIT: 2227 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2228 return true; 2229 case AMDGPU::SRC_VCCZ: 2230 case AMDGPU::SRC_EXECZ: 2231 case AMDGPU::SRC_SCC: 2232 return true; 2233 case AMDGPU::SGPR_NULL: 2234 return true; 2235 default: 2236 return false; 2237 } 2238 } 2239 2240 bool AMDGPUOperand::isInlineValue() const { 2241 return isRegKind() && ::isInlineValue(getReg()); 2242 } 2243 2244 //===----------------------------------------------------------------------===// 2245 // AsmParser 2246 //===----------------------------------------------------------------------===// 2247 2248 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2249 if (Is == IS_VGPR) { 2250 switch (RegWidth) { 2251 default: return -1; 2252 case 1: return AMDGPU::VGPR_32RegClassID; 2253 case 2: return AMDGPU::VReg_64RegClassID; 2254 case 3: return AMDGPU::VReg_96RegClassID; 2255 case 4: return AMDGPU::VReg_128RegClassID; 2256 case 5: return AMDGPU::VReg_160RegClassID; 2257 case 6: return AMDGPU::VReg_192RegClassID; 2258 case 7: return AMDGPU::VReg_224RegClassID; 2259 case 8: return AMDGPU::VReg_256RegClassID; 2260 case 16: return AMDGPU::VReg_512RegClassID; 2261 case 32: return AMDGPU::VReg_1024RegClassID; 2262 } 2263 } else if (Is == 
IS_TTMP) { 2264 switch (RegWidth) { 2265 default: return -1; 2266 case 1: return AMDGPU::TTMP_32RegClassID; 2267 case 2: return AMDGPU::TTMP_64RegClassID; 2268 case 4: return AMDGPU::TTMP_128RegClassID; 2269 case 8: return AMDGPU::TTMP_256RegClassID; 2270 case 16: return AMDGPU::TTMP_512RegClassID; 2271 } 2272 } else if (Is == IS_SGPR) { 2273 switch (RegWidth) { 2274 default: return -1; 2275 case 1: return AMDGPU::SGPR_32RegClassID; 2276 case 2: return AMDGPU::SGPR_64RegClassID; 2277 case 3: return AMDGPU::SGPR_96RegClassID; 2278 case 4: return AMDGPU::SGPR_128RegClassID; 2279 case 5: return AMDGPU::SGPR_160RegClassID; 2280 case 6: return AMDGPU::SGPR_192RegClassID; 2281 case 7: return AMDGPU::SGPR_224RegClassID; 2282 case 8: return AMDGPU::SGPR_256RegClassID; 2283 case 16: return AMDGPU::SGPR_512RegClassID; 2284 } 2285 } else if (Is == IS_AGPR) { 2286 switch (RegWidth) { 2287 default: return -1; 2288 case 1: return AMDGPU::AGPR_32RegClassID; 2289 case 2: return AMDGPU::AReg_64RegClassID; 2290 case 3: return AMDGPU::AReg_96RegClassID; 2291 case 4: return AMDGPU::AReg_128RegClassID; 2292 case 5: return AMDGPU::AReg_160RegClassID; 2293 case 6: return AMDGPU::AReg_192RegClassID; 2294 case 7: return AMDGPU::AReg_224RegClassID; 2295 case 8: return AMDGPU::AReg_256RegClassID; 2296 case 16: return AMDGPU::AReg_512RegClassID; 2297 case 32: return AMDGPU::AReg_1024RegClassID; 2298 } 2299 } 2300 return -1; 2301 } 2302 2303 static unsigned getSpecialRegForName(StringRef RegName) { 2304 return StringSwitch<unsigned>(RegName) 2305 .Case("exec", AMDGPU::EXEC) 2306 .Case("vcc", AMDGPU::VCC) 2307 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2308 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2309 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2310 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2311 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2312 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2313 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2314 .Case("src_private_base", 
AMDGPU::SRC_PRIVATE_BASE) 2315 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2316 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2317 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2318 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2319 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2320 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2321 .Case("m0", AMDGPU::M0) 2322 .Case("vccz", AMDGPU::SRC_VCCZ) 2323 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2324 .Case("execz", AMDGPU::SRC_EXECZ) 2325 .Case("src_execz", AMDGPU::SRC_EXECZ) 2326 .Case("scc", AMDGPU::SRC_SCC) 2327 .Case("src_scc", AMDGPU::SRC_SCC) 2328 .Case("tba", AMDGPU::TBA) 2329 .Case("tma", AMDGPU::TMA) 2330 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2331 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2332 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2333 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2334 .Case("vcc_lo", AMDGPU::VCC_LO) 2335 .Case("vcc_hi", AMDGPU::VCC_HI) 2336 .Case("exec_lo", AMDGPU::EXEC_LO) 2337 .Case("exec_hi", AMDGPU::EXEC_HI) 2338 .Case("tma_lo", AMDGPU::TMA_LO) 2339 .Case("tma_hi", AMDGPU::TMA_HI) 2340 .Case("tba_lo", AMDGPU::TBA_LO) 2341 .Case("tba_hi", AMDGPU::TBA_HI) 2342 .Case("pc", AMDGPU::PC_REG) 2343 .Case("null", AMDGPU::SGPR_NULL) 2344 .Default(AMDGPU::NoRegister); 2345 } 2346 2347 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2348 SMLoc &EndLoc, bool RestoreOnFailure) { 2349 auto R = parseRegister(); 2350 if (!R) return true; 2351 assert(R->isReg()); 2352 RegNo = R->getReg(); 2353 StartLoc = R->getStartLoc(); 2354 EndLoc = R->getEndLoc(); 2355 return false; 2356 } 2357 2358 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2359 SMLoc &EndLoc) { 2360 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2361 } 2362 2363 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2364 SMLoc &StartLoc, 2365 SMLoc &EndLoc) { 2366 bool Result = 2367 
ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2368 bool PendingErrors = getParser().hasPendingError(); 2369 getParser().clearPendingErrors(); 2370 if (PendingErrors) 2371 return MatchOperand_ParseFail; 2372 if (Result) 2373 return MatchOperand_NoMatch; 2374 return MatchOperand_Success; 2375 } 2376 2377 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2378 RegisterKind RegKind, unsigned Reg1, 2379 SMLoc Loc) { 2380 switch (RegKind) { 2381 case IS_SPECIAL: 2382 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2383 Reg = AMDGPU::EXEC; 2384 RegWidth = 2; 2385 return true; 2386 } 2387 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2388 Reg = AMDGPU::FLAT_SCR; 2389 RegWidth = 2; 2390 return true; 2391 } 2392 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2393 Reg = AMDGPU::XNACK_MASK; 2394 RegWidth = 2; 2395 return true; 2396 } 2397 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2398 Reg = AMDGPU::VCC; 2399 RegWidth = 2; 2400 return true; 2401 } 2402 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2403 Reg = AMDGPU::TBA; 2404 RegWidth = 2; 2405 return true; 2406 } 2407 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2408 Reg = AMDGPU::TMA; 2409 RegWidth = 2; 2410 return true; 2411 } 2412 Error(Loc, "register does not fit in the list"); 2413 return false; 2414 case IS_VGPR: 2415 case IS_SGPR: 2416 case IS_AGPR: 2417 case IS_TTMP: 2418 if (Reg1 != Reg + RegWidth) { 2419 Error(Loc, "registers in a list must have consecutive indices"); 2420 return false; 2421 } 2422 RegWidth++; 2423 return true; 2424 default: 2425 llvm_unreachable("unexpected register kind"); 2426 } 2427 } 2428 2429 struct RegInfo { 2430 StringLiteral Name; 2431 RegisterKind Kind; 2432 }; 2433 2434 static constexpr RegInfo RegularRegisters[] = { 2435 {{"v"}, IS_VGPR}, 2436 {{"s"}, IS_SGPR}, 2437 {{"ttmp"}, IS_TTMP}, 2438 {{"acc"}, IS_AGPR}, 2439 {{"a"}, IS_AGPR}, 2440 }; 2441 2442 static 
bool isRegularReg(RegisterKind Kind) { 2443 return Kind == IS_VGPR || 2444 Kind == IS_SGPR || 2445 Kind == IS_TTMP || 2446 Kind == IS_AGPR; 2447 } 2448 2449 static const RegInfo* getRegularRegInfo(StringRef Str) { 2450 for (const RegInfo &Reg : RegularRegisters) 2451 if (Str.startswith(Reg.Name)) 2452 return &Reg; 2453 return nullptr; 2454 } 2455 2456 static bool getRegNum(StringRef Str, unsigned& Num) { 2457 return !Str.getAsInteger(10, Num); 2458 } 2459 2460 bool 2461 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2462 const AsmToken &NextToken) const { 2463 2464 // A list of consecutive registers: [s0,s1,s2,s3] 2465 if (Token.is(AsmToken::LBrac)) 2466 return true; 2467 2468 if (!Token.is(AsmToken::Identifier)) 2469 return false; 2470 2471 // A single register like s0 or a range of registers like s[0:1] 2472 2473 StringRef Str = Token.getString(); 2474 const RegInfo *Reg = getRegularRegInfo(Str); 2475 if (Reg) { 2476 StringRef RegName = Reg->Name; 2477 StringRef RegSuffix = Str.substr(RegName.size()); 2478 if (!RegSuffix.empty()) { 2479 unsigned Num; 2480 // A single register with an index: rXX 2481 if (getRegNum(RegSuffix, Num)) 2482 return true; 2483 } else { 2484 // A range of registers: r[XX:YY]. 2485 if (NextToken.is(AsmToken::LBrac)) 2486 return true; 2487 } 2488 } 2489 2490 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2491 } 2492 2493 bool 2494 AMDGPUAsmParser::isRegister() 2495 { 2496 return isRegister(getToken(), peekToken()); 2497 } 2498 2499 unsigned 2500 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2501 unsigned RegNum, 2502 unsigned RegWidth, 2503 SMLoc Loc) { 2504 2505 assert(isRegularReg(RegKind)); 2506 2507 unsigned AlignSize = 1; 2508 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2509 // SGPR and TTMP registers must be aligned. 2510 // Max required alignment is 4 dwords. 
2511 AlignSize = std::min(RegWidth, 4u); 2512 } 2513 2514 if (RegNum % AlignSize != 0) { 2515 Error(Loc, "invalid register alignment"); 2516 return AMDGPU::NoRegister; 2517 } 2518 2519 unsigned RegIdx = RegNum / AlignSize; 2520 int RCID = getRegClass(RegKind, RegWidth); 2521 if (RCID == -1) { 2522 Error(Loc, "invalid or unsupported register size"); 2523 return AMDGPU::NoRegister; 2524 } 2525 2526 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2527 const MCRegisterClass RC = TRI->getRegClass(RCID); 2528 if (RegIdx >= RC.getNumRegs()) { 2529 Error(Loc, "register index is out of range"); 2530 return AMDGPU::NoRegister; 2531 } 2532 2533 return RC.getRegister(RegIdx); 2534 } 2535 2536 bool 2537 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2538 int64_t RegLo, RegHi; 2539 if (!skipToken(AsmToken::LBrac, "missing register index")) 2540 return false; 2541 2542 SMLoc FirstIdxLoc = getLoc(); 2543 SMLoc SecondIdxLoc; 2544 2545 if (!parseExpr(RegLo)) 2546 return false; 2547 2548 if (trySkipToken(AsmToken::Colon)) { 2549 SecondIdxLoc = getLoc(); 2550 if (!parseExpr(RegHi)) 2551 return false; 2552 } else { 2553 RegHi = RegLo; 2554 } 2555 2556 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2557 return false; 2558 2559 if (!isUInt<32>(RegLo)) { 2560 Error(FirstIdxLoc, "invalid register index"); 2561 return false; 2562 } 2563 2564 if (!isUInt<32>(RegHi)) { 2565 Error(SecondIdxLoc, "invalid register index"); 2566 return false; 2567 } 2568 2569 if (RegLo > RegHi) { 2570 Error(FirstIdxLoc, "first register index should not exceed second index"); 2571 return false; 2572 } 2573 2574 Num = static_cast<unsigned>(RegLo); 2575 Width = (RegHi - RegLo) + 1; 2576 return true; 2577 } 2578 2579 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2580 unsigned &RegNum, unsigned &RegWidth, 2581 SmallVectorImpl<AsmToken> &Tokens) { 2582 assert(isToken(AsmToken::Identifier)); 2583 unsigned Reg = getSpecialRegForName(getTokenStr()); 
2584 if (Reg) { 2585 RegNum = 0; 2586 RegWidth = 1; 2587 RegKind = IS_SPECIAL; 2588 Tokens.push_back(getToken()); 2589 lex(); // skip register name 2590 } 2591 return Reg; 2592 } 2593 2594 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2595 unsigned &RegNum, unsigned &RegWidth, 2596 SmallVectorImpl<AsmToken> &Tokens) { 2597 assert(isToken(AsmToken::Identifier)); 2598 StringRef RegName = getTokenStr(); 2599 auto Loc = getLoc(); 2600 2601 const RegInfo *RI = getRegularRegInfo(RegName); 2602 if (!RI) { 2603 Error(Loc, "invalid register name"); 2604 return AMDGPU::NoRegister; 2605 } 2606 2607 Tokens.push_back(getToken()); 2608 lex(); // skip register name 2609 2610 RegKind = RI->Kind; 2611 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2612 if (!RegSuffix.empty()) { 2613 // Single 32-bit register: vXX. 2614 if (!getRegNum(RegSuffix, RegNum)) { 2615 Error(Loc, "invalid register index"); 2616 return AMDGPU::NoRegister; 2617 } 2618 RegWidth = 1; 2619 } else { 2620 // Range of registers: v[XX:YY]. ":YY" is optional. 
2621 if (!ParseRegRange(RegNum, RegWidth)) 2622 return AMDGPU::NoRegister; 2623 } 2624 2625 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2626 } 2627 2628 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2629 unsigned &RegWidth, 2630 SmallVectorImpl<AsmToken> &Tokens) { 2631 unsigned Reg = AMDGPU::NoRegister; 2632 auto ListLoc = getLoc(); 2633 2634 if (!skipToken(AsmToken::LBrac, 2635 "expected a register or a list of registers")) { 2636 return AMDGPU::NoRegister; 2637 } 2638 2639 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2640 2641 auto Loc = getLoc(); 2642 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2643 return AMDGPU::NoRegister; 2644 if (RegWidth != 1) { 2645 Error(Loc, "expected a single 32-bit register"); 2646 return AMDGPU::NoRegister; 2647 } 2648 2649 for (; trySkipToken(AsmToken::Comma); ) { 2650 RegisterKind NextRegKind; 2651 unsigned NextReg, NextRegNum, NextRegWidth; 2652 Loc = getLoc(); 2653 2654 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2655 NextRegNum, NextRegWidth, 2656 Tokens)) { 2657 return AMDGPU::NoRegister; 2658 } 2659 if (NextRegWidth != 1) { 2660 Error(Loc, "expected a single 32-bit register"); 2661 return AMDGPU::NoRegister; 2662 } 2663 if (NextRegKind != RegKind) { 2664 Error(Loc, "registers in a list must be of the same kind"); 2665 return AMDGPU::NoRegister; 2666 } 2667 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2668 return AMDGPU::NoRegister; 2669 } 2670 2671 if (!skipToken(AsmToken::RBrac, 2672 "expected a comma or a closing square bracket")) { 2673 return AMDGPU::NoRegister; 2674 } 2675 2676 if (isRegularReg(RegKind)) 2677 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2678 2679 return Reg; 2680 } 2681 2682 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2683 unsigned &RegNum, unsigned &RegWidth, 2684 SmallVectorImpl<AsmToken> &Tokens) { 2685 auto Loc = getLoc(); 2686 Reg = AMDGPU::NoRegister; 2687 2688 
if (isToken(AsmToken::Identifier)) { 2689 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2690 if (Reg == AMDGPU::NoRegister) 2691 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2692 } else { 2693 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2694 } 2695 2696 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2697 if (Reg == AMDGPU::NoRegister) { 2698 assert(Parser.hasPendingError()); 2699 return false; 2700 } 2701 2702 if (!subtargetHasRegister(*TRI, Reg)) { 2703 if (Reg == AMDGPU::SGPR_NULL) { 2704 Error(Loc, "'null' operand is not supported on this GPU"); 2705 } else { 2706 Error(Loc, "register not available on this GPU"); 2707 } 2708 return false; 2709 } 2710 2711 return true; 2712 } 2713 2714 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2715 unsigned &RegNum, unsigned &RegWidth, 2716 bool RestoreOnFailure /*=false*/) { 2717 Reg = AMDGPU::NoRegister; 2718 2719 SmallVector<AsmToken, 1> Tokens; 2720 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2721 if (RestoreOnFailure) { 2722 while (!Tokens.empty()) { 2723 getLexer().UnLex(Tokens.pop_back_val()); 2724 } 2725 } 2726 return true; 2727 } 2728 return false; 2729 } 2730 2731 Optional<StringRef> 2732 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2733 switch (RegKind) { 2734 case IS_VGPR: 2735 return StringRef(".amdgcn.next_free_vgpr"); 2736 case IS_SGPR: 2737 return StringRef(".amdgcn.next_free_sgpr"); 2738 default: 2739 return None; 2740 } 2741 } 2742 2743 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2744 auto SymbolName = getGprCountSymbolName(RegKind); 2745 assert(SymbolName && "initializing invalid register kind"); 2746 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2747 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2748 } 2749 2750 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2751 unsigned DwordRegIndex, 2752 unsigned 
RegWidth) { 2753 // Symbols are only defined for GCN targets 2754 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2755 return true; 2756 2757 auto SymbolName = getGprCountSymbolName(RegKind); 2758 if (!SymbolName) 2759 return true; 2760 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2761 2762 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2763 int64_t OldCount; 2764 2765 if (!Sym->isVariable()) 2766 return !Error(getLoc(), 2767 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2768 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2769 return !Error( 2770 getLoc(), 2771 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2772 2773 if (OldCount <= NewMax) 2774 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2775 2776 return true; 2777 } 2778 2779 std::unique_ptr<AMDGPUOperand> 2780 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2781 const auto &Tok = getToken(); 2782 SMLoc StartLoc = Tok.getLoc(); 2783 SMLoc EndLoc = Tok.getEndLoc(); 2784 RegisterKind RegKind; 2785 unsigned Reg, RegNum, RegWidth; 2786 2787 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2788 return nullptr; 2789 } 2790 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2791 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2792 return nullptr; 2793 } else 2794 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2795 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2796 } 2797 2798 OperandMatchResultTy 2799 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2800 // TODO: add syntactic sugar for 1/(2*PI) 2801 2802 assert(!isRegister()); 2803 assert(!isModifier()); 2804 2805 const auto& Tok = getToken(); 2806 const auto& NextTok = peekToken(); 2807 bool IsReal = Tok.is(AsmToken::Real); 2808 SMLoc S = getLoc(); 2809 bool Negate = false; 2810 2811 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2812 lex(); 2813 IsReal = true; 2814 Negate = 
true; 2815 } 2816 2817 if (IsReal) { 2818 // Floating-point expressions are not supported. 2819 // Can only allow floating-point literals with an 2820 // optional sign. 2821 2822 StringRef Num = getTokenStr(); 2823 lex(); 2824 2825 APFloat RealVal(APFloat::IEEEdouble()); 2826 auto roundMode = APFloat::rmNearestTiesToEven; 2827 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2828 return MatchOperand_ParseFail; 2829 } 2830 if (Negate) 2831 RealVal.changeSign(); 2832 2833 Operands.push_back( 2834 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2835 AMDGPUOperand::ImmTyNone, true)); 2836 2837 return MatchOperand_Success; 2838 2839 } else { 2840 int64_t IntVal; 2841 const MCExpr *Expr; 2842 SMLoc S = getLoc(); 2843 2844 if (HasSP3AbsModifier) { 2845 // This is a workaround for handling expressions 2846 // as arguments of SP3 'abs' modifier, for example: 2847 // |1.0| 2848 // |-1| 2849 // |1+x| 2850 // This syntax is not compatible with syntax of standard 2851 // MC expressions (due to the trailing '|'). 
2852 SMLoc EndLoc; 2853 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2854 return MatchOperand_ParseFail; 2855 } else { 2856 if (Parser.parseExpression(Expr)) 2857 return MatchOperand_ParseFail; 2858 } 2859 2860 if (Expr->evaluateAsAbsolute(IntVal)) { 2861 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2862 } else { 2863 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2864 } 2865 2866 return MatchOperand_Success; 2867 } 2868 2869 return MatchOperand_NoMatch; 2870 } 2871 2872 OperandMatchResultTy 2873 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2874 if (!isRegister()) 2875 return MatchOperand_NoMatch; 2876 2877 if (auto R = parseRegister()) { 2878 assert(R->isReg()); 2879 Operands.push_back(std::move(R)); 2880 return MatchOperand_Success; 2881 } 2882 return MatchOperand_ParseFail; 2883 } 2884 2885 OperandMatchResultTy 2886 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2887 auto res = parseReg(Operands); 2888 if (res != MatchOperand_NoMatch) { 2889 return res; 2890 } else if (isModifier()) { 2891 return MatchOperand_NoMatch; 2892 } else { 2893 return parseImm(Operands, HasSP3AbsMod); 2894 } 2895 } 2896 2897 bool 2898 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2899 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2900 const auto &str = Token.getString(); 2901 return str == "abs" || str == "neg" || str == "sext"; 2902 } 2903 return false; 2904 } 2905 2906 bool 2907 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2908 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2909 } 2910 2911 bool 2912 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2913 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2914 } 2915 2916 bool 2917 AMDGPUAsmParser::isRegOrOperandModifier(const 
AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Beside being counter-intuitive, such use of floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  // Consume the '-' only when it is unambiguously a modifier
  // (followed by a register, '|...|' or 'abs(...)').
  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

// Parse an operand together with optional FP input modifiers:
// either the named forms neg(...)/abs(...) or the SP3 forms
// -... and |...|. Mixing a named form with its SP3 equivalent
// (e.g. 'neg' after SP3 '-') is rejected. The parsed modifiers
// are attached to the operand just pushed onto Operands.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // If any modifier was consumed the operand is mandatory:
    // report a hard failure rather than NoMatch.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // FP modifiers cannot be applied to a relocatable expression.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse an operand with an optional integer input modifier sext(...).
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // After 'sext(' the operand is mandatory.
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // Int modifiers cannot be applied to a relocatable expression.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

// Register-only variant (immediates disallowed) of the FP-modifier parser.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

// Register-only variant (immediates disallowed) of the int-modifier parser.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// Parse either a VGPR or the literal 'off' (encoded as immediate 0 with
// ImmTyOff), as used by e.g. export instructions.
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;

}

// Reject matched instructions whose encoding conflicts with a forced
// suffix (_e32/_e64/_dpp/_sdwa) the user wrote on the mnemonic.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Some VOP3 opcodes prefer the e32 form unless e64 was forced explicitly.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

// All asm variants, tried when no encoding suffix was forced.
static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

// Human-readable name of the forced variant, "" if none was forced.
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

// Return the first implicitly-read SGPR (FLAT_SCR/VCC/M0 etc.) of a VOP
// instruction, or NoRegister. Such a read consumes a constant bus slot.
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: the inlinability rule depends on whether the
    // operand is integer, packed integer, packed fp or scalar fp.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Number of scalar values an instruction may read over the constant bus:
// 1 before GFX10, and on GFX10 still 1 for 64-bit shifts, otherwise 2.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}

// True if operand OpIdx consumes a constant bus slot: a non-inline
// immediate, an SGPR (other than null), or an expression.
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    auto Reg = MO.getReg();
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    auto PReg = mc2PseudoReg(Reg);
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  } else {
    return true;
  }
}

// Count distinct scalar values (SGPRs, literals, implicit SGPR reads)
// consumed over the constant bus and verify the per-opcode limit.
bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++NumLiterals;
      LiteralSize = 4;
    }

    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Point the diagnostic at whichever offending operand appears later.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}

// For instructions with an earlyclobber vdst, the destination register
// must not overlap any source register.
bool
AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
                                                 const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
        const unsigned SrcReg = mc2PseudoReg(Src.getReg());
        Error(getRegLoc(SrcReg, Operands),
              "destination must be different than all sources");
        return false;
      }
    }
  }

  return true;
}

// On targets without integer clamp support, 'clamp' must be 0 on
// instructions flagged IntClamp.
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}

// Check that the vdata register size of a MIMG instruction matches the
// number of enabled dmask channels (plus tfe, halved for packed d16).
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);

  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
    return true;

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}

// GFX10+: check that the number of vaddr registers (or NSA dwords)
// matches what the MIMG opcode/dim/a16 combination requires.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  if (DimIdx == -1)
    return true; // intersect_ray

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // NSA encoding: each address component occupies its own operand slot.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  if (!IsNSA) {
    if (ExpectedAddrSize > 8)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  return ActualAddrSize == ExpectedAddrSize;
}

// MIMG atomics permit only dmask 0x1, 0x3 or 0xf.
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

// GATHER4 permits only a single-channel dmask (1, 2, 4 or 8).
bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

// MSAA-only MIMG opcodes require an MSAA dim.
bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  if (!BaseOpcode->MSAA)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  assert(DimIdx != -1);

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);

  return DimInfo->MSAA;
}

// Is this one of the GFX10 SDWA v_movrels* opcodes?
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
    return true;
  default:
    return false;
  }
}

// movrels* opcodes should only allow VGPRS as src0.
// This is specified in .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
3602 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3603 const OperandVector &Operands) { 3604 3605 const unsigned Opc = Inst.getOpcode(); 3606 const MCInstrDesc &Desc = MII.get(Opc); 3607 3608 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3609 return true; 3610 3611 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3612 assert(Src0Idx != -1); 3613 3614 SMLoc ErrLoc; 3615 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3616 if (Src0.isReg()) { 3617 auto Reg = mc2PseudoReg(Src0.getReg()); 3618 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3619 if (!isSGPR(Reg, TRI)) 3620 return true; 3621 ErrLoc = getRegLoc(Reg, Operands); 3622 } else { 3623 ErrLoc = getConstLoc(Operands); 3624 } 3625 3626 Error(ErrLoc, "source operand must be a VGPR"); 3627 return false; 3628 } 3629 3630 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3631 const OperandVector &Operands) { 3632 3633 const unsigned Opc = Inst.getOpcode(); 3634 3635 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3636 return true; 3637 3638 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3639 assert(Src0Idx != -1); 3640 3641 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3642 if (!Src0.isReg()) 3643 return true; 3644 3645 auto Reg = mc2PseudoReg(Src0.getReg()); 3646 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3647 if (isSGPR(Reg, TRI)) { 3648 Error(getRegLoc(Reg, Operands), 3649 "source operand must be either a VGPR or an inline constant"); 3650 return false; 3651 } 3652 3653 return true; 3654 } 3655 3656 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3657 const OperandVector &Operands) { 3658 const unsigned Opc = Inst.getOpcode(); 3659 const MCInstrDesc &Desc = MII.get(Opc); 3660 3661 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3662 return true; 3663 3664 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3665 if (Src2Idx == -1) 3666 return true; 
3667 3668 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3669 if (!Src2.isReg()) 3670 return true; 3671 3672 MCRegister Src2Reg = Src2.getReg(); 3673 MCRegister DstReg = Inst.getOperand(0).getReg(); 3674 if (Src2Reg == DstReg) 3675 return true; 3676 3677 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3678 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3679 return true; 3680 3681 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3682 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3683 "source 2 operand must not partially overlap with dst"); 3684 return false; 3685 } 3686 3687 return true; 3688 } 3689 3690 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3691 switch (Inst.getOpcode()) { 3692 default: 3693 return true; 3694 case V_DIV_SCALE_F32_gfx6_gfx7: 3695 case V_DIV_SCALE_F32_vi: 3696 case V_DIV_SCALE_F32_gfx10: 3697 case V_DIV_SCALE_F64_gfx6_gfx7: 3698 case V_DIV_SCALE_F64_vi: 3699 case V_DIV_SCALE_F64_gfx10: 3700 break; 3701 } 3702 3703 // TODO: Check that src0 = src1 or src2. 
3704 3705 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3706 AMDGPU::OpName::src2_modifiers, 3707 AMDGPU::OpName::src2_modifiers}) { 3708 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3709 .getImm() & 3710 SISrcMods::ABS) { 3711 return false; 3712 } 3713 } 3714 3715 return true; 3716 } 3717 3718 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3719 3720 const unsigned Opc = Inst.getOpcode(); 3721 const MCInstrDesc &Desc = MII.get(Opc); 3722 3723 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3724 return true; 3725 3726 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3727 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3728 if (isCI() || isSI()) 3729 return false; 3730 } 3731 3732 return true; 3733 } 3734 3735 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3736 const unsigned Opc = Inst.getOpcode(); 3737 const MCInstrDesc &Desc = MII.get(Opc); 3738 3739 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3740 return true; 3741 3742 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3743 if (DimIdx < 0) 3744 return true; 3745 3746 long Imm = Inst.getOperand(DimIdx).getImm(); 3747 if (Imm < 0 || Imm >= 8) 3748 return false; 3749 3750 return true; 3751 } 3752 3753 static bool IsRevOpcode(const unsigned Opcode) 3754 { 3755 switch (Opcode) { 3756 case AMDGPU::V_SUBREV_F32_e32: 3757 case AMDGPU::V_SUBREV_F32_e64: 3758 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3759 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3760 case AMDGPU::V_SUBREV_F32_e32_vi: 3761 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3762 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3763 case AMDGPU::V_SUBREV_F32_e64_vi: 3764 3765 case AMDGPU::V_SUBREV_CO_U32_e32: 3766 case AMDGPU::V_SUBREV_CO_U32_e64: 3767 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3768 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3769 3770 case AMDGPU::V_SUBBREV_U32_e32: 3771 case AMDGPU::V_SUBBREV_U32_e64: 3772 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3773 case 
AMDGPU::V_SUBBREV_U32_e32_vi: 3774 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3775 case AMDGPU::V_SUBBREV_U32_e64_vi: 3776 3777 case AMDGPU::V_SUBREV_U32_e32: 3778 case AMDGPU::V_SUBREV_U32_e64: 3779 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3780 case AMDGPU::V_SUBREV_U32_e32_vi: 3781 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3782 case AMDGPU::V_SUBREV_U32_e64_vi: 3783 3784 case AMDGPU::V_SUBREV_F16_e32: 3785 case AMDGPU::V_SUBREV_F16_e64: 3786 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3787 case AMDGPU::V_SUBREV_F16_e32_vi: 3788 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3789 case AMDGPU::V_SUBREV_F16_e64_vi: 3790 3791 case AMDGPU::V_SUBREV_U16_e32: 3792 case AMDGPU::V_SUBREV_U16_e64: 3793 case AMDGPU::V_SUBREV_U16_e32_vi: 3794 case AMDGPU::V_SUBREV_U16_e64_vi: 3795 3796 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3797 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3798 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3799 3800 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3801 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3802 3803 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3804 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3805 3806 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3807 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3808 3809 case AMDGPU::V_LSHRREV_B32_e32: 3810 case AMDGPU::V_LSHRREV_B32_e64: 3811 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3812 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3813 case AMDGPU::V_LSHRREV_B32_e32_vi: 3814 case AMDGPU::V_LSHRREV_B32_e64_vi: 3815 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3816 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3817 3818 case AMDGPU::V_ASHRREV_I32_e32: 3819 case AMDGPU::V_ASHRREV_I32_e64: 3820 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3821 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3822 case AMDGPU::V_ASHRREV_I32_e32_vi: 3823 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3824 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3825 case AMDGPU::V_ASHRREV_I32_e64_vi: 3826 3827 case AMDGPU::V_LSHLREV_B32_e32: 3828 case AMDGPU::V_LSHLREV_B32_e64: 3829 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3830 case 
AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3831 case AMDGPU::V_LSHLREV_B32_e32_vi: 3832 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3833 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3834 case AMDGPU::V_LSHLREV_B32_e64_vi: 3835 3836 case AMDGPU::V_LSHLREV_B16_e32: 3837 case AMDGPU::V_LSHLREV_B16_e64: 3838 case AMDGPU::V_LSHLREV_B16_e32_vi: 3839 case AMDGPU::V_LSHLREV_B16_e64_vi: 3840 case AMDGPU::V_LSHLREV_B16_gfx10: 3841 3842 case AMDGPU::V_LSHRREV_B16_e32: 3843 case AMDGPU::V_LSHRREV_B16_e64: 3844 case AMDGPU::V_LSHRREV_B16_e32_vi: 3845 case AMDGPU::V_LSHRREV_B16_e64_vi: 3846 case AMDGPU::V_LSHRREV_B16_gfx10: 3847 3848 case AMDGPU::V_ASHRREV_I16_e32: 3849 case AMDGPU::V_ASHRREV_I16_e64: 3850 case AMDGPU::V_ASHRREV_I16_e32_vi: 3851 case AMDGPU::V_ASHRREV_I16_e64_vi: 3852 case AMDGPU::V_ASHRREV_I16_gfx10: 3853 3854 case AMDGPU::V_LSHLREV_B64_e64: 3855 case AMDGPU::V_LSHLREV_B64_gfx10: 3856 case AMDGPU::V_LSHLREV_B64_vi: 3857 3858 case AMDGPU::V_LSHRREV_B64_e64: 3859 case AMDGPU::V_LSHRREV_B64_gfx10: 3860 case AMDGPU::V_LSHRREV_B64_vi: 3861 3862 case AMDGPU::V_ASHRREV_I64_e64: 3863 case AMDGPU::V_ASHRREV_I64_gfx10: 3864 case AMDGPU::V_ASHRREV_I64_vi: 3865 3866 case AMDGPU::V_PK_LSHLREV_B16: 3867 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3868 case AMDGPU::V_PK_LSHLREV_B16_vi: 3869 3870 case AMDGPU::V_PK_LSHRREV_B16: 3871 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3872 case AMDGPU::V_PK_LSHRREV_B16_vi: 3873 case AMDGPU::V_PK_ASHRREV_I16: 3874 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3875 case AMDGPU::V_PK_ASHRREV_I16_vi: 3876 return true; 3877 default: 3878 return false; 3879 } 3880 } 3881 3882 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3883 3884 using namespace SIInstrFlags; 3885 const unsigned Opcode = Inst.getOpcode(); 3886 const MCInstrDesc &Desc = MII.get(Opcode); 3887 3888 // lds_direct register is defined so that it can be used 3889 // with 9-bit operands only. Ignore encodings which do not accept these. 
  // (Continuation of validateLdsDirect.) lds_direct is only meaningful for
  // VOP1/VOP2/VOP3/VOPC/VOP3P and SDWA encodings; anything else passes.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return None;

  // Scan src0..src2 for a use of the LDS_DIRECT register and diagnose
  // illegal uses. Returns the error text, or None if the use is legal.
  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (SrcIdx == -1)
      break;
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A())
        return StringRef("lds_direct is not supported on this GPU");

      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return None; // No use of lds_direct, or a legal one.
}

// Returns the location of the first flat-offset operand, or the current
// parser location if the instruction has none.
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}

// Checks that the immediate offset of a FLAT instruction is encodable:
// signed range for global/scratch segments, unsigned for plain flat.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For FLAT segment the offset must be positive;
  // MSB is ignored and forced to zero.
  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
    if (!isIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
      return false;
    }
  } else {
    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
    if (!isUIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      return false;
    }
  }

  return true;
}

// Returns the location of the first SMEM-offset operand, or the current
// parser location if the instruction has none.
SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
  // Start with second operand because SMEM Offset cannot be dst or src0.
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}

// Checks that an SMEM/SMRD immediate offset fits the encoding for the
// current subtarget (unsigned or signed form, buffer vs non-buffer).
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // SI/CI offsets are not range-checked here.
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  const auto &Op = Inst.getOperand(OpNum);
  if (!Op.isImm())
    return true; // Non-immediate offsets (e.g. expressions) are not checked.

  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
    return true;

  Error(getSMEMOffsetLoc(Operands),
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                             : "expected a 21-bit signed offset");

  return false;
}

// SOP2/SOPC may encode at most one unique 32-bit literal (counting
// unresolved expressions as literals).
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue; // Valid only once NumLiterals > 0.

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
        uint32_t Value = static_cast<uint32_t>(MO.getImm());
        // The same literal value repeated counts once.
        if (NumLiterals == 0 || LiteralValue != Value) {
          LiteralValue = Value;
          ++NumLiterals;
        }
      } else if (MO.isExpr()) {
        ++NumExprs;
      }
    }
  }

  return NumLiterals + NumExprs <= 1;
}

// Validates the op_sel (and for gfx940 DOT, op_sel_hi) modifier values.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  // v_permlane16/v_permlanex16 only accept the low two op_sel bits.
  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  // On gfx940, DOT instructions require op_sel == 0 and op_sel_hi == -1.
  if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  return true;
}

// Rejects 64-bit DPP (src0 wider than 32 bits) with a dpp_ctrl that is not
// legal for 64-bit operation.
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx < 0)
    return true;
  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();

  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
    // DPP64 is supported for row_newbcast only.
    // A src0 with a sub1 subregister implies a 64-bit (multi-dword) operand.
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    if (Src0Idx >= 0 &&
        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, "64 bit dpp only supports row_newbcast");
      return false;
    }
  }

  return true;
}

// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}

// One unique literal can be used.
// VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
  // Only VOP3/VOP3P encodings, or instructions with an explicit imm
  // operand, are subject to this check.
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      ImmIdx == -1)
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue; // Valid only once NumLiterals > 0.

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
      continue;

    // On subtargets with the MFMA inline-literal bug, MAI src2 must not use
    // an inline constant at all.
    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
      Error(getConstLoc(Operands),
            "inline constants are not allowed for this operand");
      return false;
    }

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      // Repetition of the same literal value counts once.
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  // Literals on VOP3 forms require the FeatureVOP3Literal subtarget feature
  // unless the instruction has a dedicated imm operand.
  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands), "only one literal operand is allowed");
    return false;
  }

  return true;
}

// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
                         const MCRegisterInfo *MRI) {
  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
  if (OpIdx < 0)
    return -1;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return -1;

  // For register tuples, classify by the first subregister.
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  return AGPR32.contains(Reg) ? 1 : 0;
}

// Checks AGPR/VGPR register-class consistency between the destination and
// data operands of memory (FLAT/MUBUF/MTBUF/MIMG/DS) instructions.
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                  SIInstrFlags::DS)) == 0)
    return true;

  // DS instructions name their data operand data0; the others use vdata.
  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                      : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);

  // For DS, data0 and data1 must be in the same register class.
  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    // gfx90a: dst and data must agree (all VGPR or all AGPR).
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  // Other subtargets: AGPR loads and stores are not supported here.
  return DstAreg < 1 && DataAreg < 1;
}

// On gfx90a, VGPR/AGPR tuples must start on an even-numbered register.
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    // Only register tuples (which have a sub0) need alignment.
    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}

// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
// Enforces the gfx90a restriction that DS_GWS data registers be even-aligned
// (see comment above).
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;

  int Opc = Inst.getOpcode();
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  int Data0Pos =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  assert(Data0Pos != -1);
  auto Reg = Inst.getOperand(Data0Pos).getReg();
  // Index relative to the start of the VGPR or AGPR file, as appropriate.
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    SMLoc RegLoc = getRegLoc(Reg, Operands);
    Error(RegLoc, "vgpr must be even aligned");
    return false;
  }

  return true;
}

// Validates the cache-policy (cpol) bits: legal bits for SMRD, scc
// availability, and glc/sc0 requirements for atomics.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // SMRD only accepts GLC and DLC.
  if ((TSFlags & (SIInstrFlags::SMRD)) &&
      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
    Error(IDLoc, "invalid cache policy for SMRD instruction");
    return false;
  }

  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    // Point the diagnostic at the literal "scc" token within the modifier.
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
    Error(S, "scc is not supported on this GPU");
    return false;
  }

  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics (except MIMG) must set glc (spelled sc0 on gfx940).
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    // Non-returning atomics must not set glc/sc0.
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}

// Top-level semantic validation of a successfully matched instruction.
// Runs each validate* check in turn, emitting a diagnostic and returning
// false on the first failure.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(getLitLoc(Operands),
          "only one literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateEarlyClobberLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst)) {
    Error(IDLoc, "dim modifier is required on this GPU");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
          "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAddrSize(Inst)) {
    Error(IDLoc,
          "image address size does not match dim and a16");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateFlatOffset(Inst, Operands)) {
    return false;
  }
  if (!validateSMEMOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
                     ?
"invalid register class: data and dst should be all VGPR or AGPR" 4400 : "invalid register class: agpr loads and stores not supported on this GPU" 4401 ); 4402 return false; 4403 } 4404 if (!validateVGPRAlign(Inst)) { 4405 Error(IDLoc, 4406 "invalid register class: vgpr tuples must be 64 bit aligned"); 4407 return false; 4408 } 4409 if (!validateGWS(Inst, Operands)) { 4410 return false; 4411 } 4412 4413 if (!validateDivScale(Inst)) { 4414 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4415 return false; 4416 } 4417 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4418 return false; 4419 } 4420 4421 return true; 4422 } 4423 4424 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4425 const FeatureBitset &FBS, 4426 unsigned VariantID = 0); 4427 4428 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4429 const FeatureBitset &AvailableFeatures, 4430 unsigned VariantID); 4431 4432 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4433 const FeatureBitset &FBS) { 4434 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4435 } 4436 4437 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4438 const FeatureBitset &FBS, 4439 ArrayRef<unsigned> Variants) { 4440 for (auto Variant : Variants) { 4441 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4442 return true; 4443 } 4444 4445 return false; 4446 } 4447 4448 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4449 const SMLoc &IDLoc) { 4450 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4451 4452 // Check if requested instruction variant is supported. 4453 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4454 return false; 4455 4456 // This instruction is not supported. 4457 // Clear any other pending errors because they are no longer relevant. 4458 getParser().clearPendingErrors(); 4459 4460 // Requested instruction variant is not supported. 4461 // Check if any other variants are supported. 
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

// Matches the parsed operands against every assembler variant, keeping the
// most specific failure status; on success, validates and emits the MCInst.
// Returns true on error (MCTargetAsmParser convention).
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}

// Parses an absolute expression into Ret (truncated to 32 bits).
// Returns true on error (nothing consumed if the lookahead token is wrong).
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

// Parses a "major, minor" version pair. Returns true on error.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

// Handles the .amdgcn_target directive: the quoted target id must match the
// target id the streamer was configured with.
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  SMRange TargetRange =
      SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}

// Emits a generic out-of-range diagnostic anchored to Range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}

// Converts raw VGPR/SGPR counts into the granulated "block" encodings used
// by the kernel descriptor, range-checking against the subtarget limits.
// Returns true on error (diagnostic already emitted).
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    NumSGPRs = 0; // gfx10+ does not encode an SGPR count.
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Account for VCC/flat-scratch/XNACK reserved SGPRs.
    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

// Handles the .amdhsa_kernel directive: parses the nested .amdhsa_* settings
// up to .end_amdhsa_kernel, fills in a kernel_descriptor_t, and emits it.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  StringSet<> Seen; // Directives already processed (no repeats allowed).

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  uint64_t AccumOffset = 0;
  uint64_t SharedVGPRCount = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;

  // Count the number of user SGPRs implied from the enabled feature bits.
  unsigned ImpliedUserSGPRCount = 0;

  // Track if the asm explicitly contains the directive for the user SGPR
  // count.
  Optional<unsigned> ExplicitUserSGPRCount;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  Optional<bool> EnableWavefrontSize32;

  // Main directive loop: one .amdhsa_* setting per statement until
  // .end_amdhsa_kernel.
  while (true) {
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-checks VALUE against the descriptor field width, then stores it.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_count") {
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      if (!hasArchitectedFlatScratch())
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
                                 IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      SharedVGPRCount = Val;
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                       COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (ExplicitUserSGPRCount
&& ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4963 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4964 "enabled user SGPRs"); 4965 4966 unsigned UserSGPRCount = 4967 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4968 4969 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4970 return TokError("too many user SGPRs enabled"); 4971 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4972 UserSGPRCount); 4973 4974 if (isGFX90A()) { 4975 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4976 return TokError(".amdhsa_accum_offset directive is required"); 4977 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4978 return TokError("accum_offset should be in range [4..256] in " 4979 "increments of 4"); 4980 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4981 return TokError("accum_offset exceeds total VGPR allocation"); 4982 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4983 (AccumOffset / 4 - 1)); 4984 } 4985 4986 if (IVersion.Major == 10) { 4987 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 4988 if (SharedVGPRCount && EnableWavefrontSize32) { 4989 return TokError("shared_vgpr_count directive not valid on " 4990 "wavefront size 32"); 4991 } 4992 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 4993 return TokError("shared_vgpr_count*2 + " 4994 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 4995 "exceed 63\n"); 4996 } 4997 } 4998 4999 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5000 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5001 ReserveFlatScr); 5002 return false; 5003 } 5004 5005 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5006 uint32_t Major; 5007 uint32_t Minor; 5008 5009 if (ParseDirectiveMajorMinor(Major, Minor)) 5010 return true; 5011 5012 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5013 return false; 5014 } 5015 5016 bool 
AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (isToken(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
                                                        ISA.Stepping,
                                                        "AMD", "AMDGPU");
    return false;
  }

  // Otherwise the full form is:
  //   major ',' minor ',' stepping ',' "vendor" ',' "arch"
  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("vendor name required, comma expected");

  if (!parseString(VendorName, "invalid vendor name"))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("arch name required, comma expected");

  if (!parseString(ArchName, "invalid arch name"))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
                                                      VendorName, ArchName);
  return false;
}

/// Parse one "key = value" entry of an .amd_kernel_code_t block into \p Header
/// and diagnose combinations that are invalid for the current subtarget.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // Post-parse validation: the field has already been stored into Header by
  // parseAmdKernelCodeField; the checks below only reject subtarget-invalid
  // values.
  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is stored as log2: 5 => wave32, 6 => wave64.
  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}

/// Parse an .amd_kernel_code_t block: repeated key=value entries terminated
/// by .end_amd_kernel_code_t, then emit the resulting header.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

/// Parse .amdgpu_hsa_kernel: marks the named symbol as an HSA kernel entry
/// point and (re)initializes the per-kernel register scope.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);

  KernelScope.initialize(getContext());
  return false;
}

/// Parse .amd_amdgpu_isa "target-id": the string must match the target id
/// derived from the command-line options.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  Lex();

  return false;
}

/// Parse an HSA metadata block (V2 or V3 delimiters depending on the code
/// object ABI) and forward the collected YAML text to the streamer.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      isHsaAbiVersion3AndAbove(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  }

  return false;
}

/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Whitespace must be preserved verbatim in the collected text, so disable
  // the lexer's usual space skipping until we are done.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Copy the rest of the statement and terminate it with the target's
    // statement separator so the consumer sees one entry per line.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexer behavior before reporting any error.
  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}

/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // Legacy format is a flat comma-separated list of key,value register
  // pairs, so an even number of values is required.
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}

/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  // Alignment is optional and defaults to 4 bytes.
  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}

/// Top-level directive dispatcher. Returns true (unhandled) for anything the
/// AMDGPU parser does not recognize so generic handling can take over.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // Code-object V3+ and the older ABIs accept disjoint sets of directives.
  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  // Directives accepted regardless of code-object ABI.
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

/// Return true if \p RegNo is a register that exists on the current
/// subtarget.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10 has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  // TBA/TMA were removed as user-visible registers in gfx9.
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

/// Parse a single instruction operand, dispatching to the generated custom
/// parsers first and handling GFX10+ MIMG NSA register lists.
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    // Parse the comma-separated register list of an NSA address:
    //   [vN, vM, ...]
    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    // Keep the brackets only for a real list; a single register inside
    // brackets is treated as a plain register operand.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

/// Strip a trailing encoding suffix (_e64/_e32/_dpp/_sdwa) from \p Name,
/// recording the forced encoding for the rest of the instruction parse.
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

/// Top-level instruction parser: pushes the mnemonic token, then parses
/// operands until end of statement, recovering to the end of the line on
/// failure.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // The first operand of a GFX10+ image instruction may be an NSA
    // register list.
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    // Reset per-instruction duplicate tracking for cache-policy modifiers.
    CPolSeen = 0;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    if (Res != MatchOperand_Success) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg =
          (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
                                            "not a valid operand.";
        Error(getLoc(), Msg);
      }
      // Consume the remainder of the statement so the next instruction
      // starts from a clean state.
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

/// Parse "<Prefix>:<expr>" into \p IntVal. NoMatch if the prefix is absent.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}

/// Parse "<Prefix>:<expr>" and append it to \p Operands as an immediate of
/// type \p ImmTy, optionally converting/validating via \p ConvertResult.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  // Note: an invalid value is diagnosed but the operand is still pushed so
  // that parsing can continue past the error.
  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}

/// Parse "<Prefix>:[b0,b1,...]" (up to 4 0/1 elements) into a packed
/// immediate operand of type \p ImmTy.
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  // Each element selects one bit of the packed value; element I maps to
  // bit I.
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}

/// Parse a boolean modifier: "<Name>" sets the bit, "no<Name>" clears it.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  } else {
    return MatchOperand_NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128()) {
    Error(S, "r128 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }
  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
    Error(S, "a16 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  // On gfx9 the a16 modifier is encoded in the shared r128/a16 field.
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

/// Parse one cache-policy modifier (glc/slc/dlc/scc, or sc0/sc1/nt on
/// gfx940 vector ops) in plain or "no"-prefixed form. All modifiers of one
/// instruction are merged into a single ImmTyCPol operand.
OperandMatchResultTy
AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  unsigned CPolOn = 0;
  unsigned CPolOff = 0;
  SMLoc S = getLoc();

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (isGFX940() && !Mnemo.startswith("s_")) {
    if (trySkipId("sc0"))
      CPolOn = AMDGPU::CPol::SC0;
    else if (trySkipId("nosc0"))
      CPolOff = AMDGPU::CPol::SC0;
    else if (trySkipId("nt"))
      CPolOn = AMDGPU::CPol::NT;
    else if (trySkipId("nont"))
      CPolOff = AMDGPU::CPol::NT;
    else if (trySkipId("sc1"))
      CPolOn = AMDGPU::CPol::SC1;
    else if (trySkipId("nosc1"))
      CPolOff = AMDGPU::CPol::SC1;
    else
      return MatchOperand_NoMatch;
  }
  else if (trySkipId("glc"))
    CPolOn = AMDGPU::CPol::GLC;
  else if (trySkipId("noglc"))
    CPolOff = AMDGPU::CPol::GLC;
  else if (trySkipId("slc"))
    CPolOn = AMDGPU::CPol::SLC;
  else if (trySkipId("noslc"))
    CPolOff = AMDGPU::CPol::SLC;
  else if (trySkipId("dlc"))
    CPolOn = AMDGPU::CPol::DLC;
  else if (trySkipId("nodlc"))
    CPolOff = AMDGPU::CPol::DLC;
  else if (trySkipId("scc"))
    CPolOn = AMDGPU::CPol::SCC;
  else if (trySkipId("noscc"))
    CPolOff = AMDGPU::CPol::SCC;
  else
    return MatchOperand_NoMatch;

  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
    Error(S, "dlc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
    Error(S, "scc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  // CPolSeen accumulates bits across the instruction; 'glc' followed by
  // 'noglc' (or a repeat) is a duplicate.
  if (CPolSeen & (CPolOn | CPolOff)) {
    Error(S, "duplicate cache policy modifier");
    return MatchOperand_ParseFail;
  }

  CPolSeen |= (CPolOn | CPolOff);

  // If a CPol operand already exists, fold this modifier into it instead of
  // adding a second operand.
  for (unsigned I = 1; I != Operands.size(); ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isCPol()) {
      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
      return MatchOperand_Success;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
                                              AMDGPUOperand::ImmTyCPol));

  return MatchOperand_Success;
}

/// Append the optional immediate of type \p ImmT to \p Inst, using the parsed
/// operand recorded in \p OptionalIdx if present and \p Default otherwise.
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

/// Parse "<Prefix>:<identifier>" into \p Value, recording the identifier's
/// location in \p StringLoc. NoMatch if the prefix is absent.
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                       StringRef &Value,
                                       SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? MatchOperand_Success
                                                  : MatchOperand_ParseFail;
}

//===----------------------------------------------------------------------===//
// MTBUF format
//===----------------------------------------------------------------------===//

/// Try to parse "<Pref>:<value>" in the range [0..MaxVal] into \p Fmt.
/// Returns false only on a hard parse/range error; an absent prefix leaves
/// \p Fmt untouched and returns true.
bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res == MatchOperand_ParseFail)
    return false;
  if (Res == MatchOperand_NoMatch)
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  // Whichever half was omitted falls back to its default encoding.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}

/// Parse the gfx10+ unified "format:<value>" syntax.
OperandMatchResultTy
AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    return MatchOperand_ParseFail;

  if (Fmt == UFMT_UNDEF)
    return MatchOperand_NoMatch;

  Format = Fmt;
  return MatchOperand_Success;
}

/// Interpret \p FormatStr as either a data-format or numeric-format name,
/// storing into \p Dfmt or \p Nfmt accordingly. False (with a diagnostic)
/// if the name matches neither.
bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
                                    int64_t &Nfmt,
                                    StringRef FormatStr,
                                    SMLoc Loc) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  int64_t Format;

  Format = getDfmt(FormatStr);
  if (Format != DFMT_UNDEF) {
    Dfmt = Format;
    return true;
  }

  Format = getNfmt(FormatStr, getSTI());
  if (Format != NFMT_UNDEF) {
    Nfmt = Format;
    return true;
  }

  Error(Loc, "unsupported format");
  return false;
}

/// Parse a symbolic split format: one or two comma-separated format names
/// inside "format:[...]", at most one dfmt and one nfmt.
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt,
FormatStr, FormatLoc)) 5872 return MatchOperand_ParseFail; 5873 5874 if (trySkipToken(AsmToken::Comma)) { 5875 StringRef Str; 5876 SMLoc Loc = getLoc(); 5877 if (!parseId(Str, "expected a format string") || 5878 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5879 return MatchOperand_ParseFail; 5880 } 5881 if (Dfmt == DFMT_UNDEF) { 5882 Error(Loc, "duplicate numeric format"); 5883 return MatchOperand_ParseFail; 5884 } else if (Nfmt == NFMT_UNDEF) { 5885 Error(Loc, "duplicate data format"); 5886 return MatchOperand_ParseFail; 5887 } 5888 } 5889 5890 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5891 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5892 5893 if (isGFX10Plus()) { 5894 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5895 if (Ufmt == UFMT_UNDEF) { 5896 Error(FormatLoc, "unsupported format"); 5897 return MatchOperand_ParseFail; 5898 } 5899 Format = Ufmt; 5900 } else { 5901 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5902 } 5903 5904 return MatchOperand_Success; 5905 } 5906 5907 OperandMatchResultTy 5908 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5909 SMLoc Loc, 5910 int64_t &Format) { 5911 using namespace llvm::AMDGPU::MTBUFFormat; 5912 5913 auto Id = getUnifiedFormat(FormatStr); 5914 if (Id == UFMT_UNDEF) 5915 return MatchOperand_NoMatch; 5916 5917 if (!isGFX10Plus()) { 5918 Error(Loc, "unified format is not supported on this GPU"); 5919 return MatchOperand_ParseFail; 5920 } 5921 5922 Format = Id; 5923 return MatchOperand_Success; 5924 } 5925 5926 OperandMatchResultTy 5927 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5928 using namespace llvm::AMDGPU::MTBUFFormat; 5929 SMLoc Loc = getLoc(); 5930 5931 if (!parseExpr(Format)) 5932 return MatchOperand_ParseFail; 5933 if (!isValidFormatEncoding(Format, getSTI())) { 5934 Error(Loc, "out of range format"); 5935 return MatchOperand_ParseFail; 5936 } 5937 5938 return MatchOperand_Success; 5939 } 5940 5941 OperandMatchResultTy 5942 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t 
&Format) { 5943 using namespace llvm::AMDGPU::MTBUFFormat; 5944 5945 if (!trySkipId("format", AsmToken::Colon)) 5946 return MatchOperand_NoMatch; 5947 5948 if (trySkipToken(AsmToken::LBrac)) { 5949 StringRef FormatStr; 5950 SMLoc Loc = getLoc(); 5951 if (!parseId(FormatStr, "expected a format string")) 5952 return MatchOperand_ParseFail; 5953 5954 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5955 if (Res == MatchOperand_NoMatch) 5956 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5957 if (Res != MatchOperand_Success) 5958 return Res; 5959 5960 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5961 return MatchOperand_ParseFail; 5962 5963 return MatchOperand_Success; 5964 } 5965 5966 return parseNumericFormat(Format); 5967 } 5968 5969 OperandMatchResultTy 5970 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5971 using namespace llvm::AMDGPU::MTBUFFormat; 5972 5973 int64_t Format = getDefaultFormatEncoding(getSTI()); 5974 OperandMatchResultTy Res; 5975 SMLoc Loc = getLoc(); 5976 5977 // Parse legacy format syntax. 5978 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5979 if (Res == MatchOperand_ParseFail) 5980 return Res; 5981 5982 bool FormatFound = (Res == MatchOperand_Success); 5983 5984 Operands.push_back( 5985 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5986 5987 if (FormatFound) 5988 trySkipToken(AsmToken::Comma); 5989 5990 if (isToken(AsmToken::EndOfStatement)) { 5991 // We are expecting an soffset operand, 5992 // but let matcher handle the error. 5993 return MatchOperand_Success; 5994 } 5995 5996 // Parse soffset. 
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    // The format may follow soffset; if it does, patch the FORMAT operand
    // that was pushed (with the default encoding) before soffset.
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      auto Size = Operands.size();
      // Operands[Size - 2] is the FORMAT operand; Size - 1 is soffset.
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  // A format was already parsed before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

// Convert parsed operands into a DS instruction that uses the
// offset0/offset1 pair of offsets (plus optional gds) and implicit m0.
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Convert parsed operands into a generic DS instruction. If IsGdsHardcoded
// (or a literal "gds" token was parsed) the gds operand is implied by the
// opcode and not emitted separately.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 carries a swizzle immediate in place of a plain offset.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Convert parsed operands into an EXP instruction; computes the "en" mask
// from which of the four sources are present and rewrites sources for
// compressed exports.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" placeholders become NoRegister sources.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: src0/src1 hold the data, src2/src3 are unused,
    // so move the second source into slot 1 and clear the upper slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each present source enables one bit (two bits per source if compressed).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

// Fold CntVal into the combined waitcnt value IntVal via the given
// encode/decode pair. Returns true on failure, i.e. when CntVal does not
// round-trip (too large) and saturation was not requested; with Saturate
// the field is clamped to its maximum instead.
static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

// Parse one "<name>(<expr>)" counter specifier and merge it into IntVal.
// Recognized names: vmcnt, expcnt, lgkmcnt, each optionally with a "_sat"
// suffix requesting saturation instead of an error on overflow.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName ==
      "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counters may be separated by '&' or ','; a trailing separator with
  // nothing after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}

// Parse the s_waitcnt operand: either a list of named counters like
// "vmcnt(0) & lgkmcnt(0)", or a single absolute expression. Unspecified
// counters keep their no-wait (all-ones) value from getWaitcntBitMask.
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

// Parse the body of a hwreg(...) macro: a register (by name or numeric
// code), optionally followed by ",offset,width". Consumes the closing ')'.
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

// Validate a parsed hwreg specifier; emits a diagnostic at the offending
// operand's location and returns false on failure.
bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic) {
    // A known name may still be unsupported on the current subtarget.
    if (HwReg.Id == OPR_ID_UNSUPPORTED) {
      Error(HwReg.Loc,
            "specified hardware register is not supported on this GPU");
      return false;
    }
  } else {
    // Numeric codes are only range-checked.
    if (!isValidHwreg(HwReg.Id)) {
      Error(HwReg.Loc,
            "invalid code of hardware register: only 6-bit values are legal");
      return false;
    }
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

// Parse an s_setreg/s_getreg operand: either a hwreg(...) macro or a raw
// 16-bit immediate expression.
OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(OPR_ID_UNKNOWN);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

// Parse the body of a sendmsg(...) macro: a message (by name or numeric
// code), optionally followed by an operation and a stream id. Consumes the
// closing ')'.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

// Validate a parsed sendmsg specifier; emits a diagnostic and returns false
// on failure.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

// Parse an s_sendmsg operand: either a sendmsg(...) macro or a raw 16-bit
// immediate expression.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

// Parse an interpolation slot name (p10/p20/p0) into its encoding.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parse an interpolation attribute of the form "attr<N>.<chan>" where
// <chan> is one of x/y/z/w and N is 0..63. Pushes two operands: the
// attribute number and the channel.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  // The last two characters are the ".<chan>" suffix.
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  // Strip the channel suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

// Parse an export target name (e.g. mrt0, pos0) and check it is supported
// by the current subtarget.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
    Error(S, (Id == ET_INVALID) ?
                "invalid exp target" :
                "exp target is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

// True if Token is the identifier Id.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

// True if the current token is the identifier Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

// True if the current token has the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

// Consume the current token if it is the identifier Id.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it is the identifier Pref+Id
// (prefix and suffix checked separately).
bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

// Consume two tokens if they are the identifier Id followed by a token of
// the given kind.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it has the given kind.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Consume a token of the given kind or emit ErrMsg at the current location.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  // Parse an expression and require that it evaluates to an absolute value.
  // Expected, if non-empty, names the alternative construct for diagnostics.
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));
  }
  return false;
}

// Parse an expression operand; absolute expressions become immediates,
// anything else is kept as an MCExpr operand.
bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

// Consume a string token, returning its contents (without quotes) in Val.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

// Consume an identifier token, returning its spelling in Val.
// An empty ErrMsg suppresses the diagnostic on failure.
bool
AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::Identifier)) {
    Val = getTokenStr();
    lex();
    return true;
  } else {
    if (!ErrMsg.empty())
      Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

// Look ahead one token without consuming; never peeks past the end of the
// statement.
AsmToken
AMDGPUAsmParser::peekToken() {
  return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
}

// Fill Tokens with upcoming tokens; slots beyond what the lexer can provide
// are padded with Error tokens.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}

// Return the location of the last parsed operand satisfying Test, or the
// location of the mnemonic (Operands[0]) if none matches.
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}

// Location of the last immediate operand of the given type.
SMLoc
AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
  return getOperandLoc(Test, Operands);
}

// Location of the last operand referencing the given register.
SMLoc
AMDGPUAsmParser::getRegLoc(unsigned Reg,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) {
    return Op.isRegKind() && Op.getReg() == Reg;
  };
  return getOperandLoc(Test, Operands);
}

// Location of the last literal (or expression) operand.
SMLoc
AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  };
  return getOperandLoc(Test, Operands);
}

// Location of the last inline-constant immediate operand.
SMLoc
AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.isImmKindConst();
  };
  return getOperandLoc(Test, Operands);
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack and/or/xor masks into a BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse one comma-prefixed swizzle macro operand and range-check it against
// [MinVal, MaxVal]; Loc receives the operand's location for diagnostics.
bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
                                     const StringRef ErrMsg,
                                     SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

// Parse OpNum swizzle operands with a shared range and error message.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}

// swizzle(QUAD_PERM, lane0, lane1, lane2, lane3)
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group_size, lane_id): encoded as a bitmask perm whose
// AND mask selects the group and whose OR mask injects the source lane.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group_size): XOR with group_size-1 reverses lane order
// within each group.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group_size): XOR with group_size swaps adjacent groups.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "mask"): a 5-character string of per-bit controls:
//   '0' force bit to 0, '1' force to 1, 'p' pass through, 'i' invert.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  // Leftmost character controls the most significant lane-id bit.
  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// A raw 16-bit swizzle offset value (non-macro form).
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Dispatch on the swizzle macro mode name inside "swizzle(...)".
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse an "offset:" operand that may be either a swizzle(...) macro or a
// plain 16-bit offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok =
         false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

// Parse the body of a gpr_idx(...) macro: a comma-separated list of distinct
// index mode names, or an immediately closing ')' meaning OFF.
// Returns the combined mode bitmask, or UNDEF on error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Match one of the symbolic mode names.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

// Parse an s_set_gpr_idx_on operand: either a gpr_idx(...) macro or a raw
// 4-bit immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return MatchOperand_ParseFail;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
      return MatchOperand_ParseFail;
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

// Parse a branch target: a label (symbol reference) or an absolute
// expression fitting in a signed 16-bit offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

// A boolean-holding register operand parses like any other register.
OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

// Default cache-policy operand: no bits set.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}

// Convert parsed operands into a MUBUF instruction. For atomics, inspect
// the cache-policy operand to decide whether this is the returning variant.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // The GLC bit of the cache policy selects the "return" form of atomics.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    // No GLC: switch to the no-return flavor of the opcode if one exists.
    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly updated) opcode's TSFlags so it
    // reflects the instruction actually being emitted.
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Append optional immediates in encoding order.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

// Converter for MTBUF (typed buffer) instructions: registers and the
// immediate soffset are added in parse order, then optional immediates
// are appended in encoding order.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
7268 if (Op.isToken()) { 7269 continue; 7270 } 7271 assert(Op.isImm()); 7272 7273 // Handle optional arguments 7274 OptionalIdx[Op.getImmTy()] = i; 7275 } 7276 7277 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7278 AMDGPUOperand::ImmTyOffset); 7279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7281 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7283 } 7284 7285 //===----------------------------------------------------------------------===// 7286 // mimg 7287 //===----------------------------------------------------------------------===// 7288 7289 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7290 bool IsAtomic) { 7291 unsigned I = 1; 7292 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7293 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7294 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7295 } 7296 7297 if (IsAtomic) { 7298 // Add src, same as dst 7299 assert(Desc.getNumDefs() == 1); 7300 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7301 } 7302 7303 OptionalImmIndexMap OptionalIdx; 7304 7305 for (unsigned E = Operands.size(); I != E; ++I) { 7306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7307 7308 // Add the register arguments 7309 if (Op.isReg()) { 7310 Op.addRegOperands(Inst, 1); 7311 } else if (Op.isImmModifier()) { 7312 OptionalIdx[Op.getImmTy()] = I; 7313 } else if (!Op.isToken()) { 7314 llvm_unreachable("unexpected operand type"); 7315 } 7316 } 7317 7318 bool IsGFX10Plus = isGFX10Plus(); 7319 7320 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7321 if (IsGFX10Plus) 7322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7323 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyUNorm); 7324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7325 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7326 if (IsGFX10Plus) 7327 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7328 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7331 if (!IsGFX10Plus) 7332 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7333 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7334 } 7335 7336 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7337 cvtMIMG(Inst, Operands, true); 7338 } 7339 7340 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7341 OptionalImmIndexMap OptionalIdx; 7342 bool IsAtomicReturn = false; 7343 7344 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7345 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7346 if (!Op.isCPol()) 7347 continue; 7348 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7349 break; 7350 } 7351 7352 if (!IsAtomicReturn) { 7353 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7354 if (NewOpc != -1) 7355 Inst.setOpcode(NewOpc); 7356 } 7357 7358 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7359 SIInstrFlags::IsAtomicRet; 7360 7361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7363 7364 // Add the register arguments 7365 if (Op.isReg()) { 7366 Op.addRegOperands(Inst, 1); 7367 if (IsAtomicReturn && i == 1) 7368 Op.addRegOperands(Inst, 1); 7369 continue; 7370 } 7371 7372 // Handle the case where soffset is an immediate 7373 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7374 Op.addImmOperands(Inst, 1); 
7375 continue; 7376 } 7377 7378 // Handle tokens like 'offen' which are sometimes hard-coded into the 7379 // asm string. There are no MCInst operands for these. 7380 if (Op.isToken()) { 7381 continue; 7382 } 7383 assert(Op.isImm()); 7384 7385 // Handle optional arguments 7386 OptionalIdx[Op.getImmTy()] = i; 7387 } 7388 7389 if ((int)Inst.getNumOperands() <= 7390 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7391 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7392 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7393 } 7394 7395 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7396 const OperandVector &Operands) { 7397 for (unsigned I = 1; I < Operands.size(); ++I) { 7398 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7399 if (Operand.isReg()) 7400 Operand.addRegOperands(Inst, 1); 7401 } 7402 7403 Inst.addOperand(MCOperand::createImm(1)); // a16 7404 } 7405 7406 //===----------------------------------------------------------------------===// 7407 // smrd 7408 //===----------------------------------------------------------------------===// 7409 7410 bool AMDGPUOperand::isSMRDOffset8() const { 7411 return isImm() && isUInt<8>(getImm()); 7412 } 7413 7414 bool AMDGPUOperand::isSMEMOffset() const { 7415 return isImm(); // Offset range is checked later by validator. 7416 } 7417 7418 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7419 // 32-bit literals are only supported on CI and we only want to use them 7420 // when the offset is > 8-bits. 
7421 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7422 } 7423 7424 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7425 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7426 } 7427 7428 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7429 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7430 } 7431 7432 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7433 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7434 } 7435 7436 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7437 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7438 } 7439 7440 //===----------------------------------------------------------------------===// 7441 // vop3 7442 //===----------------------------------------------------------------------===// 7443 7444 static bool ConvertOmodMul(int64_t &Mul) { 7445 if (Mul != 1 && Mul != 2 && Mul != 4) 7446 return false; 7447 7448 Mul >>= 1; 7449 return true; 7450 } 7451 7452 static bool ConvertOmodDiv(int64_t &Div) { 7453 if (Div == 1) { 7454 Div = 0; 7455 return true; 7456 } 7457 7458 if (Div == 2) { 7459 Div = 3; 7460 return true; 7461 } 7462 7463 return false; 7464 } 7465 7466 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7467 // This is intentional and ensures compatibility with sp3. 7468 // See bug 35397 for details. 7469 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7470 if (BoundCtrl == 0 || BoundCtrl == 1) { 7471 BoundCtrl = 1; 7472 return true; 7473 } 7474 return false; 7475 } 7476 7477 // Note: the order in this table matches the order of operands in AsmString. 
// Table of all optional operands: {asm name, immediate type, is-a-bare-bit
// flag (no ":value" suffix), optional value converter}. Entries are scanned
// in order by parseOptionalOpr(), so order here is load-bearing; a name may
// legitimately appear more than once (e.g. "d16") because it occupies
// different positions in different instruction classes.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

// Emit target-setup directives at the start of the assembly file; a no-op
// for r600 or when no target streamer is attached.
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3AndAbove(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

// Parse one optional operand, then speculatively parse a bounded number of
// further comma-separated optional operands (see the comment below).
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.
7548 7549 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7550 if (res != MatchOperand_Success || 7551 isToken(AsmToken::EndOfStatement)) 7552 break; 7553 7554 trySkipToken(AsmToken::Comma); 7555 res = parseOptionalOpr(Operands); 7556 } 7557 7558 return res; 7559 } 7560 7561 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7562 OperandMatchResultTy res; 7563 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7564 // try to parse any optional operand here 7565 if (Op.IsBit) { 7566 res = parseNamedBit(Op.Name, Operands, Op.Type); 7567 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7568 res = parseOModOperand(Operands); 7569 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7570 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7571 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7572 res = parseSDWASel(Operands, Op.Name, Op.Type); 7573 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7574 res = parseSDWADstUnused(Operands); 7575 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7576 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7577 Op.Type == AMDGPUOperand::ImmTyNegLo || 7578 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7579 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7580 Op.ConvertResult); 7581 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7582 res = parseDim(Operands); 7583 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7584 res = parseCPol(Operands); 7585 } else { 7586 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7587 } 7588 if (res != MatchOperand_NoMatch) { 7589 return res; 7590 } 7591 } 7592 return MatchOperand_NoMatch; 7593 } 7594 7595 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7596 StringRef Name = getTokenStr(); 7597 if (Name == "mul") { 7598 return parseIntWithPrefix("mul", Operands, 7599 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7600 } 7601 7602 if (Name == "div") { 7603 return parseIntWithPrefix("div", 
Operands, 7604 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7605 } 7606 7607 return MatchOperand_NoMatch; 7608 } 7609 7610 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7611 cvtVOP3P(Inst, Operands); 7612 7613 int Opc = Inst.getOpcode(); 7614 7615 int SrcNum; 7616 const int Ops[] = { AMDGPU::OpName::src0, 7617 AMDGPU::OpName::src1, 7618 AMDGPU::OpName::src2 }; 7619 for (SrcNum = 0; 7620 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7621 ++SrcNum); 7622 assert(SrcNum > 0); 7623 7624 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7625 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7626 7627 if ((OpSel & (1 << SrcNum)) != 0) { 7628 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7629 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7630 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7631 } 7632 } 7633 7634 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7635 // 1. This operand is input modifiers 7636 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7637 // 2. This is not last operand 7638 && Desc.NumOperands > (OpNum + 1) 7639 // 3. Next operand is register class 7640 && Desc.OpInfo[OpNum + 1].RegClass != -1 7641 // 4. 
  // Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

// Converter for VOP3 interpolation instructions: defs, then modified
// sources / interp-specific immediates, then optional high/clamp/omod
// immediates where the opcode declares them.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // The classification keys off the NEXT MCInst slot, hence
    // Inst.getNumOperands() rather than I.
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

// Generic VOP3 converter: defs first, then sources (with or without input
// modifiers depending on the opcode), then optional clamp/omod, plus a
// tied-src2 fixup for the mac/fmac family.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    // Insert a zero src2_modifiers followed by src2 tied to dst at the
    // position the encoding expects.
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

// Public VOP3 entry point with a local optional-operand map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

// VOP3P conversion: appends op_sel/op_sel_hi/neg_lo/neg_hi immediates
// where declared, then folds those packed bits per-source into the
// srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    // vdst_in is tied to the destination.
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed ops default op_sel_hi to all-ones (-1), unpacked to zero.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Bit J of each packed field controls source J; translate it into the
  // corresponding srcJ_modifiers bits.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

// Public VOP3P entry point: run the generic VOP3 lowering, then the
// VOP3P-specific modifier folding, sharing one optional-operand map.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

// True iff this operand is a parsed dpp8 lane-select immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

// True iff this operand is a dpp_ctrl immediate whose value matches one of
// the encodings defined in AMDGPU::DPP::DppCtrl.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

// MFMA blgp modifier: 3-bit immediate.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

// MFMA cbsz modifier: 3-bit immediate.
bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

// MFMA abid modifier: 4-bit immediate.
bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

// 16-bit immediate, accepted as either signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

// Unsigned 16-bit immediate.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

// Parse a MIMG dim value (e.g. "1D", "2D_ARRAY", or the "SQ_RSRC_IMG_*"
// spelling) into its hardware encoding. Returns false on any lexing or
// lookup failure.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // Reject if whitespace separates the integer from the identifier suffix.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Accept the "SQ_RSRC_IMG_" prefixed spelling as well.
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

// Parse an optional "dim:<value>" operand (GFX10+ only).
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

// Parse "dpp8:[s0,...,s7]" into a single packed 24-bit immediate
// (eight 3-bit lane selectors). GFX10+ only.
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  // Pack selector i into bits [3*i+2 : 3*i].
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

// Whether the given dpp_ctrl keyword is valid on the current subtarget.
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  // Keywords below are accepted on all subtargets that support DPP.
  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

// Parse the "[a,b,c,d]" suffix of quad_perm into a packed 8-bit value
// (four 2-bit lane selectors). Returns -1 and reports an error on failure.
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    // Selector i occupies bits [2*i+1 : 2*i].
    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

// Parse the numeric suffix of a "<ctrl>:%d" dpp_ctrl form, validate it
// against the keyword's legal range, and translate it into the DppCtrl
// encoding. Returns -1 and reports an error on failure.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // Base encoding plus the inclusive [Lo, Hi] range of legal suffix values.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: only row_bcast remains, with the two fixed values.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    // Single-value keywords map to a fixed encoding; ranged keywords OR the
    // suffix into the base encoding.
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

// Top-level dpp_ctrl operand parser: dispatches on the keyword to the
// no-suffix, quad_perm, or numeric-suffix sub-parsers.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Default values for optional DPP operands when they are omitted in asm.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

// Convert parsed operands to a DPP (or DPP8) MCInst, handling tied
// operands, modifier-carrying sources, and optional dpp controls.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
  unsigned I = 1; // Operands[0] is the mnemonic token.
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next MCInst operand slot is tied to an earlier one, duplicate
    // the earlier operand first (tied old/src2 for MAC instructions).
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is emitted at the end, after all other operands.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Append optional DPP controls with their hardware defaults.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

// Parse an SDWA select operand of the form "<Prefix>:<BYTE_n|WORD_n|DWORD>"
// into the SdwaSel encoding, stored as an immediate of the given Type.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff); // sentinel: unrecognized keyword

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

// Parse "dst_unused:<UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE>" into the
// DstUnused encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff); // sentinel: unrecognized keyword

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Thin per-encoding wrappers around cvtSDWA.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

// Convert parsed operands to an SDWA MCInst. BasicInstType selects the
// VOP1/VOP2/VOPC operand layout; SkipDstVcc/SkipSrcVcc drop the textual
// "vcc" tokens that VOP2b/VOPC forms carry but the encoding omits.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1; // Operands[0] is the mnemonic token.
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

// Default values for optional MFMA modifiers when omitted in asm.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  // Register the asm parser for both the R600 and GCN targets.
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

// Parse the optional 16-bit immediate operand of s_endpgm; defaults to 0
// when absent.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

// True iff this operand is an s_endpgm immediate.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }