//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
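    // For example, an identifier such as 'gds' may have been parsed as a
    // MCSymbolRefExpr; isToken() still succeeds for it, and getToken() below
    // returns the referenced symbol's name as the token text.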
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
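      // For example (illustrative use only), assembly source can test these
      // pre-defined symbols in conditional directives, e.g.:
      //   .if .amdgcn.gfx_generation_number >= 9
      //     ; gfx9+-specific code
      //   .endif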
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
defaultEndpgmImmOperands() const; 1739 }; 1740 1741 struct OptionalOperand { 1742 const char *Name; 1743 AMDGPUOperand::ImmTy Type; 1744 bool IsBit; 1745 bool (*ConvertResult)(int64_t&); 1746 }; 1747 1748 } // end anonymous namespace 1749 1750 // May be called with integer type with equivalent bitwidth. 1751 static const fltSemantics *getFltSemantics(unsigned Size) { 1752 switch (Size) { 1753 case 4: 1754 return &APFloat::IEEEsingle(); 1755 case 8: 1756 return &APFloat::IEEEdouble(); 1757 case 2: 1758 return &APFloat::IEEEhalf(); 1759 default: 1760 llvm_unreachable("unsupported fp type"); 1761 } 1762 } 1763 1764 static const fltSemantics *getFltSemantics(MVT VT) { 1765 return getFltSemantics(VT.getSizeInBits() / 8); 1766 } 1767 1768 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1769 switch (OperandType) { 1770 case AMDGPU::OPERAND_REG_IMM_INT32: 1771 case AMDGPU::OPERAND_REG_IMM_FP32: 1772 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1773 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1774 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1775 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1776 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1777 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1778 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1779 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1780 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1781 case AMDGPU::OPERAND_KIMM32: 1782 return &APFloat::IEEEsingle(); 1783 case AMDGPU::OPERAND_REG_IMM_INT64: 1784 case AMDGPU::OPERAND_REG_IMM_FP64: 1785 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1786 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1787 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1788 return &APFloat::IEEEdouble(); 1789 case AMDGPU::OPERAND_REG_IMM_INT16: 1790 case AMDGPU::OPERAND_REG_IMM_FP16: 1791 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1792 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1793 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1794 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1795 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1798 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1799 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1800 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1801 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1802 case AMDGPU::OPERAND_KIMM16: 1803 return &APFloat::IEEEhalf(); 1804 default: 1805 llvm_unreachable("unsupported fp type"); 1806 } 1807 } 1808 1809 //===----------------------------------------------------------------------===// 1810 // Operand 1811 //===----------------------------------------------------------------------===// 1812 1813 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1814 bool Lost; 1815 1816 // Convert literal to single precision 1817 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1818 APFloat::rmNearestTiesToEven, 1819 &Lost); 1820 // We allow precision lost but not overflow or underflow 1821 if (Status != APFloat::opOK && 1822 Lost && 1823 ((Status & APFloat::opOverflow) != 0 || 1824 (Status & APFloat::opUnderflow) != 0)) { 1825 return false; 1826 } 1827 1828 return true; 1829 } 1830 1831 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1832 return isUIntN(Size, Val) || isIntN(Size, Val); 1833 } 1834 1835 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1836 if (VT.getScalarType() == MVT::i16) { 1837 // FP immediate values are broken. 1838 return isInlinableIntLiteral(Val); 1839 } 1840 1841 // f16/v2f16 operands work correctly for all values. 
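  // For example, the f16 value 1.0 (0x3C00) can be accepted here as an inline
  // constant, while the i16 path above only accepts small integer inline values.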
1842 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1843 } 1844 1845 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1846 1847 // This is a hack to enable named inline values like 1848 // shared_base with both 32-bit and 64-bit operands. 1849 // Note that these values are defined as 1850 // 32-bit operands only. 1851 if (isInlineValue()) { 1852 return true; 1853 } 1854 1855 if (!isImmTy(ImmTyNone)) { 1856 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1857 return false; 1858 } 1859 // TODO: We should avoid using host float here. It would be better to 1860 // check the float bit values, which is what a few other places do. 1861 // We've had bot failures before due to weird NaN support on mips hosts. 1862 1863 APInt Literal(64, Imm.Val); 1864 1865 if (Imm.IsFPImm) { // We got fp literal token 1866 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1867 return AMDGPU::isInlinableLiteral64(Imm.Val, 1868 AsmParser->hasInv2PiInlineImm()); 1869 } 1870 1871 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1872 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1873 return false; 1874 1875 if (type.getScalarSizeInBits() == 16) { 1876 return isInlineableLiteralOp16( 1877 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1878 type, AsmParser->hasInv2PiInlineImm()); 1879 } 1880 1881 // Check if single precision literal is inlinable 1882 return AMDGPU::isInlinableLiteral32( 1883 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1884 AsmParser->hasInv2PiInlineImm()); 1885 } 1886 1887 // We got int literal token. 1888 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1889 return AMDGPU::isInlinableLiteral64(Imm.Val, 1890 AsmParser->hasInv2PiInlineImm()); 1891 } 1892 1893 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1894 return false; 1895 } 1896 1897 if (type.getScalarSizeInBits() == 16) { 1898 return isInlineableLiteralOp16( 1899 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1900 type, AsmParser->hasInv2PiInlineImm()); 1901 } 1902 1903 return AMDGPU::isInlinableLiteral32( 1904 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1905 AsmParser->hasInv2PiInlineImm()); 1906 } 1907 1908 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1909 // Check that this immediate can be added as a literal 1910 if (!isImmTy(ImmTyNone)) { 1911 return false; 1912 } 1913 1914 if (!Imm.IsFPImm) { 1915 // We got int literal token. 1916 1917 if (type == MVT::f64 && hasFPModifiers()) { 1918 // Cannot apply fp modifiers to int literals while preserving the same semantics 1919 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1920 // disable these cases. 1921 return false; 1922 } 1923 1924 unsigned Size = type.getSizeInBits(); 1925 if (Size == 64) 1926 Size = 32; 1927 1928 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1929 // types. 1930 return isSafeTruncation(Imm.Val, Size); 1931 } 1932 1933 // We got fp literal token 1934 if (type == MVT::f64) { // Expected 64-bit fp operand 1935 // We would set the low 32 bits of the literal to zeroes, but we accept such literals 1936 return true; 1937 } 1938 1939 if (type == MVT::i64) { // Expected 64-bit int operand 1940 // We don't allow fp literals in 64-bit integer instructions. It is 1941 // unclear how we should encode them.
1942 return false; 1943 } 1944 1945 // We allow fp literals with f16x2 operands assuming that the specified 1946 // literal goes into the lower half and the upper half is zero. We also 1947 // require that the literal may be losslessly converted to f16. 1948 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1949 (type == MVT::v2i16)? MVT::i16 : 1950 (type == MVT::v2f32)? MVT::f32 : type; 1951 1952 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1953 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1954 } 1955 1956 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1957 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1958 } 1959 1960 bool AMDGPUOperand::isVRegWithInputMods() const { 1961 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1962 // GFX90A allows DPP on 64-bit operands. 1963 (isRegClass(AMDGPU::VReg_64RegClassID) && 1964 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1965 } 1966 1967 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1968 if (AsmParser->isVI()) 1969 return isVReg32(); 1970 else if (AsmParser->isGFX9Plus()) 1971 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1972 else 1973 return false; 1974 } 1975 1976 bool AMDGPUOperand::isSDWAFP16Operand() const { 1977 return isSDWAOperand(MVT::f16); 1978 } 1979 1980 bool AMDGPUOperand::isSDWAFP32Operand() const { 1981 return isSDWAOperand(MVT::f32); 1982 } 1983 1984 bool AMDGPUOperand::isSDWAInt16Operand() const { 1985 return isSDWAOperand(MVT::i16); 1986 } 1987 1988 bool AMDGPUOperand::isSDWAInt32Operand() const { 1989 return isSDWAOperand(MVT::i32); 1990 } 1991 1992 bool AMDGPUOperand::isBoolReg() const { 1993 auto FB = AsmParser->getFeatureBits(); 1994 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1995 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1996 } 1997 1998 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1999 { 2000 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2001 assert(Size == 2 || Size == 4 || Size == 8); 2002 2003 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2004 2005 if (Imm.Mods.Abs) { 2006 Val &= ~FpSignMask; 2007 } 2008 if (Imm.Mods.Neg) { 2009 Val ^= FpSignMask; 2010 } 2011 2012 return Val; 2013 } 2014 2015 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2016 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2017 Inst.getNumOperands())) { 2018 addLiteralImmOperand(Inst, Imm.Val, 2019 ApplyModifiers & 2020 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2021 } else { 2022 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2023 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2024 setImmKindNone(); 2025 } 2026 } 2027 2028 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2029 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2030 auto OpNum = Inst.getNumOperands(); 2031 // Check that this operand accepts literals 2032 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2033 2034 if (ApplyModifiers) { 2035 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2036 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2037 Val = applyInputFPModifiers(Val, Size); 2038 } 2039 2040 APInt Literal(64, Val); 2041 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2042 2043 if (Imm.IsFPImm) { // We got fp literal token 2044 switch (OpTy) { 2045 case AMDGPU::OPERAND_REG_IMM_INT64: 2046 case AMDGPU::OPERAND_REG_IMM_FP64: 2047 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2048 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2049 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2050 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2051 AsmParser->hasInv2PiInlineImm())) { 2052 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2053 setImmKindConst(); 2054 return; 2055 } 2056 2057 // Non-inlineable 2058 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2059 // For fp operands we check if low 32 bits are zeros 2060 if (Literal.getLoBits(32) != 0) { 2061 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2062 "Can't encode literal as exact 64-bit floating-point operand. " 2063 "Low 32-bits will be set to zero"); 2064 } 2065 2066 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2067 setImmKindLiteral(); 2068 return; 2069 } 2070 2071 // We don't allow fp literals in 64-bit integer instructions. It is 2072 // unclear how we should encode them. This case should be checked earlier 2073 // in predicate methods (isLiteralImm()) 2074 llvm_unreachable("fp literal in 64-bit integer instruction."); 2075 2076 case AMDGPU::OPERAND_REG_IMM_INT32: 2077 case AMDGPU::OPERAND_REG_IMM_FP32: 2078 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2079 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2080 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2082 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2083 case AMDGPU::OPERAND_REG_IMM_INT16: 2084 case AMDGPU::OPERAND_REG_IMM_FP16: 2085 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2086 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2087 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2088 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2089 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2090 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2091 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2092 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2093 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2094 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2095 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2096 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2097 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2098 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2099 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2100 case AMDGPU::OPERAND_KIMM32: 2101 case AMDGPU::OPERAND_KIMM16: { 2102 bool lost; 2103 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2104 // Convert literal to single precision 2105 FPLiteral.convert(*getOpFltSemantics(OpTy), 2106 APFloat::rmNearestTiesToEven, &lost); 2107 // We allow precision lost but not overflow or underflow. This should be 2108 // checked earlier in isLiteralImm() 2109 2110 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2111 Inst.addOperand(MCOperand::createImm(ImmVal)); 2112 setImmKindLiteral(); 2113 return; 2114 } 2115 default: 2116 llvm_unreachable("invalid operand size"); 2117 } 2118 2119 return; 2120 } 2121 2122 // We got int literal token. 2123 // Only sign extend inline immediates. 
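  // For example, with a 32-bit operand the value -1 passes isInlinableLiteral32
  // below and is added as an inline constant, while a value such as 0x12345
  // falls through to the literal path and keeps only its low 32 bits.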
2124 switch (OpTy) { 2125 case AMDGPU::OPERAND_REG_IMM_INT32: 2126 case AMDGPU::OPERAND_REG_IMM_FP32: 2127 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2128 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2129 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2130 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2131 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2132 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2133 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2134 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2135 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2136 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2137 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2138 if (isSafeTruncation(Val, 32) && 2139 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2140 AsmParser->hasInv2PiInlineImm())) { 2141 Inst.addOperand(MCOperand::createImm(Val)); 2142 setImmKindConst(); 2143 return; 2144 } 2145 2146 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2147 setImmKindLiteral(); 2148 return; 2149 2150 case AMDGPU::OPERAND_REG_IMM_INT64: 2151 case AMDGPU::OPERAND_REG_IMM_FP64: 2152 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2153 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2154 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2155 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2156 Inst.addOperand(MCOperand::createImm(Val)); 2157 setImmKindConst(); 2158 return; 2159 } 2160 2161 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2162 setImmKindLiteral(); 2163 return; 2164 2165 case AMDGPU::OPERAND_REG_IMM_INT16: 2166 case AMDGPU::OPERAND_REG_IMM_FP16: 2167 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2168 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2169 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2170 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2171 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2172 if (isSafeTruncation(Val, 16) && 2173 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2174 AsmParser->hasInv2PiInlineImm())) { 2175 Inst.addOperand(MCOperand::createImm(Val)); 2176 setImmKindConst(); 2177 return; 2178 } 2179 2180 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2181 setImmKindLiteral(); 2182 return; 2183 2184 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2185 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2186 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2187 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2188 assert(isSafeTruncation(Val, 16)); 2189 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2190 AsmParser->hasInv2PiInlineImm())); 2191 2192 Inst.addOperand(MCOperand::createImm(Val)); 2193 return; 2194 } 2195 case AMDGPU::OPERAND_KIMM32: 2196 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2197 setImmKindNone(); 2198 return; 2199 case AMDGPU::OPERAND_KIMM16: 2200 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2201 setImmKindNone(); 2202 return; 2203 default: 2204 llvm_unreachable("invalid operand size"); 2205 } 2206 } 2207 2208 template <unsigned Bitwidth> 2209 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2210 APInt Literal(64, Imm.Val); 2211 setImmKindNone(); 2212 2213 if (!Imm.IsFPImm) { 2214 // We got int literal token. 
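    // Only the low Bitwidth bits of the integer are encoded; e.g. a 16-bit
    // k-immediate keeps just the low 16 bits of the value.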
2215 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2216 return; 2217 } 2218 2219 bool Lost; 2220 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2221 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2222 APFloat::rmNearestTiesToEven, &Lost); 2223 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2224 } 2225 2226 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2227 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2228 } 2229 2230 static bool isInlineValue(unsigned Reg) { 2231 switch (Reg) { 2232 case AMDGPU::SRC_SHARED_BASE: 2233 case AMDGPU::SRC_SHARED_LIMIT: 2234 case AMDGPU::SRC_PRIVATE_BASE: 2235 case AMDGPU::SRC_PRIVATE_LIMIT: 2236 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2237 return true; 2238 case AMDGPU::SRC_VCCZ: 2239 case AMDGPU::SRC_EXECZ: 2240 case AMDGPU::SRC_SCC: 2241 return true; 2242 case AMDGPU::SGPR_NULL: 2243 return true; 2244 default: 2245 return false; 2246 } 2247 } 2248 2249 bool AMDGPUOperand::isInlineValue() const { 2250 return isRegKind() && ::isInlineValue(getReg()); 2251 } 2252 2253 //===----------------------------------------------------------------------===// 2254 // AsmParser 2255 //===----------------------------------------------------------------------===// 2256 2257 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2258 if (Is == IS_VGPR) { 2259 switch (RegWidth) { 2260 default: return -1; 2261 case 32: 2262 return AMDGPU::VGPR_32RegClassID; 2263 case 64: 2264 return AMDGPU::VReg_64RegClassID; 2265 case 96: 2266 return AMDGPU::VReg_96RegClassID; 2267 case 128: 2268 return AMDGPU::VReg_128RegClassID; 2269 case 160: 2270 return AMDGPU::VReg_160RegClassID; 2271 case 192: 2272 return AMDGPU::VReg_192RegClassID; 2273 case 224: 2274 return AMDGPU::VReg_224RegClassID; 2275 case 256: 2276 return AMDGPU::VReg_256RegClassID; 2277 case 512: 2278 return AMDGPU::VReg_512RegClassID; 2279 case 1024: 2280 return AMDGPU::VReg_1024RegClassID; 2281 } 2282 } else if (Is == IS_TTMP) { 2283 switch (RegWidth) { 2284 default: return -1; 2285 case 32: 2286 return AMDGPU::TTMP_32RegClassID; 2287 case 64: 2288 return AMDGPU::TTMP_64RegClassID; 2289 case 128: 2290 return AMDGPU::TTMP_128RegClassID; 2291 case 256: 2292 return AMDGPU::TTMP_256RegClassID; 2293 case 512: 2294 return AMDGPU::TTMP_512RegClassID; 2295 } 2296 } else if (Is == IS_SGPR) { 2297 switch (RegWidth) { 2298 default: return -1; 2299 case 32: 2300 return AMDGPU::SGPR_32RegClassID; 2301 case 64: 2302 return AMDGPU::SGPR_64RegClassID; 2303 case 96: 2304 return AMDGPU::SGPR_96RegClassID; 2305 case 128: 2306 return AMDGPU::SGPR_128RegClassID; 2307 case 160: 2308 return AMDGPU::SGPR_160RegClassID; 2309 case 192: 2310 return AMDGPU::SGPR_192RegClassID; 2311 case 224: 2312 return AMDGPU::SGPR_224RegClassID; 2313 case 256: 2314 return AMDGPU::SGPR_256RegClassID; 2315 case 512: 2316 return AMDGPU::SGPR_512RegClassID; 2317 } 2318 } else if (Is == IS_AGPR) { 2319 switch (RegWidth) { 2320 default: return -1; 2321 case 32: 2322 return AMDGPU::AGPR_32RegClassID; 2323 case 64: 2324 return AMDGPU::AReg_64RegClassID; 2325 case 96: 2326 return AMDGPU::AReg_96RegClassID; 2327 case 128: 2328 return AMDGPU::AReg_128RegClassID; 2329 case 160: 2330 return AMDGPU::AReg_160RegClassID; 2331 case 192: 2332 return AMDGPU::AReg_192RegClassID; 2333 case 224: 2334 return AMDGPU::AReg_224RegClassID; 2335 case 256: 2336 return AMDGPU::AReg_256RegClassID; 2337 case 512: 2338 return AMDGPU::AReg_512RegClassID; 
2339 case 1024: 2340 return AMDGPU::AReg_1024RegClassID; 2341 } 2342 } 2343 return -1; 2344 } 2345 2346 static unsigned getSpecialRegForName(StringRef RegName) { 2347 return StringSwitch<unsigned>(RegName) 2348 .Case("exec", AMDGPU::EXEC) 2349 .Case("vcc", AMDGPU::VCC) 2350 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2351 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2352 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2353 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2354 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2355 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2356 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2357 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2358 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2359 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2360 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2361 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2362 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2363 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2364 .Case("m0", AMDGPU::M0) 2365 .Case("vccz", AMDGPU::SRC_VCCZ) 2366 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2367 .Case("execz", AMDGPU::SRC_EXECZ) 2368 .Case("src_execz", AMDGPU::SRC_EXECZ) 2369 .Case("scc", AMDGPU::SRC_SCC) 2370 .Case("src_scc", AMDGPU::SRC_SCC) 2371 .Case("tba", AMDGPU::TBA) 2372 .Case("tma", AMDGPU::TMA) 2373 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2374 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2375 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2376 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2377 .Case("vcc_lo", AMDGPU::VCC_LO) 2378 .Case("vcc_hi", AMDGPU::VCC_HI) 2379 .Case("exec_lo", AMDGPU::EXEC_LO) 2380 .Case("exec_hi", AMDGPU::EXEC_HI) 2381 .Case("tma_lo", AMDGPU::TMA_LO) 2382 .Case("tma_hi", AMDGPU::TMA_HI) 2383 .Case("tba_lo", AMDGPU::TBA_LO) 2384 .Case("tba_hi", AMDGPU::TBA_HI) 2385 .Case("pc", AMDGPU::PC_REG) 2386 .Case("null", AMDGPU::SGPR_NULL) 2387 .Default(AMDGPU::NoRegister); 2388 } 2389 2390 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2391 SMLoc &EndLoc, bool RestoreOnFailure) { 2392 auto R = parseRegister(); 2393 if (!R) return true; 2394 assert(R->isReg()); 2395 RegNo = R->getReg(); 2396 StartLoc = R->getStartLoc(); 2397 EndLoc = R->getEndLoc(); 2398 return false; 2399 } 2400 2401 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2402 SMLoc &EndLoc) { 2403 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2404 } 2405 2406 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2407 SMLoc &StartLoc, 2408 SMLoc &EndLoc) { 2409 bool Result = 2410 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2411 bool PendingErrors = getParser().hasPendingError(); 2412 getParser().clearPendingErrors(); 2413 if (PendingErrors) 2414 return MatchOperand_ParseFail; 2415 if (Result) 2416 return MatchOperand_NoMatch; 2417 return MatchOperand_Success; 2418 } 2419 2420 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2421 RegisterKind RegKind, unsigned Reg1, 2422 SMLoc Loc) { 2423 switch (RegKind) { 2424 case IS_SPECIAL: 2425 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2426 Reg = AMDGPU::EXEC; 2427 RegWidth = 64; 2428 return true; 2429 } 2430 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2431 Reg = AMDGPU::FLAT_SCR; 2432 RegWidth = 64; 2433 return true; 2434 } 2435 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2436 Reg = AMDGPU::XNACK_MASK; 2437 RegWidth = 64; 
2438 return true; 2439 } 2440 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2441 Reg = AMDGPU::VCC; 2442 RegWidth = 64; 2443 return true; 2444 } 2445 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2446 Reg = AMDGPU::TBA; 2447 RegWidth = 64; 2448 return true; 2449 } 2450 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2451 Reg = AMDGPU::TMA; 2452 RegWidth = 64; 2453 return true; 2454 } 2455 Error(Loc, "register does not fit in the list"); 2456 return false; 2457 case IS_VGPR: 2458 case IS_SGPR: 2459 case IS_AGPR: 2460 case IS_TTMP: 2461 if (Reg1 != Reg + RegWidth / 32) { 2462 Error(Loc, "registers in a list must have consecutive indices"); 2463 return false; 2464 } 2465 RegWidth += 32; 2466 return true; 2467 default: 2468 llvm_unreachable("unexpected register kind"); 2469 } 2470 } 2471 2472 struct RegInfo { 2473 StringLiteral Name; 2474 RegisterKind Kind; 2475 }; 2476 2477 static constexpr RegInfo RegularRegisters[] = { 2478 {{"v"}, IS_VGPR}, 2479 {{"s"}, IS_SGPR}, 2480 {{"ttmp"}, IS_TTMP}, 2481 {{"acc"}, IS_AGPR}, 2482 {{"a"}, IS_AGPR}, 2483 }; 2484 2485 static bool isRegularReg(RegisterKind Kind) { 2486 return Kind == IS_VGPR || 2487 Kind == IS_SGPR || 2488 Kind == IS_TTMP || 2489 Kind == IS_AGPR; 2490 } 2491 2492 static const RegInfo* getRegularRegInfo(StringRef Str) { 2493 for (const RegInfo &Reg : RegularRegisters) 2494 if (Str.startswith(Reg.Name)) 2495 return &Reg; 2496 return nullptr; 2497 } 2498 2499 static bool getRegNum(StringRef Str, unsigned& Num) { 2500 return !Str.getAsInteger(10, Num); 2501 } 2502 2503 bool 2504 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2505 const AsmToken &NextToken) const { 2506 2507 // A list of consecutive registers: [s0,s1,s2,s3] 2508 if (Token.is(AsmToken::LBrac)) 2509 return true; 2510 2511 if (!Token.is(AsmToken::Identifier)) 2512 return false; 2513 2514 // A single register like s0 or a range of registers like s[0:1] 2515 2516 StringRef Str = Token.getString(); 2517 const RegInfo *Reg = getRegularRegInfo(Str); 2518 if (Reg) { 2519 StringRef RegName = Reg->Name; 2520 StringRef RegSuffix = Str.substr(RegName.size()); 2521 if (!RegSuffix.empty()) { 2522 unsigned Num; 2523 // A single register with an index: rXX 2524 if (getRegNum(RegSuffix, Num)) 2525 return true; 2526 } else { 2527 // A range of registers: r[XX:YY]. 2528 if (NextToken.is(AsmToken::LBrac)) 2529 return true; 2530 } 2531 } 2532 2533 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2534 } 2535 2536 bool 2537 AMDGPUAsmParser::isRegister() 2538 { 2539 return isRegister(getToken(), peekToken()); 2540 } 2541 2542 unsigned 2543 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2544 unsigned RegNum, 2545 unsigned RegWidth, 2546 SMLoc Loc) { 2547 2548 assert(isRegularReg(RegKind)); 2549 2550 unsigned AlignSize = 1; 2551 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2552 // SGPR and TTMP registers must be aligned. 2553 // Max required alignment is 4 dwords. 
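    // For example, a 128-bit tuple such as s[4:7] must start at an index that
    // is a multiple of 4, so s[3:6] is rejected below with
    // "invalid register alignment".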
2554 AlignSize = std::min(RegWidth / 32, 4u); 2555 } 2556 2557 if (RegNum % AlignSize != 0) { 2558 Error(Loc, "invalid register alignment"); 2559 return AMDGPU::NoRegister; 2560 } 2561 2562 unsigned RegIdx = RegNum / AlignSize; 2563 int RCID = getRegClass(RegKind, RegWidth); 2564 if (RCID == -1) { 2565 Error(Loc, "invalid or unsupported register size"); 2566 return AMDGPU::NoRegister; 2567 } 2568 2569 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2570 const MCRegisterClass RC = TRI->getRegClass(RCID); 2571 if (RegIdx >= RC.getNumRegs()) { 2572 Error(Loc, "register index is out of range"); 2573 return AMDGPU::NoRegister; 2574 } 2575 2576 return RC.getRegister(RegIdx); 2577 } 2578 2579 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2580 int64_t RegLo, RegHi; 2581 if (!skipToken(AsmToken::LBrac, "missing register index")) 2582 return false; 2583 2584 SMLoc FirstIdxLoc = getLoc(); 2585 SMLoc SecondIdxLoc; 2586 2587 if (!parseExpr(RegLo)) 2588 return false; 2589 2590 if (trySkipToken(AsmToken::Colon)) { 2591 SecondIdxLoc = getLoc(); 2592 if (!parseExpr(RegHi)) 2593 return false; 2594 } else { 2595 RegHi = RegLo; 2596 } 2597 2598 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2599 return false; 2600 2601 if (!isUInt<32>(RegLo)) { 2602 Error(FirstIdxLoc, "invalid register index"); 2603 return false; 2604 } 2605 2606 if (!isUInt<32>(RegHi)) { 2607 Error(SecondIdxLoc, "invalid register index"); 2608 return false; 2609 } 2610 2611 if (RegLo > RegHi) { 2612 Error(FirstIdxLoc, "first register index should not exceed second index"); 2613 return false; 2614 } 2615 2616 Num = static_cast<unsigned>(RegLo); 2617 RegWidth = 32 * ((RegHi - RegLo) + 1); 2618 return true; 2619 } 2620 2621 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2622 unsigned &RegNum, unsigned &RegWidth, 2623 SmallVectorImpl<AsmToken> &Tokens) { 2624 assert(isToken(AsmToken::Identifier)); 2625 unsigned Reg = getSpecialRegForName(getTokenStr()); 2626 if (Reg) { 2627 RegNum = 0; 2628 RegWidth = 32; 2629 RegKind = IS_SPECIAL; 2630 Tokens.push_back(getToken()); 2631 lex(); // skip register name 2632 } 2633 return Reg; 2634 } 2635 2636 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2637 unsigned &RegNum, unsigned &RegWidth, 2638 SmallVectorImpl<AsmToken> &Tokens) { 2639 assert(isToken(AsmToken::Identifier)); 2640 StringRef RegName = getTokenStr(); 2641 auto Loc = getLoc(); 2642 2643 const RegInfo *RI = getRegularRegInfo(RegName); 2644 if (!RI) { 2645 Error(Loc, "invalid register name"); 2646 return AMDGPU::NoRegister; 2647 } 2648 2649 Tokens.push_back(getToken()); 2650 lex(); // skip register name 2651 2652 RegKind = RI->Kind; 2653 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2654 if (!RegSuffix.empty()) { 2655 // Single 32-bit register: vXX. 2656 if (!getRegNum(RegSuffix, RegNum)) { 2657 Error(Loc, "invalid register index"); 2658 return AMDGPU::NoRegister; 2659 } 2660 RegWidth = 32; 2661 } else { 2662 // Range of registers: v[XX:YY]. ":YY" is optional. 
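    // For example, "v[8:11]" is parsed by ParseRegRange below into RegNum = 8
    // and RegWidth = 128, while "v8" takes the single-register path above with
    // RegWidth = 32.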
2663 if (!ParseRegRange(RegNum, RegWidth)) 2664 return AMDGPU::NoRegister; 2665 } 2666 2667 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2668 } 2669 2670 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2671 unsigned &RegWidth, 2672 SmallVectorImpl<AsmToken> &Tokens) { 2673 unsigned Reg = AMDGPU::NoRegister; 2674 auto ListLoc = getLoc(); 2675 2676 if (!skipToken(AsmToken::LBrac, 2677 "expected a register or a list of registers")) { 2678 return AMDGPU::NoRegister; 2679 } 2680 2681 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2682 2683 auto Loc = getLoc(); 2684 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2685 return AMDGPU::NoRegister; 2686 if (RegWidth != 32) { 2687 Error(Loc, "expected a single 32-bit register"); 2688 return AMDGPU::NoRegister; 2689 } 2690 2691 for (; trySkipToken(AsmToken::Comma); ) { 2692 RegisterKind NextRegKind; 2693 unsigned NextReg, NextRegNum, NextRegWidth; 2694 Loc = getLoc(); 2695 2696 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2697 NextRegNum, NextRegWidth, 2698 Tokens)) { 2699 return AMDGPU::NoRegister; 2700 } 2701 if (NextRegWidth != 32) { 2702 Error(Loc, "expected a single 32-bit register"); 2703 return AMDGPU::NoRegister; 2704 } 2705 if (NextRegKind != RegKind) { 2706 Error(Loc, "registers in a list must be of the same kind"); 2707 return AMDGPU::NoRegister; 2708 } 2709 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2710 return AMDGPU::NoRegister; 2711 } 2712 2713 if (!skipToken(AsmToken::RBrac, 2714 "expected a comma or a closing square bracket")) { 2715 return AMDGPU::NoRegister; 2716 } 2717 2718 if (isRegularReg(RegKind)) 2719 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2720 2721 return Reg; 2722 } 2723 2724 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2725 unsigned &RegNum, unsigned &RegWidth, 2726 SmallVectorImpl<AsmToken> &Tokens) { 2727 auto Loc = getLoc(); 2728 Reg = AMDGPU::NoRegister; 2729 2730 if (isToken(AsmToken::Identifier)) { 2731 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2732 if (Reg == AMDGPU::NoRegister) 2733 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2734 } else { 2735 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2736 } 2737 2738 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2739 if (Reg == AMDGPU::NoRegister) { 2740 assert(Parser.hasPendingError()); 2741 return false; 2742 } 2743 2744 if (!subtargetHasRegister(*TRI, Reg)) { 2745 if (Reg == AMDGPU::SGPR_NULL) { 2746 Error(Loc, "'null' operand is not supported on this GPU"); 2747 } else { 2748 Error(Loc, "register not available on this GPU"); 2749 } 2750 return false; 2751 } 2752 2753 return true; 2754 } 2755 2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2757 unsigned &RegNum, unsigned &RegWidth, 2758 bool RestoreOnFailure /*=false*/) { 2759 Reg = AMDGPU::NoRegister; 2760 2761 SmallVector<AsmToken, 1> Tokens; 2762 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2763 if (RestoreOnFailure) { 2764 while (!Tokens.empty()) { 2765 getLexer().UnLex(Tokens.pop_back_val()); 2766 } 2767 } 2768 return true; 2769 } 2770 return false; 2771 } 2772 2773 Optional<StringRef> 2774 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2775 switch (RegKind) { 2776 case IS_VGPR: 2777 return StringRef(".amdgcn.next_free_vgpr"); 2778 case IS_SGPR: 2779 return StringRef(".amdgcn.next_free_sgpr"); 2780 default: 2781 return None; 2782 } 2783 } 2784 2785 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2786 auto SymbolName = getGprCountSymbolName(RegKind); 2787 assert(SymbolName && "initializing invalid register kind"); 2788 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2789 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2790 } 2791 2792 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2793 unsigned DwordRegIndex, 2794 unsigned RegWidth) { 2795 // Symbols are only defined for GCN targets 2796 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2797 return true; 2798 2799 auto SymbolName = getGprCountSymbolName(RegKind); 2800 if (!SymbolName) 2801 return true; 2802 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2803 2804 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2805 int64_t OldCount; 2806 2807 if (!Sym->isVariable()) 2808 return !Error(getLoc(), 2809 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2810 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2811 return !Error( 2812 getLoc(), 2813 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2814 2815 if (OldCount <= NewMax) 2816 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2817 2818 return true; 2819 } 2820 2821 std::unique_ptr<AMDGPUOperand> 2822 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2823 const auto &Tok = getToken(); 2824 SMLoc StartLoc = Tok.getLoc(); 2825 SMLoc EndLoc = Tok.getEndLoc(); 2826 RegisterKind RegKind; 2827 unsigned Reg, RegNum, RegWidth; 2828 2829 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2830 return nullptr; 2831 } 2832 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2833 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2834 return nullptr; 2835 } else 2836 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2837 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2838 } 2839 2840 OperandMatchResultTy 2841 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2842 // TODO: add syntactic sugar for 1/(2*PI) 2843 2844 assert(!isRegister()); 2845 assert(!isModifier()); 2846 2847 const auto& Tok = getToken(); 2848 const auto& NextTok = peekToken(); 2849 bool IsReal = Tok.is(AsmToken::Real); 2850 SMLoc S = getLoc(); 2851 bool Negate = false; 2852 2853 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2854 lex(); 2855 IsReal = true; 2856 Negate = true; 2857 } 2858 2859 if (IsReal) { 2860 // Floating-point expressions are not supported. 2861 // Can only allow floating-point literals with an 2862 // optional sign. 2863 2864 StringRef Num = getTokenStr(); 2865 lex(); 2866 2867 APFloat RealVal(APFloat::IEEEdouble()); 2868 auto roundMode = APFloat::rmNearestTiesToEven; 2869 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2870 return MatchOperand_ParseFail; 2871 } 2872 if (Negate) 2873 RealVal.changeSign(); 2874 2875 Operands.push_back( 2876 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2877 AMDGPUOperand::ImmTyNone, true)); 2878 2879 return MatchOperand_Success; 2880 2881 } else { 2882 int64_t IntVal; 2883 const MCExpr *Expr; 2884 SMLoc S = getLoc(); 2885 2886 if (HasSP3AbsModifier) { 2887 // This is a workaround for handling expressions 2888 // as arguments of SP3 'abs' modifier, for example: 2889 // |1.0| 2890 // |-1| 2891 // |1+x| 2892 // This syntax is not compatible with syntax of standard 2893 // MC expressions (due to the trailing '|'). 
2894 SMLoc EndLoc; 2895 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2896 return MatchOperand_ParseFail; 2897 } else { 2898 if (Parser.parseExpression(Expr)) 2899 return MatchOperand_ParseFail; 2900 } 2901 2902 if (Expr->evaluateAsAbsolute(IntVal)) { 2903 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2904 } else { 2905 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2906 } 2907 2908 return MatchOperand_Success; 2909 } 2910 2911 return MatchOperand_NoMatch; 2912 } 2913 2914 OperandMatchResultTy 2915 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2916 if (!isRegister()) 2917 return MatchOperand_NoMatch; 2918 2919 if (auto R = parseRegister()) { 2920 assert(R->isReg()); 2921 Operands.push_back(std::move(R)); 2922 return MatchOperand_Success; 2923 } 2924 return MatchOperand_ParseFail; 2925 } 2926 2927 OperandMatchResultTy 2928 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2929 auto res = parseReg(Operands); 2930 if (res != MatchOperand_NoMatch) { 2931 return res; 2932 } else if (isModifier()) { 2933 return MatchOperand_NoMatch; 2934 } else { 2935 return parseImm(Operands, HasSP3AbsMod); 2936 } 2937 } 2938 2939 bool 2940 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2941 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2942 const auto &str = Token.getString(); 2943 return str == "abs" || str == "neg" || str == "sext"; 2944 } 2945 return false; 2946 } 2947 2948 bool 2949 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2950 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2951 } 2952 2953 bool 2954 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2955 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2956 } 2957 2958 bool 2959 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2960 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2961 } 2962 2963 // Check if this is an operand modifier or an opcode modifier 2964 // which may look like an expression but it is not. We should 2965 // avoid parsing these modifiers as expressions. Currently 2966 // recognized sequences are: 2967 // |...| 2968 // abs(...) 2969 // neg(...) 2970 // sext(...) 2971 // -reg 2972 // -|...| 2973 // -abs(...) 2974 // name:... 2975 // Note that simple opcode modifiers like 'gds' may be parsed as 2976 // expressions; this is a special case. See getExpressionAsToken. 2977 // 2978 bool 2979 AMDGPUAsmParser::isModifier() { 2980 2981 AsmToken Tok = getToken(); 2982 AsmToken NextToken[2]; 2983 peekTokens(NextToken); 2984 2985 return isOperandModifier(Tok, NextToken[0]) || 2986 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2987 isOpcodeModifierWithVal(Tok, NextToken[0]); 2988 } 2989 2990 // Check if the current token is an SP3 'neg' modifier. 2991 // Currently this modifier is allowed in the following context: 2992 // 2993 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2994 // 2. Before an 'abs' modifier: -abs(...) 2995 // 3. Before an SP3 'abs' modifier: -|...| 2996 // 2997 // In all other cases "-" is handled as a part 2998 // of an expression that follows the sign. 
2999 // 3000 // Note: When "-" is followed by an integer literal, 3001 // this is interpreted as integer negation rather 3002 // than a floating-point NEG modifier applied to N. 3003 // Besides being counter-intuitive, such use of a floating-point 3004 // NEG modifier would have resulted in a different meaning 3005 // of integer literals used with VOP1/2/C and VOP3, 3006 // for example: 3007 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3008 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3009 // Negative fp literals with a preceding "-" are 3010 // handled likewise for uniformity. 3011 // 3012 bool 3013 AMDGPUAsmParser::parseSP3NegModifier() { 3014 3015 AsmToken NextToken[2]; 3016 peekTokens(NextToken); 3017 3018 if (isToken(AsmToken::Minus) && 3019 (isRegister(NextToken[0], NextToken[1]) || 3020 NextToken[0].is(AsmToken::Pipe) || 3021 isId(NextToken[0], "abs"))) { 3022 lex(); 3023 return true; 3024 } 3025 3026 return false; 3027 } 3028 3029 OperandMatchResultTy 3030 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3031 bool AllowImm) { 3032 bool Neg, SP3Neg; 3033 bool Abs, SP3Abs; 3034 SMLoc Loc; 3035 3036 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3037 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3038 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3039 return MatchOperand_ParseFail; 3040 } 3041 3042 SP3Neg = parseSP3NegModifier(); 3043 3044 Loc = getLoc(); 3045 Neg = trySkipId("neg"); 3046 if (Neg && SP3Neg) { 3047 Error(Loc, "expected register or immediate"); 3048 return MatchOperand_ParseFail; 3049 } 3050 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3051 return MatchOperand_ParseFail; 3052 3053 Abs = trySkipId("abs"); 3054 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3055 return MatchOperand_ParseFail; 3056 3057 Loc = getLoc(); 3058 SP3Abs = trySkipToken(AsmToken::Pipe); 3059 if (Abs && SP3Abs) { 3060 Error(Loc, "expected register or immediate"); 3061 return MatchOperand_ParseFail; 3062 } 3063 3064 OperandMatchResultTy Res; 3065 if (AllowImm) { 3066 Res = parseRegOrImm(Operands, SP3Abs); 3067 } else { 3068 Res = parseReg(Operands); 3069 } 3070 if (Res != MatchOperand_Success) { 3071 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3072 } 3073 3074 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3075 return MatchOperand_ParseFail; 3076 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3077 return MatchOperand_ParseFail; 3078 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3079 return MatchOperand_ParseFail; 3080 3081 AMDGPUOperand::Modifiers Mods; 3082 Mods.Abs = Abs || SP3Abs; 3083 Mods.Neg = Neg || SP3Neg; 3084 3085 if (Mods.hasFPModifiers()) { 3086 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3087 if (Op.isExpr()) { 3088 Error(Op.getStartLoc(), "expected an absolute expression"); 3089 return MatchOperand_ParseFail; 3090 } 3091 Op.setModifiers(Mods); 3092 } 3093 return MatchOperand_Success; 3094 } 3095 3096 OperandMatchResultTy 3097 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3098 bool AllowImm) { 3099 bool Sext = trySkipId("sext"); 3100 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3101 return MatchOperand_ParseFail; 3102 3103 OperandMatchResultTy Res; 3104 if (AllowImm) { 3105 Res = parseRegOrImm(Operands); 3106 } else { 3107 Res = parseReg(Operands); 3108 } 3109 if (Res != MatchOperand_Success) { 3110 return Sext? MatchOperand_ParseFail : Res; 3111 } 3112 3113 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3114 return MatchOperand_ParseFail; 3115 3116 AMDGPUOperand::Modifiers Mods; 3117 Mods.Sext = Sext; 3118 3119 if (Mods.hasIntModifiers()) { 3120 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3121 if (Op.isExpr()) { 3122 Error(Op.getStartLoc(), "expected an absolute expression"); 3123 return MatchOperand_ParseFail; 3124 } 3125 Op.setModifiers(Mods); 3126 } 3127 3128 return MatchOperand_Success; 3129 } 3130 3131 OperandMatchResultTy 3132 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3133 return parseRegOrImmWithFPInputMods(Operands, false); 3134 } 3135 3136 OperandMatchResultTy 3137 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3138 return parseRegOrImmWithIntInputMods(Operands, false); 3139 } 3140 3141 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3142 auto Loc = getLoc(); 3143 if (trySkipId("off")) { 3144 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3145 AMDGPUOperand::ImmTyOff, false)); 3146 return MatchOperand_Success; 3147 } 3148 3149 if (!isRegister()) 3150 return MatchOperand_NoMatch; 3151 3152 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3153 if (Reg) { 3154 Operands.push_back(std::move(Reg)); 3155 return MatchOperand_Success; 3156 } 3157 3158 return MatchOperand_ParseFail; 3159 3160 } 3161 3162 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3163 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3164 3165 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3166 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3167 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3168 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3169 return Match_InvalidOperand; 3170 3171 if ((TSFlags & SIInstrFlags::VOP3) && 3172 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3173 getForcedEncodingSize() != 64) 3174 return Match_PreferE32; 3175 3176 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3177 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3178 // v_mac_f32/16 allow only dst_sel == DWORD; 3179 auto OpNum = 3180 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3181 const auto &Op = Inst.getOperand(OpNum); 3182 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3183 return Match_InvalidOperand; 3184 } 3185 } 3186 3187 return Match_Success; 3188 } 3189 3190 static ArrayRef<unsigned> getAllVariants() { 3191 static const unsigned Variants[] = { 3192 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3193 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3194 }; 3195 3196 return makeArrayRef(Variants); 3197 } 3198 3199 // What asm variants we should check 3200 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3201 if (getForcedEncodingSize() == 32) { 3202 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3203 return makeArrayRef(Variants); 3204 } 3205 3206 if (isForcedVOP3()) { 3207 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3208 return makeArrayRef(Variants); 3209 } 3210 3211 if (isForcedSDWA()) { 3212 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3213 AMDGPUAsmVariants::SDWA9}; 3214 return makeArrayRef(Variants); 3215 } 3216 3217 if (isForcedDPP()) { 3218 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3219 return makeArrayRef(Variants); 3220 } 3221 3222 return getAllVariants(); 3223 } 3224 3225 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3226 if (getForcedEncodingSize() == 32) 3227 return "e32"; 3228 3229 if (isForcedVOP3()) 3230 return "e64"; 3231 3232 if (isForcedSDWA()) 3233 return "sdwa"; 3234 3235 if (isForcedDPP()) 3236 return "dpp"; 3237 3238 return ""; 3239 } 3240 3241 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3242 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3243 const unsigned Num = Desc.getNumImplicitUses(); 3244 for (unsigned i = 0; i < Num; ++i) { 3245 unsigned Reg = Desc.ImplicitUses[i]; 3246 switch (Reg) { 3247 case AMDGPU::FLAT_SCR: 3248 case AMDGPU::VCC: 3249 case AMDGPU::VCC_LO: 3250 case AMDGPU::VCC_HI: 3251 case AMDGPU::M0: 3252 return Reg; 3253 default: 3254 break; 3255 } 3256 } 3257 return AMDGPU::NoRegister; 3258 } 3259 3260 // NB: This code is correct only when used to check constant 3261 // bus limitations because GFX7 supports no f16 inline constants. 3262 // Note that there are no cases when a GFX7 opcode violates 3263 // constant bus limitations due to the use of an f16 constant.
3264 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3265 unsigned OpIdx) const { 3266 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3267 3268 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3269 return false; 3270 } 3271 3272 const MCOperand &MO = Inst.getOperand(OpIdx); 3273 3274 int64_t Val = MO.getImm(); 3275 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3276 3277 switch (OpSize) { // expected operand size 3278 case 8: 3279 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3280 case 4: 3281 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3282 case 2: { 3283 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3284 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3285 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3286 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3287 return AMDGPU::isInlinableIntLiteral(Val); 3288 3289 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3290 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3291 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3292 return AMDGPU::isInlinableIntLiteralV216(Val); 3293 3294 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3295 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3296 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3297 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3298 3299 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3300 } 3301 default: 3302 llvm_unreachable("invalid operand size"); 3303 } 3304 } 3305 3306 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3307 if (!isGFX10Plus()) 3308 return 1; 3309 3310 switch (Opcode) { 3311 // 64-bit shift instructions can use only one scalar value input 3312 case AMDGPU::V_LSHLREV_B64_e64: 3313 case AMDGPU::V_LSHLREV_B64_gfx10: 3314 case AMDGPU::V_LSHRREV_B64_e64: 3315 case AMDGPU::V_LSHRREV_B64_gfx10: 3316 case AMDGPU::V_ASHRREV_I64_e64: 3317 case AMDGPU::V_ASHRREV_I64_gfx10: 3318 case AMDGPU::V_LSHL_B64_e64: 3319 case AMDGPU::V_LSHR_B64_e64: 3320 case AMDGPU::V_ASHR_I64_e64: 3321 return 1; 3322 default: 3323 return 2; 3324 } 3325 } 3326 3327 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3328 const MCOperand &MO = Inst.getOperand(OpIdx); 3329 if (MO.isImm()) { 3330 return !isInlineConstant(Inst, OpIdx); 3331 } else if (MO.isReg()) { 3332 auto Reg = MO.getReg(); 3333 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3334 auto PReg = mc2PseudoReg(Reg); 3335 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3336 } else { 3337 return true; 3338 } 3339 } 3340 3341 bool 3342 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3343 const OperandVector &Operands) { 3344 const unsigned Opcode = Inst.getOpcode(); 3345 const MCInstrDesc &Desc = MII.get(Opcode); 3346 unsigned LastSGPR = AMDGPU::NoRegister; 3347 unsigned ConstantBusUseCount = 0; 3348 unsigned NumLiterals = 0; 3349 unsigned LiteralSize; 3350 3351 if (Desc.TSFlags & 3352 (SIInstrFlags::VOPC | 3353 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3354 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3355 SIInstrFlags::SDWA)) { 3356 // Check special imm operands (used by madmk, etc) 3357 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3358 ++NumLiterals; 3359 LiteralSize = 4; 3360 } 3361 3362 SmallDenseSet<unsigned> SGPRsUsed; 3363 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3364 if (SGPRUsed != AMDGPU::NoRegister) { 3365 SGPRsUsed.insert(SGPRUsed); 3366 ++ConstantBusUseCount; 3367 } 3368 3369 
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3370 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3371 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3372 3373 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3374 3375 for (int OpIdx : OpIndices) { 3376 if (OpIdx == -1) break; 3377 3378 const MCOperand &MO = Inst.getOperand(OpIdx); 3379 if (usesConstantBus(Inst, OpIdx)) { 3380 if (MO.isReg()) { 3381 LastSGPR = mc2PseudoReg(MO.getReg()); 3382 // Pairs of registers with partial intersections like these 3383 // s0, s[0:1] 3384 // flat_scratch_lo, flat_scratch 3385 // flat_scratch_lo, flat_scratch_hi 3386 // are theoretically valid but they are disabled anyway. 3387 // Note that this code mimics SIInstrInfo::verifyInstruction 3388 if (!SGPRsUsed.count(LastSGPR)) { 3389 SGPRsUsed.insert(LastSGPR); 3390 ++ConstantBusUseCount; 3391 } 3392 } else { // Expression or a literal 3393 3394 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3395 continue; // special operand like VINTERP attr_chan 3396 3397 // An instruction may use only one literal. 3398 // This has been validated in the previous step. 3399 // See validateVOPLiteral. 3400 // This literal may be used as more than one operand. 3401 // If all these operands are of the same size, 3402 // this literal counts as one scalar value. 3403 // Otherwise it counts as 2 scalar values. 3404 // See "GFX10 Shader Programming", section 3.6.2.3. 3405 3406 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3407 if (Size < 4) Size = 4; 3408 3409 if (NumLiterals == 0) { 3410 NumLiterals = 1; 3411 LiteralSize = Size; 3412 } else if (LiteralSize != Size) { 3413 NumLiterals = 2; 3414 } 3415 } 3416 } 3417 } 3418 } 3419 ConstantBusUseCount += NumLiterals; 3420 3421 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3422 return true; 3423 3424 SMLoc LitLoc = getLitLoc(Operands); 3425 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3426 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3427 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3428 return false; 3429 } 3430 3431 bool 3432 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3433 const OperandVector &Operands) { 3434 const unsigned Opcode = Inst.getOpcode(); 3435 const MCInstrDesc &Desc = MII.get(Opcode); 3436 3437 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3438 if (DstIdx == -1 || 3439 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3440 return true; 3441 } 3442 3443 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3444 3445 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3446 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3447 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3448 3449 assert(DstIdx != -1); 3450 const MCOperand &Dst = Inst.getOperand(DstIdx); 3451 assert(Dst.isReg()); 3452 3453 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3454 3455 for (int SrcIdx : SrcIndices) { 3456 if (SrcIdx == -1) break; 3457 const MCOperand &Src = Inst.getOperand(SrcIdx); 3458 if (Src.isReg()) { 3459 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3460 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3461 Error(getRegLoc(SrcReg, Operands), 3462 "destination must be different than all sources"); 3463 return false; 3464 } 3465 } 3466 } 3467 3468 return true; 3469 } 3470 3471 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3472 3473 const unsigned Opc = Inst.getOpcode(); 3474 const MCInstrDesc &Desc = MII.get(Opc); 3475 3476 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3477 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3478 assert(ClampIdx != -1); 3479 return Inst.getOperand(ClampIdx).getImm() == 0; 3480 } 3481 3482 return true; 3483 } 3484 3485 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3486 3487 const unsigned Opc = Inst.getOpcode(); 3488 const MCInstrDesc &Desc = MII.get(Opc); 3489 3490 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3491 return true; 3492 3493 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3494 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3495 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3496 3497 assert(VDataIdx != -1); 3498 3499 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3500 return true; 3501 3502 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3503 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3504 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3505 if (DMask == 0) 3506 DMask = 1; 3507 3508 unsigned DataSize = 3509 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3510 if (hasPackedD16()) { 3511 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3512 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3513 DataSize = (DataSize + 1) / 2; 3514 } 3515 3516 return (VDataSize / 4) == DataSize + TFESize; 3517 } 3518 3519 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3520 const unsigned Opc = Inst.getOpcode(); 3521 const MCInstrDesc &Desc = MII.get(Opc); 3522 3523 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3524 return true; 3525 3526 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3527 3528 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3529 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3530 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3531 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3532 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3533 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3534 3535 assert(VAddr0Idx != -1); 3536 assert(SrsrcIdx != -1); 3537 assert(SrsrcIdx > VAddr0Idx); 3538 3539 if (DimIdx == -1) 3540 return true; // intersect_ray 3541 3542 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3543 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3544 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3545 unsigned ActualAddrSize = 3546 IsNSA ? SrsrcIdx - VAddr0Idx 3547 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3548 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3549 3550 unsigned ExpectedAddrSize = 3551 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3552 3553 if (!IsNSA) { 3554 if (ExpectedAddrSize > 8) 3555 ExpectedAddrSize = 16; 3556 3557 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3558 // This provides backward compatibility for assembly created 3559 // before 160b/192b/224b types were directly supported. 3560 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3561 return true; 3562 } 3563 3564 return ActualAddrSize == ExpectedAddrSize; 3565 } 3566 3567 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3568 3569 const unsigned Opc = Inst.getOpcode(); 3570 const MCInstrDesc &Desc = MII.get(Opc); 3571 3572 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3573 return true; 3574 if (!Desc.mayLoad() || !Desc.mayStore()) 3575 return true; // Not atomic 3576 3577 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3578 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3579 3580 // This is an incomplete check because image_atomic_cmpswap 3581 // may only use 0x3 and 0xf while other atomic operations 3582 // may use 0x1 and 0x3. However these limitations are 3583 // verified when we check that dmask matches dst size. 3584 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3585 } 3586 3587 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3588 3589 const unsigned Opc = Inst.getOpcode(); 3590 const MCInstrDesc &Desc = MII.get(Opc); 3591 3592 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3593 return true; 3594 3595 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3596 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3597 3598 // GATHER4 instructions use dmask in a different fashion compared to 3599 // other MIMG instructions. The only useful DMASK values are 3600 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3601 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3602 // this. 3603 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3604 } 3605 3606 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3607 const unsigned Opc = Inst.getOpcode(); 3608 const MCInstrDesc &Desc = MII.get(Opc); 3609 3610 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3611 return true; 3612 3613 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3614 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3615 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3616 3617 if (!BaseOpcode->MSAA) 3618 return true; 3619 3620 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3621 assert(DimIdx != -1); 3622 3623 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3624 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3625 3626 return DimInfo->MSAA; 3627 } 3628 3629 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3630 { 3631 switch (Opcode) { 3632 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3633 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3634 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3635 return true; 3636 default: 3637 return false; 3638 } 3639 } 3640 3641 // movrels* opcodes should only allow VGPRS as src0. 3642 // This is specified in .td description for vop1/vop3, 3643 // but sdwa is handled differently. See isSDWAOperand. 3644 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3645 const OperandVector &Operands) { 3646 3647 const unsigned Opc = Inst.getOpcode(); 3648 const MCInstrDesc &Desc = MII.get(Opc); 3649 3650 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3651 return true; 3652 3653 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3654 assert(Src0Idx != -1); 3655 3656 SMLoc ErrLoc; 3657 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3658 if (Src0.isReg()) { 3659 auto Reg = mc2PseudoReg(Src0.getReg()); 3660 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3661 if (!isSGPR(Reg, TRI)) 3662 return true; 3663 ErrLoc = getRegLoc(Reg, Operands); 3664 } else { 3665 ErrLoc = getConstLoc(Operands); 3666 } 3667 3668 Error(ErrLoc, "source operand must be a VGPR"); 3669 return false; 3670 } 3671 3672 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3673 const OperandVector &Operands) { 3674 3675 const unsigned Opc = Inst.getOpcode(); 3676 3677 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3678 return true; 3679 3680 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3681 assert(Src0Idx != -1); 3682 3683 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3684 if (!Src0.isReg()) 3685 return true; 3686 3687 auto Reg = mc2PseudoReg(Src0.getReg()); 3688 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3689 if (isSGPR(Reg, TRI)) { 3690 Error(getRegLoc(Reg, Operands), 3691 "source operand must be either a VGPR or an inline constant"); 3692 return false; 3693 } 3694 3695 return true; 3696 } 3697 3698 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3699 const OperandVector &Operands) { 3700 const unsigned Opc = Inst.getOpcode(); 3701 const MCInstrDesc &Desc = MII.get(Opc); 3702 3703 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3704 return true; 3705 3706 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3707 if (Src2Idx == -1) 3708 return true; 3709 3710 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3711 if (!Src2.isReg()) 3712 return true; 3713 3714 MCRegister Src2Reg = Src2.getReg(); 3715 MCRegister DstReg = Inst.getOperand(0).getReg(); 3716 if 
      (Src2Reg == DstReg)
    return true;

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  default:
    return true;
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    break;
  }

  // TODO: Check that src0 = src1 or src2.

  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
            .getImm() &
        SISrcMods::ABS) {
      return false;
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case
AMDGPU::V_SUBREV_U16_e64_vi: 3837 3838 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3839 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3840 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3841 3842 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3843 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3844 3845 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3846 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3847 3848 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3849 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3850 3851 case AMDGPU::V_LSHRREV_B32_e32: 3852 case AMDGPU::V_LSHRREV_B32_e64: 3853 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3854 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3855 case AMDGPU::V_LSHRREV_B32_e32_vi: 3856 case AMDGPU::V_LSHRREV_B32_e64_vi: 3857 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3858 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3859 3860 case AMDGPU::V_ASHRREV_I32_e32: 3861 case AMDGPU::V_ASHRREV_I32_e64: 3862 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3863 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3864 case AMDGPU::V_ASHRREV_I32_e32_vi: 3865 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3866 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3867 case AMDGPU::V_ASHRREV_I32_e64_vi: 3868 3869 case AMDGPU::V_LSHLREV_B32_e32: 3870 case AMDGPU::V_LSHLREV_B32_e64: 3871 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3872 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3873 case AMDGPU::V_LSHLREV_B32_e32_vi: 3874 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3875 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3876 case AMDGPU::V_LSHLREV_B32_e64_vi: 3877 3878 case AMDGPU::V_LSHLREV_B16_e32: 3879 case AMDGPU::V_LSHLREV_B16_e64: 3880 case AMDGPU::V_LSHLREV_B16_e32_vi: 3881 case AMDGPU::V_LSHLREV_B16_e64_vi: 3882 case AMDGPU::V_LSHLREV_B16_gfx10: 3883 3884 case AMDGPU::V_LSHRREV_B16_e32: 3885 case AMDGPU::V_LSHRREV_B16_e64: 3886 case AMDGPU::V_LSHRREV_B16_e32_vi: 3887 case AMDGPU::V_LSHRREV_B16_e64_vi: 3888 case AMDGPU::V_LSHRREV_B16_gfx10: 3889 3890 case AMDGPU::V_ASHRREV_I16_e32: 3891 case AMDGPU::V_ASHRREV_I16_e64: 3892 case AMDGPU::V_ASHRREV_I16_e32_vi: 3893 case AMDGPU::V_ASHRREV_I16_e64_vi: 3894 case AMDGPU::V_ASHRREV_I16_gfx10: 3895 3896 case AMDGPU::V_LSHLREV_B64_e64: 3897 case AMDGPU::V_LSHLREV_B64_gfx10: 3898 case AMDGPU::V_LSHLREV_B64_vi: 3899 3900 case AMDGPU::V_LSHRREV_B64_e64: 3901 case AMDGPU::V_LSHRREV_B64_gfx10: 3902 case AMDGPU::V_LSHRREV_B64_vi: 3903 3904 case AMDGPU::V_ASHRREV_I64_e64: 3905 case AMDGPU::V_ASHRREV_I64_gfx10: 3906 case AMDGPU::V_ASHRREV_I64_vi: 3907 3908 case AMDGPU::V_PK_LSHLREV_B16: 3909 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3910 case AMDGPU::V_PK_LSHLREV_B16_vi: 3911 3912 case AMDGPU::V_PK_LSHRREV_B16: 3913 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3914 case AMDGPU::V_PK_LSHRREV_B16_vi: 3915 case AMDGPU::V_PK_ASHRREV_I16: 3916 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3917 case AMDGPU::V_PK_ASHRREV_I16_vi: 3918 return true; 3919 default: 3920 return false; 3921 } 3922 } 3923 3924 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3925 3926 using namespace SIInstrFlags; 3927 const unsigned Opcode = Inst.getOpcode(); 3928 const MCInstrDesc &Desc = MII.get(Opcode); 3929 3930 // lds_direct register is defined so that it can be used 3931 // with 9-bit operands only. Ignore encodings which do not accept these. 
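  // In addition, lds_direct is rejected below when it is used as anything
  // other than src0, when it is combined with SDWA or a *rev* opcode, or
  // when targeting gfx90a.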
3932 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3933 if ((Desc.TSFlags & Enc) == 0) 3934 return None; 3935 3936 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3937 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3938 if (SrcIdx == -1) 3939 break; 3940 const auto &Src = Inst.getOperand(SrcIdx); 3941 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3942 3943 if (isGFX90A()) 3944 return StringRef("lds_direct is not supported on this GPU"); 3945 3946 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3947 return StringRef("lds_direct cannot be used with this instruction"); 3948 3949 if (SrcName != OpName::src0) 3950 return StringRef("lds_direct may be used as src0 only"); 3951 } 3952 } 3953 3954 return None; 3955 } 3956 3957 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3958 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3959 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3960 if (Op.isFlatOffset()) 3961 return Op.getStartLoc(); 3962 } 3963 return getLoc(); 3964 } 3965 3966 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3967 const OperandVector &Operands) { 3968 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3969 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3970 return true; 3971 3972 auto Opcode = Inst.getOpcode(); 3973 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3974 assert(OpNum != -1); 3975 3976 const auto &Op = Inst.getOperand(OpNum); 3977 if (!hasFlatOffsets() && Op.getImm() != 0) { 3978 Error(getFlatOffsetLoc(Operands), 3979 "flat offset modifier is not supported on this GPU"); 3980 return false; 3981 } 3982 3983 // For FLAT segment the offset must be positive; 3984 // MSB is ignored and forced to zero. 3985 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3986 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3987 if (!isIntN(OffsetSize, Op.getImm())) { 3988 Error(getFlatOffsetLoc(Operands), 3989 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3990 return false; 3991 } 3992 } else { 3993 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3994 if (!isUIntN(OffsetSize, Op.getImm())) { 3995 Error(getFlatOffsetLoc(Operands), 3996 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3997 return false; 3998 } 3999 } 4000 4001 return true; 4002 } 4003 4004 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4005 // Start with second operand because SMEM Offset cannot be dst or src0. 
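  // (Operands[0] is the mnemonic token, so index 2 is the second operand.)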
4006 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4007 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4008 if (Op.isSMEMOffset()) 4009 return Op.getStartLoc(); 4010 } 4011 return getLoc(); 4012 } 4013 4014 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4015 const OperandVector &Operands) { 4016 if (isCI() || isSI()) 4017 return true; 4018 4019 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4020 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4021 return true; 4022 4023 auto Opcode = Inst.getOpcode(); 4024 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4025 if (OpNum == -1) 4026 return true; 4027 4028 const auto &Op = Inst.getOperand(OpNum); 4029 if (!Op.isImm()) 4030 return true; 4031 4032 uint64_t Offset = Op.getImm(); 4033 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4034 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4035 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4036 return true; 4037 4038 Error(getSMEMOffsetLoc(Operands), 4039 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4040 "expected a 21-bit signed offset"); 4041 4042 return false; 4043 } 4044 4045 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4046 unsigned Opcode = Inst.getOpcode(); 4047 const MCInstrDesc &Desc = MII.get(Opcode); 4048 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4049 return true; 4050 4051 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4052 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4053 4054 const int OpIndices[] = { Src0Idx, Src1Idx }; 4055 4056 unsigned NumExprs = 0; 4057 unsigned NumLiterals = 0; 4058 uint32_t LiteralValue; 4059 4060 for (int OpIdx : OpIndices) { 4061 if (OpIdx == -1) break; 4062 4063 const MCOperand &MO = Inst.getOperand(OpIdx); 4064 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4065 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4066 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4067 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4068 if (NumLiterals == 0 || LiteralValue != Value) { 4069 LiteralValue = Value; 4070 ++NumLiterals; 4071 } 4072 } else if (MO.isExpr()) { 4073 ++NumExprs; 4074 } 4075 } 4076 } 4077 4078 return NumLiterals + NumExprs <= 1; 4079 } 4080 4081 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4082 const unsigned Opc = Inst.getOpcode(); 4083 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4084 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4085 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4086 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4087 4088 if (OpSel & ~3) 4089 return false; 4090 } 4091 4092 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4093 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4094 if (OpSelIdx != -1) { 4095 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4096 return false; 4097 } 4098 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4099 if (OpSelHiIdx != -1) { 4100 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4101 return false; 4102 } 4103 } 4104 4105 return true; 4106 } 4107 4108 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4109 const OperandVector &Operands) { 4110 const unsigned Opc = Inst.getOpcode(); 4111 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4112 if (DppCtrlIdx < 0) 4113 return true; 4114 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4115 4116 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4117 // DPP64 is supported for row_newbcast only. 4118 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4119 if (Src0Idx >= 0 && 4120 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4121 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4122 Error(S, "64 bit dpp only supports row_newbcast"); 4123 return false; 4124 } 4125 } 4126 4127 return true; 4128 } 4129 4130 // Check if VCC register matches wavefront size 4131 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4132 auto FB = getFeatureBits(); 4133 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4134 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4135 } 4136 4137 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4138 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4139 const OperandVector &Operands) { 4140 unsigned Opcode = Inst.getOpcode(); 4141 const MCInstrDesc &Desc = MII.get(Opcode); 4142 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4143 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4144 ImmIdx == -1) 4145 return true; 4146 4147 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4148 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4149 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4150 4151 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4152 4153 unsigned NumExprs = 0; 4154 unsigned NumLiterals = 0; 4155 uint32_t LiteralValue; 4156 4157 for (int OpIdx : OpIndices) { 4158 if (OpIdx == -1) 4159 continue; 4160 4161 const MCOperand &MO = Inst.getOperand(OpIdx); 4162 if (!MO.isImm() && !MO.isExpr()) 4163 continue; 4164 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4165 continue; 4166 4167 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4168 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4169 Error(getConstLoc(Operands), 4170 "inline constants are not allowed for this operand"); 4171 return false; 4172 } 4173 4174 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4175 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4176 if (NumLiterals == 0 || LiteralValue != Value) { 4177 LiteralValue = Value; 4178 ++NumLiterals; 4179 } 4180 } else if (MO.isExpr()) { 4181 ++NumExprs; 4182 } 4183 } 4184 NumLiterals += NumExprs; 4185 4186 if (!NumLiterals) 4187 return true; 4188 4189 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4190 Error(getLitLoc(Operands), "literal operands are not supported"); 4191 return false; 4192 } 4193 4194 if (NumLiterals > 1) { 4195 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4196 return false; 4197 } 4198 4199 return true; 4200 } 4201 4202 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4203 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4204 const MCRegisterInfo *MRI) { 4205 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4206 if (OpIdx < 0) 4207 return -1; 4208 4209 const MCOperand &Op = Inst.getOperand(OpIdx); 4210 if (!Op.isReg()) 4211 return -1; 4212 4213 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4214 auto Reg = Sub ? Sub : Op.getReg(); 4215 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4216 return AGPR32.contains(Reg) ? 
1 : 0; 4217 } 4218 4219 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4220 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4221 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4222 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4223 SIInstrFlags::DS)) == 0) 4224 return true; 4225 4226 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4227 : AMDGPU::OpName::vdata; 4228 4229 const MCRegisterInfo *MRI = getMRI(); 4230 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4231 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4232 4233 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4234 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4235 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4236 return false; 4237 } 4238 4239 auto FB = getFeatureBits(); 4240 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4241 if (DataAreg < 0 || DstAreg < 0) 4242 return true; 4243 return DstAreg == DataAreg; 4244 } 4245 4246 return DstAreg < 1 && DataAreg < 1; 4247 } 4248 4249 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4250 auto FB = getFeatureBits(); 4251 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4252 return true; 4253 4254 const MCRegisterInfo *MRI = getMRI(); 4255 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4256 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4257 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4258 const MCOperand &Op = Inst.getOperand(I); 4259 if (!Op.isReg()) 4260 continue; 4261 4262 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4263 if (!Sub) 4264 continue; 4265 4266 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4267 return false; 4268 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4269 return false; 4270 } 4271 4272 return true; 4273 } 4274 4275 // gfx90a has an undocumented limitation: 4276 // DS_GWS opcodes must use even aligned registers. 4277 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4278 const OperandVector &Operands) { 4279 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4280 return true; 4281 4282 int Opc = Inst.getOpcode(); 4283 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4284 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4285 return true; 4286 4287 const MCRegisterInfo *MRI = getMRI(); 4288 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4289 int Data0Pos = 4290 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4291 assert(Data0Pos != -1); 4292 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4293 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4294 if (RegIdx & 1) { 4295 SMLoc RegLoc = getRegLoc(Reg, Operands); 4296 Error(RegLoc, "vgpr must be even aligned"); 4297 return false; 4298 } 4299 4300 return true; 4301 } 4302 4303 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4304 const OperandVector &Operands, 4305 const SMLoc &IDLoc) { 4306 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4307 AMDGPU::OpName::cpol); 4308 if (CPolPos == -1) 4309 return true; 4310 4311 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4312 4313 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4314 if ((TSFlags & (SIInstrFlags::SMRD)) && 4315 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4316 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4317 return false; 4318 } 4319 4320 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4321 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4322 StringRef CStr(S.getPointer()); 4323 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4324 Error(S, "scc is not supported on this GPU"); 4325 return false; 4326 } 4327 4328 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4329 return true; 4330 4331 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4332 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4333 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4334 : "instruction must use glc"); 4335 return false; 4336 } 4337 } else { 4338 if (CPol & CPol::GLC) { 4339 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4340 StringRef CStr(S.getPointer()); 4341 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4342 Error(S, isGFX940() ? "instruction must not use sc0" 4343 : "instruction must not use glc"); 4344 return false; 4345 } 4346 } 4347 4348 return true; 4349 } 4350 4351 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4352 const SMLoc &IDLoc, 4353 const OperandVector &Operands) { 4354 if (auto ErrMsg = validateLdsDirect(Inst)) { 4355 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4356 return false; 4357 } 4358 if (!validateSOPLiteral(Inst)) { 4359 Error(getLitLoc(Operands), 4360 "only one literal operand is allowed"); 4361 return false; 4362 } 4363 if (!validateVOPLiteral(Inst, Operands)) { 4364 return false; 4365 } 4366 if (!validateConstantBusLimitations(Inst, Operands)) { 4367 return false; 4368 } 4369 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4370 return false; 4371 } 4372 if (!validateIntClampSupported(Inst)) { 4373 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4374 "integer clamping is not supported on this GPU"); 4375 return false; 4376 } 4377 if (!validateOpSel(Inst)) { 4378 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4379 "invalid op_sel operand"); 4380 return false; 4381 } 4382 if (!validateDPP(Inst, Operands)) { 4383 return false; 4384 } 4385 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
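  // For MIMG, d16 is a separate operand and is validated explicitly.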
4386 if (!validateMIMGD16(Inst)) { 4387 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4388 "d16 modifier is not supported on this GPU"); 4389 return false; 4390 } 4391 if (!validateMIMGDim(Inst)) { 4392 Error(IDLoc, "dim modifier is required on this GPU"); 4393 return false; 4394 } 4395 if (!validateMIMGMSAA(Inst)) { 4396 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4397 "invalid dim; must be MSAA type"); 4398 return false; 4399 } 4400 if (!validateMIMGDataSize(Inst)) { 4401 Error(IDLoc, 4402 "image data size does not match dmask and tfe"); 4403 return false; 4404 } 4405 if (!validateMIMGAddrSize(Inst)) { 4406 Error(IDLoc, 4407 "image address size does not match dim and a16"); 4408 return false; 4409 } 4410 if (!validateMIMGAtomicDMask(Inst)) { 4411 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4412 "invalid atomic image dmask"); 4413 return false; 4414 } 4415 if (!validateMIMGGatherDMask(Inst)) { 4416 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4417 "invalid image_gather dmask: only one bit must be set"); 4418 return false; 4419 } 4420 if (!validateMovrels(Inst, Operands)) { 4421 return false; 4422 } 4423 if (!validateFlatOffset(Inst, Operands)) { 4424 return false; 4425 } 4426 if (!validateSMEMOffset(Inst, Operands)) { 4427 return false; 4428 } 4429 if (!validateMAIAccWrite(Inst, Operands)) { 4430 return false; 4431 } 4432 if (!validateMFMA(Inst, Operands)) { 4433 return false; 4434 } 4435 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4436 return false; 4437 } 4438 4439 if (!validateAGPRLdSt(Inst)) { 4440 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4441 ? "invalid register class: data and dst should be all VGPR or AGPR" 4442 : "invalid register class: agpr loads and stores not supported on this GPU" 4443 ); 4444 return false; 4445 } 4446 if (!validateVGPRAlign(Inst)) { 4447 Error(IDLoc, 4448 "invalid register class: vgpr tuples must be 64 bit aligned"); 4449 return false; 4450 } 4451 if (!validateGWS(Inst, Operands)) { 4452 return false; 4453 } 4454 4455 if (!validateDivScale(Inst)) { 4456 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4457 return false; 4458 } 4459 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4460 return false; 4461 } 4462 4463 return true; 4464 } 4465 4466 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4467 const FeatureBitset &FBS, 4468 unsigned VariantID = 0); 4469 4470 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4471 const FeatureBitset &AvailableFeatures, 4472 unsigned VariantID); 4473 4474 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4475 const FeatureBitset &FBS) { 4476 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4477 } 4478 4479 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4480 const FeatureBitset &FBS, 4481 ArrayRef<unsigned> Variants) { 4482 for (auto Variant : Variants) { 4483 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4484 return true; 4485 } 4486 4487 return false; 4488 } 4489 4490 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4491 const SMLoc &IDLoc) { 4492 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4493 4494 // Check if requested instruction variant is supported. 4495 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4496 return false; 4497 4498 // This instruction is not supported. 4499 // Clear any other pending errors because they are no longer relevant. 4500 getParser().clearPendingErrors(); 4501 4502 // Requested instruction variant is not supported. 
4503 // Check if any other variants are supported. 4504 StringRef VariantName = getMatchedVariantName(); 4505 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4506 return Error(IDLoc, 4507 Twine(VariantName, 4508 " variant of this instruction is not supported")); 4509 } 4510 4511 // Finally check if this instruction is supported on any other GPU. 4512 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4513 return Error(IDLoc, "instruction not supported on this GPU"); 4514 } 4515 4516 // Instruction not supported on any GPU. Probably a typo. 4517 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4518 return Error(IDLoc, "invalid instruction" + Suggestion); 4519 } 4520 4521 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4522 OperandVector &Operands, 4523 MCStreamer &Out, 4524 uint64_t &ErrorInfo, 4525 bool MatchingInlineAsm) { 4526 MCInst Inst; 4527 unsigned Result = Match_Success; 4528 for (auto Variant : getMatchedVariants()) { 4529 uint64_t EI; 4530 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4531 Variant); 4532 // We order match statuses from least to most specific. We use most specific 4533 // status as resulting 4534 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4535 if ((R == Match_Success) || 4536 (R == Match_PreferE32) || 4537 (R == Match_MissingFeature && Result != Match_PreferE32) || 4538 (R == Match_InvalidOperand && Result != Match_MissingFeature 4539 && Result != Match_PreferE32) || 4540 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4541 && Result != Match_MissingFeature 4542 && Result != Match_PreferE32)) { 4543 Result = R; 4544 ErrorInfo = EI; 4545 } 4546 if (R == Match_Success) 4547 break; 4548 } 4549 4550 if (Result == Match_Success) { 4551 if (!validateInstruction(Inst, IDLoc, Operands)) { 4552 return true; 4553 } 4554 Inst.setLoc(IDLoc); 4555 Out.emitInstruction(Inst, getSTI()); 4556 return false; 4557 } 4558 4559 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4560 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4561 return true; 4562 } 4563 4564 switch (Result) { 4565 default: break; 4566 case Match_MissingFeature: 4567 // It has been verified that the specified instruction 4568 // mnemonic is valid. A match was found but it requires 4569 // features which are not supported on this GPU. 
4570 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4571 4572 case Match_InvalidOperand: { 4573 SMLoc ErrorLoc = IDLoc; 4574 if (ErrorInfo != ~0ULL) { 4575 if (ErrorInfo >= Operands.size()) { 4576 return Error(IDLoc, "too few operands for instruction"); 4577 } 4578 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4579 if (ErrorLoc == SMLoc()) 4580 ErrorLoc = IDLoc; 4581 } 4582 return Error(ErrorLoc, "invalid operand for instruction"); 4583 } 4584 4585 case Match_PreferE32: 4586 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4587 "should be encoded as e32"); 4588 case Match_MnemonicFail: 4589 llvm_unreachable("Invalid instructions should have been handled already"); 4590 } 4591 llvm_unreachable("Implement any new match types added!"); 4592 } 4593 4594 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4595 int64_t Tmp = -1; 4596 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4597 return true; 4598 } 4599 if (getParser().parseAbsoluteExpression(Tmp)) { 4600 return true; 4601 } 4602 Ret = static_cast<uint32_t>(Tmp); 4603 return false; 4604 } 4605 4606 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4607 uint32_t &Minor) { 4608 if (ParseAsAbsoluteExpression(Major)) 4609 return TokError("invalid major version"); 4610 4611 if (!trySkipToken(AsmToken::Comma)) 4612 return TokError("minor version number required, comma expected"); 4613 4614 if (ParseAsAbsoluteExpression(Minor)) 4615 return TokError("invalid minor version"); 4616 4617 return false; 4618 } 4619 4620 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4621 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4622 return TokError("directive only supported for amdgcn architecture"); 4623 4624 std::string TargetIDDirective; 4625 SMLoc TargetStart = getTok().getLoc(); 4626 if (getParser().parseEscapedString(TargetIDDirective)) 4627 return true; 4628 4629 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4630 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4631 return getParser().Error(TargetRange.Start, 4632 (Twine(".amdgcn_target directive's target id ") + 4633 Twine(TargetIDDirective) + 4634 Twine(" does not match the specified target id ") + 4635 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4636 4637 return false; 4638 } 4639 4640 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4641 return Error(Range.Start, "value out of range", Range); 4642 } 4643 4644 bool AMDGPUAsmParser::calculateGPRBlocks( 4645 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4646 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4647 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4648 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4649 // TODO(scott.linder): These calculations are duplicated from 4650 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
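  // The results are the granulated block counts that are encoded into
  // COMPUTE_PGM_RSRC1 as GRANULATED_WORKITEM_VGPR_COUNT and
  // GRANULATED_WAVEFRONT_SGPR_COUNT.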
4651 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4652 4653 unsigned NumVGPRs = NextFreeVGPR; 4654 unsigned NumSGPRs = NextFreeSGPR; 4655 4656 if (Version.Major >= 10) 4657 NumSGPRs = 0; 4658 else { 4659 unsigned MaxAddressableNumSGPRs = 4660 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4661 4662 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4663 NumSGPRs > MaxAddressableNumSGPRs) 4664 return OutOfRangeError(SGPRRange); 4665 4666 NumSGPRs += 4667 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4668 4669 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4670 NumSGPRs > MaxAddressableNumSGPRs) 4671 return OutOfRangeError(SGPRRange); 4672 4673 if (Features.test(FeatureSGPRInitBug)) 4674 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4675 } 4676 4677 VGPRBlocks = 4678 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4679 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4680 4681 return false; 4682 } 4683 4684 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4685 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4686 return TokError("directive only supported for amdgcn architecture"); 4687 4688 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4689 return TokError("directive only supported for amdhsa OS"); 4690 4691 StringRef KernelName; 4692 if (getParser().parseIdentifier(KernelName)) 4693 return true; 4694 4695 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4696 4697 StringSet<> Seen; 4698 4699 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4700 4701 SMRange VGPRRange; 4702 uint64_t NextFreeVGPR = 0; 4703 uint64_t AccumOffset = 0; 4704 uint64_t SharedVGPRCount = 0; 4705 SMRange SGPRRange; 4706 uint64_t NextFreeSGPR = 0; 4707 4708 // Count the number of user SGPRs implied from the enabled feature bits. 4709 unsigned ImpliedUserSGPRCount = 0; 4710 4711 // Track if the asm explicitly contains the directive for the user SGPR 4712 // count. 
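  // If present, .amdhsa_user_sgpr_count must not be smaller than the count
  // implied by the enabled user SGPRs; this is checked after parsing.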
4713 Optional<unsigned> ExplicitUserSGPRCount; 4714 bool ReserveVCC = true; 4715 bool ReserveFlatScr = true; 4716 Optional<bool> EnableWavefrontSize32; 4717 4718 while (true) { 4719 while (trySkipToken(AsmToken::EndOfStatement)); 4720 4721 StringRef ID; 4722 SMRange IDRange = getTok().getLocRange(); 4723 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4724 return true; 4725 4726 if (ID == ".end_amdhsa_kernel") 4727 break; 4728 4729 if (Seen.find(ID) != Seen.end()) 4730 return TokError(".amdhsa_ directives cannot be repeated"); 4731 Seen.insert(ID); 4732 4733 SMLoc ValStart = getLoc(); 4734 int64_t IVal; 4735 if (getParser().parseAbsoluteExpression(IVal)) 4736 return true; 4737 SMLoc ValEnd = getLoc(); 4738 SMRange ValRange = SMRange(ValStart, ValEnd); 4739 4740 if (IVal < 0) 4741 return OutOfRangeError(ValRange); 4742 4743 uint64_t Val = IVal; 4744 4745 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4746 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4747 return OutOfRangeError(RANGE); \ 4748 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4749 4750 if (ID == ".amdhsa_group_segment_fixed_size") { 4751 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4752 return OutOfRangeError(ValRange); 4753 KD.group_segment_fixed_size = Val; 4754 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4755 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4756 return OutOfRangeError(ValRange); 4757 KD.private_segment_fixed_size = Val; 4758 } else if (ID == ".amdhsa_kernarg_size") { 4759 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4760 return OutOfRangeError(ValRange); 4761 KD.kernarg_size = Val; 4762 } else if (ID == ".amdhsa_user_sgpr_count") { 4763 ExplicitUserSGPRCount = Val; 4764 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4765 if (hasArchitectedFlatScratch()) 4766 return Error(IDRange.Start, 4767 "directive is not supported with architected flat scratch", 4768 IDRange); 4769 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4770 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4771 Val, ValRange); 4772 if (Val) 4773 ImpliedUserSGPRCount += 4; 4774 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4775 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4776 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4777 ValRange); 4778 if (Val) 4779 ImpliedUserSGPRCount += 2; 4780 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4781 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4782 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4783 ValRange); 4784 if (Val) 4785 ImpliedUserSGPRCount += 2; 4786 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4787 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4788 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4789 Val, ValRange); 4790 if (Val) 4791 ImpliedUserSGPRCount += 2; 4792 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4793 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4794 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4795 ValRange); 4796 if (Val) 4797 ImpliedUserSGPRCount += 2; 4798 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4799 if (hasArchitectedFlatScratch()) 4800 return Error(IDRange.Start, 4801 "directive is not supported with architected flat scratch", 4802 IDRange); 4803 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4804 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4805 ValRange); 4806 if (Val) 4807 ImpliedUserSGPRCount += 2; 4808 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4809 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4810 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4811 Val, ValRange); 4812 if (Val) 4813 ImpliedUserSGPRCount += 1; 4814 } else if (ID == ".amdhsa_wavefront_size32") { 4815 if (IVersion.Major < 10) 4816 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4817 EnableWavefrontSize32 = Val; 4818 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4819 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4820 Val, ValRange); 4821 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4822 if (hasArchitectedFlatScratch()) 4823 return Error(IDRange.Start, 4824 "directive is not supported with architected flat scratch", 4825 IDRange); 4826 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4827 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4828 } else if (ID == ".amdhsa_enable_private_segment") { 4829 if (!hasArchitectedFlatScratch()) 4830 return Error( 4831 IDRange.Start, 4832 "directive is not supported without architected flat scratch", 4833 IDRange); 4834 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4835 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4836 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4837 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4838 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4839 ValRange); 4840 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4841 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4842 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4843 ValRange); 4844 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4845 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4846 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4847 ValRange); 4848 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4849 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4850 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4851 ValRange); 4852 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4853 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4854 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4855 ValRange); 4856 } else if (ID == ".amdhsa_next_free_vgpr") { 4857 VGPRRange = ValRange; 4858 NextFreeVGPR = Val; 4859 } else if (ID == ".amdhsa_next_free_sgpr") { 4860 SGPRRange = ValRange; 4861 NextFreeSGPR = Val; 4862 } else if (ID == ".amdhsa_accum_offset") { 4863 if (!isGFX90A()) 4864 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4865 AccumOffset = Val; 4866 } else if (ID == ".amdhsa_reserve_vcc") { 4867 if (!isUInt<1>(Val)) 4868 return OutOfRangeError(ValRange); 4869 ReserveVCC = Val; 4870 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4871 if (IVersion.Major < 7) 4872 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4873 if (hasArchitectedFlatScratch()) 4874 return Error(IDRange.Start, 4875 "directive is not supported with architected flat scratch", 4876 IDRange); 4877 if (!isUInt<1>(Val)) 4878 return OutOfRangeError(ValRange); 4879 ReserveFlatScr = Val; 4880 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4881 if (IVersion.Major < 8) 4882 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4883 if (!isUInt<1>(Val)) 4884 return OutOfRangeError(ValRange); 4885 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4886 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4887 IDRange); 4888 } else if (ID == ".amdhsa_float_round_mode_32") { 4889 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4890 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4891 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4892 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4893 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4894 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4895 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4896 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4897 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4898 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4899 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4900 ValRange); 4901 } else if (ID == ".amdhsa_dx10_clamp") { 4902 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4903 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4904 } else if (ID == ".amdhsa_ieee_mode") { 4905 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4906 Val, ValRange); 4907 } else if (ID == ".amdhsa_fp16_overflow") { 4908 if (IVersion.Major < 9) 4909 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4910 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4911 ValRange); 4912 } else if (ID == ".amdhsa_tg_split") { 4913 if (!isGFX90A()) 4914 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4915 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4916 ValRange); 4917 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4918 if (IVersion.Major < 10) 4919 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4920 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4921 ValRange); 4922 } else if (ID == ".amdhsa_memory_ordered") { 4923 if (IVersion.Major < 10) 4924 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4925 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4926 ValRange); 4927 } else if (ID == ".amdhsa_forward_progress") { 4928 if (IVersion.Major < 10) 4929 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4930 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4931 ValRange); 4932 } else if (ID == ".amdhsa_shared_vgpr_count") { 4933 if (IVersion.Major < 10) 4934 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4935 SharedVGPRCount = Val; 4936 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4937 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4938 ValRange); 4939 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4940 PARSE_BITS_ENTRY( 4941 KD.compute_pgm_rsrc2, 4942 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4943 ValRange); 4944 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4945 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4946 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4947 Val, ValRange); 4948 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4949 PARSE_BITS_ENTRY( 4950 KD.compute_pgm_rsrc2, 4951 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4952 ValRange); 4953 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4954 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4955 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4956 Val, ValRange); 4957 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4958 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4959 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4960 Val, ValRange); 4961 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4963 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4964 Val, ValRange); 4965 } else if (ID == ".amdhsa_exception_int_div_zero") { 4966 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4967 
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError(".amdhsa_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  unsigned UserSGPRCount =
      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  if (isGFX90A()) {
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }

  if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
    if (SharedVGPRCount && EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }
    if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no
arguments, then use the ISA version for the 5066 // targeted GPU. 5067 if (isToken(AsmToken::EndOfStatement)) { 5068 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5069 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5070 ISA.Stepping, 5071 "AMD", "AMDGPU"); 5072 return false; 5073 } 5074 5075 if (ParseDirectiveMajorMinor(Major, Minor)) 5076 return true; 5077 5078 if (!trySkipToken(AsmToken::Comma)) 5079 return TokError("stepping version number required, comma expected"); 5080 5081 if (ParseAsAbsoluteExpression(Stepping)) 5082 return TokError("invalid stepping version"); 5083 5084 if (!trySkipToken(AsmToken::Comma)) 5085 return TokError("vendor name required, comma expected"); 5086 5087 if (!parseString(VendorName, "invalid vendor name")) 5088 return true; 5089 5090 if (!trySkipToken(AsmToken::Comma)) 5091 return TokError("arch name required, comma expected"); 5092 5093 if (!parseString(ArchName, "invalid arch name")) 5094 return true; 5095 5096 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5097 VendorName, ArchName); 5098 return false; 5099 } 5100 5101 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5102 amd_kernel_code_t &Header) { 5103 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5104 // assembly for backwards compatibility. 5105 if (ID == "max_scratch_backing_memory_byte_size") { 5106 Parser.eatToEndOfStatement(); 5107 return false; 5108 } 5109 5110 SmallString<40> ErrStr; 5111 raw_svector_ostream Err(ErrStr); 5112 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5113 return TokError(Err.str()); 5114 } 5115 Lex(); 5116 5117 if (ID == "enable_wavefront_size32") { 5118 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5119 if (!isGFX10Plus()) 5120 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5121 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5122 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5123 } else { 5124 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5125 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5126 } 5127 } 5128 5129 if (ID == "wavefront_size") { 5130 if (Header.wavefront_size == 5) { 5131 if (!isGFX10Plus()) 5132 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5133 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5134 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5135 } else if (Header.wavefront_size == 6) { 5136 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5137 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5138 } 5139 } 5140 5141 if (ID == "enable_wgp_mode") { 5142 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5143 !isGFX10Plus()) 5144 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5145 } 5146 5147 if (ID == "enable_mem_ordered") { 5148 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5149 !isGFX10Plus()) 5150 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5151 } 5152 5153 if (ID == "enable_fwd_progress") { 5154 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5155 !isGFX10Plus()) 5156 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5157 } 5158 5159 return false; 5160 } 5161 5162 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5163 amd_kernel_code_t Header; 5164 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5165 5166 while (true) { 
5167 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5168 // will set the current token to EndOfStatement. 5169 while(trySkipToken(AsmToken::EndOfStatement)); 5170 5171 StringRef ID; 5172 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5173 return true; 5174 5175 if (ID == ".end_amd_kernel_code_t") 5176 break; 5177 5178 if (ParseAMDKernelCodeTValue(ID, Header)) 5179 return true; 5180 } 5181 5182 getTargetStreamer().EmitAMDKernelCodeT(Header); 5183 5184 return false; 5185 } 5186 5187 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5188 StringRef KernelName; 5189 if (!parseId(KernelName, "expected symbol name")) 5190 return true; 5191 5192 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5193 ELF::STT_AMDGPU_HSA_KERNEL); 5194 5195 KernelScope.initialize(getContext()); 5196 return false; 5197 } 5198 5199 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5200 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5201 return Error(getLoc(), 5202 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5203 "architectures"); 5204 } 5205 5206 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5207 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5208 return Error(getParser().getTok().getLoc(), "target id must match options"); 5209 5210 getTargetStreamer().EmitISAVersion(); 5211 Lex(); 5212 5213 return false; 5214 } 5215 5216 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5217 const char *AssemblerDirectiveBegin; 5218 const char *AssemblerDirectiveEnd; 5219 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5220 isHsaAbiVersion3AndAbove(&getSTI()) 5221 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5222 HSAMD::V3::AssemblerDirectiveEnd) 5223 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5224 HSAMD::AssemblerDirectiveEnd); 5225 5226 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5227 return Error(getLoc(), 5228 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5229 "not available on non-amdhsa OSes")).str()); 5230 } 5231 5232 std::string HSAMetadataString; 5233 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5234 HSAMetadataString)) 5235 return true; 5236 5237 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5238 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5239 return Error(getLoc(), "invalid HSA metadata"); 5240 } else { 5241 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5242 return Error(getLoc(), "invalid HSA metadata"); 5243 } 5244 5245 return false; 5246 } 5247 5248 /// Common code to parse out a block of text (typically YAML) between start and 5249 /// end directives. 
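/// Illustrative (assumed) uses: the HSA metadata YAML between
/// .amd_amdgpu_hsa_metadata and .end_amd_amdgpu_hsa_metadata (or the V3
/// .amdgpu_metadata / .end_amdgpu_metadata pair), and the MsgPack PAL
/// metadata between .amdgpu_pal_metadata and .end_amdgpu_pal_metadata.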
5250 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5251 const char *AssemblerDirectiveEnd, 5252 std::string &CollectString) { 5253 5254 raw_string_ostream CollectStream(CollectString); 5255 5256 getLexer().setSkipSpace(false); 5257 5258 bool FoundEnd = false; 5259 while (!isToken(AsmToken::Eof)) { 5260 while (isToken(AsmToken::Space)) { 5261 CollectStream << getTokenStr(); 5262 Lex(); 5263 } 5264 5265 if (trySkipId(AssemblerDirectiveEnd)) { 5266 FoundEnd = true; 5267 break; 5268 } 5269 5270 CollectStream << Parser.parseStringToEndOfStatement() 5271 << getContext().getAsmInfo()->getSeparatorString(); 5272 5273 Parser.eatToEndOfStatement(); 5274 } 5275 5276 getLexer().setSkipSpace(true); 5277 5278 if (isToken(AsmToken::Eof) && !FoundEnd) { 5279 return TokError(Twine("expected directive ") + 5280 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5281 } 5282 5283 CollectStream.flush(); 5284 return false; 5285 } 5286 5287 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5288 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5289 std::string String; 5290 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5291 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5292 return true; 5293 5294 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5295 if (!PALMetadata->setFromString(String)) 5296 return Error(getLoc(), "invalid PAL metadata"); 5297 return false; 5298 } 5299 5300 /// Parse the assembler directive for old linear-format PAL metadata. 5301 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5302 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5303 return Error(getLoc(), 5304 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5305 "not available on non-amdpal OSes")).str()); 5306 } 5307 5308 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5309 PALMetadata->setLegacy(); 5310 for (;;) { 5311 uint32_t Key, Value; 5312 if (ParseAsAbsoluteExpression(Key)) { 5313 return TokError(Twine("invalid value in ") + 5314 Twine(PALMD::AssemblerDirective)); 5315 } 5316 if (!trySkipToken(AsmToken::Comma)) { 5317 return TokError(Twine("expected an even number of values in ") + 5318 Twine(PALMD::AssemblerDirective)); 5319 } 5320 if (ParseAsAbsoluteExpression(Value)) { 5321 return TokError(Twine("invalid value in ") + 5322 Twine(PALMD::AssemblerDirective)); 5323 } 5324 PALMetadata->setRegister(Key, Value); 5325 if (!trySkipToken(AsmToken::Comma)) 5326 break; 5327 } 5328 return false; 5329 } 5330 5331 /// ParseDirectiveAMDGPULDS 5332 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5333 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5334 if (getParser().checkForValidSection()) 5335 return true; 5336 5337 StringRef Name; 5338 SMLoc NameLoc = getLoc(); 5339 if (getParser().parseIdentifier(Name)) 5340 return TokError("expected identifier in directive"); 5341 5342 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5343 if (parseToken(AsmToken::Comma, "expected ','")) 5344 return true; 5345 5346 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5347 5348 int64_t Size; 5349 SMLoc SizeLoc = getLoc(); 5350 if (getParser().parseAbsoluteExpression(Size)) 5351 return true; 5352 if (Size < 0) 5353 return Error(SizeLoc, "size must be non-negative"); 5354 if (Size > LocalMemorySize) 5355 return Error(SizeLoc, "size is too large"); 5356 5357 int64_t Alignment = 4; 5358 if (trySkipToken(AsmToken::Comma)) { 5359 SMLoc AlignLoc = getLoc(); 5360 if 
(getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10 has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
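    // Illustrative (assumed GFX10 syntax): the value would instead be read
    // with something like "s_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO)".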
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
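  // Illustrative examples (assumed, not taken from a test): "v_add_f32_e64"
  // forces the 64-bit encoding and yields mnemonic "v_add_f32";
  // "v_mov_b32_sdwa" forces SDWA and yields "v_mov_b32".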
5541 setForcedEncodingSize(0); 5542 setForcedDPP(false); 5543 setForcedSDWA(false); 5544 5545 if (Name.endswith("_e64")) { 5546 setForcedEncodingSize(64); 5547 return Name.substr(0, Name.size() - 4); 5548 } else if (Name.endswith("_e32")) { 5549 setForcedEncodingSize(32); 5550 return Name.substr(0, Name.size() - 4); 5551 } else if (Name.endswith("_dpp")) { 5552 setForcedDPP(true); 5553 return Name.substr(0, Name.size() - 4); 5554 } else if (Name.endswith("_sdwa")) { 5555 setForcedSDWA(true); 5556 return Name.substr(0, Name.size() - 5); 5557 } 5558 return Name; 5559 } 5560 5561 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5562 StringRef Name, 5563 SMLoc NameLoc, OperandVector &Operands) { 5564 // Add the instruction mnemonic 5565 Name = parseMnemonicSuffix(Name); 5566 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5567 5568 bool IsMIMG = Name.startswith("image_"); 5569 5570 while (!trySkipToken(AsmToken::EndOfStatement)) { 5571 OperandMode Mode = OperandMode_Default; 5572 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5573 Mode = OperandMode_NSA; 5574 CPolSeen = 0; 5575 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5576 5577 if (Res != MatchOperand_Success) { 5578 checkUnsupportedInstruction(Name, NameLoc); 5579 if (!Parser.hasPendingError()) { 5580 // FIXME: use real operand location rather than the current location. 5581 StringRef Msg = 5582 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5583 "not a valid operand."; 5584 Error(getLoc(), Msg); 5585 } 5586 while (!trySkipToken(AsmToken::EndOfStatement)) { 5587 lex(); 5588 } 5589 return true; 5590 } 5591 5592 // Eat the comma or space if there is one. 5593 trySkipToken(AsmToken::Comma); 5594 } 5595 5596 return false; 5597 } 5598 5599 //===----------------------------------------------------------------------===// 5600 // Utility functions 5601 //===----------------------------------------------------------------------===// 5602 5603 OperandMatchResultTy 5604 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5605 5606 if (!trySkipId(Prefix, AsmToken::Colon)) 5607 return MatchOperand_NoMatch; 5608 5609 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5610 } 5611 5612 OperandMatchResultTy 5613 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5614 AMDGPUOperand::ImmTy ImmTy, 5615 bool (*ConvertResult)(int64_t&)) { 5616 SMLoc S = getLoc(); 5617 int64_t Value = 0; 5618 5619 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5620 if (Res != MatchOperand_Success) 5621 return Res; 5622 5623 if (ConvertResult && !ConvertResult(Value)) { 5624 Error(S, "invalid " + StringRef(Prefix) + " value."); 5625 } 5626 5627 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5628 return MatchOperand_Success; 5629 } 5630 5631 OperandMatchResultTy 5632 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5633 OperandVector &Operands, 5634 AMDGPUOperand::ImmTy ImmTy, 5635 bool (*ConvertResult)(int64_t&)) { 5636 SMLoc S = getLoc(); 5637 if (!trySkipId(Prefix, AsmToken::Colon)) 5638 return MatchOperand_NoMatch; 5639 5640 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5641 return MatchOperand_ParseFail; 5642 5643 unsigned Val = 0; 5644 const unsigned MaxSize = 4; 5645 5646 // FIXME: How to verify the number of elements matches the number of src 5647 // operands? 
5648 for (int I = 0; ; ++I) { 5649 int64_t Op; 5650 SMLoc Loc = getLoc(); 5651 if (!parseExpr(Op)) 5652 return MatchOperand_ParseFail; 5653 5654 if (Op != 0 && Op != 1) { 5655 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5656 return MatchOperand_ParseFail; 5657 } 5658 5659 Val |= (Op << I); 5660 5661 if (trySkipToken(AsmToken::RBrac)) 5662 break; 5663 5664 if (I + 1 == MaxSize) { 5665 Error(getLoc(), "expected a closing square bracket"); 5666 return MatchOperand_ParseFail; 5667 } 5668 5669 if (!skipToken(AsmToken::Comma, "expected a comma")) 5670 return MatchOperand_ParseFail; 5671 } 5672 5673 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5674 return MatchOperand_Success; 5675 } 5676 5677 OperandMatchResultTy 5678 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5679 AMDGPUOperand::ImmTy ImmTy) { 5680 int64_t Bit; 5681 SMLoc S = getLoc(); 5682 5683 if (trySkipId(Name)) { 5684 Bit = 1; 5685 } else if (trySkipId("no", Name)) { 5686 Bit = 0; 5687 } else { 5688 return MatchOperand_NoMatch; 5689 } 5690 5691 if (Name == "r128" && !hasMIMG_R128()) { 5692 Error(S, "r128 modifier is not supported on this GPU"); 5693 return MatchOperand_ParseFail; 5694 } 5695 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5696 Error(S, "a16 modifier is not supported on this GPU"); 5697 return MatchOperand_ParseFail; 5698 } 5699 5700 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5701 ImmTy = AMDGPUOperand::ImmTyR128A16; 5702 5703 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5704 return MatchOperand_Success; 5705 } 5706 5707 OperandMatchResultTy 5708 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5709 unsigned CPolOn = 0; 5710 unsigned CPolOff = 0; 5711 SMLoc S = getLoc(); 5712 5713 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5714 if (isGFX940() && !Mnemo.startswith("s_")) { 5715 if (trySkipId("sc0")) 5716 CPolOn = AMDGPU::CPol::SC0; 5717 else if (trySkipId("nosc0")) 5718 CPolOff = AMDGPU::CPol::SC0; 5719 else if (trySkipId("nt")) 5720 CPolOn = AMDGPU::CPol::NT; 5721 else if (trySkipId("nont")) 5722 CPolOff = AMDGPU::CPol::NT; 5723 else if (trySkipId("sc1")) 5724 CPolOn = AMDGPU::CPol::SC1; 5725 else if (trySkipId("nosc1")) 5726 CPolOff = AMDGPU::CPol::SC1; 5727 else 5728 return MatchOperand_NoMatch; 5729 } 5730 else if (trySkipId("glc")) 5731 CPolOn = AMDGPU::CPol::GLC; 5732 else if (trySkipId("noglc")) 5733 CPolOff = AMDGPU::CPol::GLC; 5734 else if (trySkipId("slc")) 5735 CPolOn = AMDGPU::CPol::SLC; 5736 else if (trySkipId("noslc")) 5737 CPolOff = AMDGPU::CPol::SLC; 5738 else if (trySkipId("dlc")) 5739 CPolOn = AMDGPU::CPol::DLC; 5740 else if (trySkipId("nodlc")) 5741 CPolOff = AMDGPU::CPol::DLC; 5742 else if (trySkipId("scc")) 5743 CPolOn = AMDGPU::CPol::SCC; 5744 else if (trySkipId("noscc")) 5745 CPolOff = AMDGPU::CPol::SCC; 5746 else 5747 return MatchOperand_NoMatch; 5748 5749 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5750 Error(S, "dlc modifier is not supported on this GPU"); 5751 return MatchOperand_ParseFail; 5752 } 5753 5754 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5755 Error(S, "scc modifier is not supported on this GPU"); 5756 return MatchOperand_ParseFail; 5757 } 5758 5759 if (CPolSeen & (CPolOn | CPolOff)) { 5760 Error(S, "duplicate cache policy modifier"); 5761 return MatchOperand_ParseFail; 5762 } 5763 5764 CPolSeen |= (CPolOn | CPolOff); 5765 5766 for (unsigned I = 1; I != Operands.size(); ++I) { 5767 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5768 if (Op.isCPol()) { 5769 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5770 return MatchOperand_Success; 5771 } 5772 } 5773 5774 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5775 AMDGPUOperand::ImmTyCPol)); 5776 5777 return MatchOperand_Success; 5778 } 5779 5780 static void addOptionalImmOperand( 5781 MCInst& Inst, const OperandVector& Operands, 5782 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5783 AMDGPUOperand::ImmTy ImmT, 5784 int64_t Default = 0) { 5785 auto i = OptionalIdx.find(ImmT); 5786 if (i != OptionalIdx.end()) { 5787 unsigned Idx = i->second; 5788 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5789 } else { 5790 Inst.addOperand(MCOperand::createImm(Default)); 5791 } 5792 } 5793 5794 OperandMatchResultTy 5795 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5796 StringRef &Value, 5797 SMLoc &StringLoc) { 5798 if (!trySkipId(Prefix, AsmToken::Colon)) 5799 return MatchOperand_NoMatch; 5800 5801 StringLoc = getLoc(); 5802 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5803 : MatchOperand_ParseFail; 5804 } 5805 5806 //===----------------------------------------------------------------------===// 5807 // MTBUF format 5808 //===----------------------------------------------------------------------===// 5809 5810 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5811 int64_t MaxVal, 5812 int64_t &Fmt) { 5813 int64_t Val; 5814 SMLoc Loc = getLoc(); 5815 5816 auto Res = parseIntWithPrefix(Pref, Val); 5817 if (Res == MatchOperand_ParseFail) 5818 return false; 5819 if (Res == MatchOperand_NoMatch) 5820 return true; 5821 5822 if (Val < 0 || Val > MaxVal) { 5823 Error(Loc, Twine("out of range ", StringRef(Pref))); 5824 return false; 5825 } 5826 5827 Fmt = Val; 5828 return true; 5829 } 5830 5831 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5832 // values to live in a joint format operand in the MCInst encoding. 5833 OperandMatchResultTy 5834 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5835 using namespace llvm::AMDGPU::MTBUFFormat; 5836 5837 int64_t Dfmt = DFMT_UNDEF; 5838 int64_t Nfmt = NFMT_UNDEF; 5839 5840 // dfmt and nfmt can appear in either order, and each is optional. 5841 for (int I = 0; I < 2; ++I) { 5842 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5843 return MatchOperand_ParseFail; 5844 5845 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5846 return MatchOperand_ParseFail; 5847 } 5848 // Skip optional comma between dfmt/nfmt 5849 // but guard against 2 commas following each other. 5850 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5851 !peekToken().is(AsmToken::Comma)) { 5852 trySkipToken(AsmToken::Comma); 5853 } 5854 } 5855 5856 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5857 return MatchOperand_NoMatch; 5858 5859 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5860 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5861 5862 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5863 return MatchOperand_Success; 5864 } 5865 5866 OperandMatchResultTy 5867 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5868 using namespace llvm::AMDGPU::MTBUFFormat; 5869 5870 int64_t Fmt = UFMT_UNDEF; 5871 5872 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5873 return MatchOperand_ParseFail; 5874 5875 if (Fmt == UFMT_UNDEF) 5876 return MatchOperand_NoMatch; 5877 5878 Format = Fmt; 5879 return MatchOperand_Success; 5880 } 5881 5882 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5883 int64_t &Nfmt, 5884 StringRef FormatStr, 5885 SMLoc Loc) { 5886 using namespace llvm::AMDGPU::MTBUFFormat; 5887 int64_t Format; 5888 5889 Format = getDfmt(FormatStr); 5890 if (Format != DFMT_UNDEF) { 5891 Dfmt = Format; 5892 return true; 5893 } 5894 5895 Format = getNfmt(FormatStr, getSTI()); 5896 if (Format != NFMT_UNDEF) { 5897 Nfmt = Format; 5898 return true; 5899 } 5900 5901 Error(Loc, "unsupported format"); 5902 return false; 5903 } 5904 5905 OperandMatchResultTy 5906 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5907 SMLoc FormatLoc, 5908 int64_t &Format) { 5909 using namespace llvm::AMDGPU::MTBUFFormat; 5910 5911 int64_t Dfmt = DFMT_UNDEF; 5912 int64_t Nfmt = NFMT_UNDEF; 5913 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5914 return MatchOperand_ParseFail; 5915 5916 if (trySkipToken(AsmToken::Comma)) { 5917 StringRef Str; 5918 SMLoc Loc = getLoc(); 5919 if (!parseId(Str, "expected a format string") || 5920 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5921 return MatchOperand_ParseFail; 5922 } 5923 if (Dfmt == DFMT_UNDEF) { 5924 Error(Loc, "duplicate numeric format"); 5925 return MatchOperand_ParseFail; 5926 } else if (Nfmt == NFMT_UNDEF) { 5927 Error(Loc, "duplicate data format"); 5928 return MatchOperand_ParseFail; 5929 } 5930 } 5931 5932 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5933 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5934 5935 if (isGFX10Plus()) { 5936 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5937 if (Ufmt == UFMT_UNDEF) { 5938 Error(FormatLoc, "unsupported format"); 5939 return MatchOperand_ParseFail; 5940 } 5941 Format = Ufmt; 5942 } else { 5943 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5944 } 5945 5946 return MatchOperand_Success; 5947 } 5948 5949 OperandMatchResultTy 5950 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5951 SMLoc Loc, 5952 int64_t &Format) { 5953 using namespace llvm::AMDGPU::MTBUFFormat; 5954 5955 auto Id = getUnifiedFormat(FormatStr); 5956 if (Id == UFMT_UNDEF) 5957 return MatchOperand_NoMatch; 5958 5959 if (!isGFX10Plus()) { 5960 Error(Loc, "unified format is not supported on this GPU"); 5961 return MatchOperand_ParseFail; 5962 } 5963 5964 Format = Id; 5965 return MatchOperand_Success; 5966 } 5967 5968 OperandMatchResultTy 5969 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5970 using namespace llvm::AMDGPU::MTBUFFormat; 5971 SMLoc Loc = getLoc(); 5972 5973 if (!parseExpr(Format)) 5974 return MatchOperand_ParseFail; 5975 if (!isValidFormatEncoding(Format, getSTI())) { 5976 Error(Loc, "out of range format"); 5977 return MatchOperand_ParseFail; 5978 } 5979 5980 return MatchOperand_Success; 5981 } 5982 5983 OperandMatchResultTy 5984 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5985 using namespace llvm::AMDGPU::MTBUFFormat; 5986 5987 if (!trySkipId("format", AsmToken::Colon)) 5988 return MatchOperand_NoMatch; 5989 5990 if (trySkipToken(AsmToken::LBrac)) { 5991 StringRef FormatStr; 5992 SMLoc Loc = getLoc(); 5993 if (!parseId(FormatStr, "expected a format string")) 5994 return MatchOperand_ParseFail; 5995 5996 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5997 if (Res == MatchOperand_NoMatch) 5998 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5999 if (Res != MatchOperand_Success) 6000 return Res; 6001 6002 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6003 return MatchOperand_ParseFail; 6004 6005 return MatchOperand_Success; 6006 } 6007 6008 return parseNumericFormat(Format); 6009 } 6010 6011 OperandMatchResultTy 6012 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6013 using namespace llvm::AMDGPU::MTBUFFormat; 6014 6015 int64_t Format = getDefaultFormatEncoding(getSTI()); 6016 OperandMatchResultTy Res; 6017 SMLoc Loc = getLoc(); 6018 6019 // Parse legacy format syntax. 6020 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6021 if (Res == MatchOperand_ParseFail) 6022 return Res; 6023 6024 bool FormatFound = (Res == MatchOperand_Success); 6025 6026 Operands.push_back( 6027 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6028 6029 if (FormatFound) 6030 trySkipToken(AsmToken::Comma); 6031 6032 if (isToken(AsmToken::EndOfStatement)) { 6033 // We are expecting an soffset operand, 6034 // but let matcher handle the error. 6035 return MatchOperand_Success; 6036 } 6037 6038 // Parse soffset. 
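  // Illustrative (assumed MTBUF syntax): the format may also follow soffset,
  // e.g. "tbuffer_load_format_x v0, off, s[0:3], s4
  //       format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT]"; in that case the
  // ImmTyFORMAT operand pushed above is patched with the parsed value below.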
6039 Res = parseRegOrImm(Operands); 6040 if (Res != MatchOperand_Success) 6041 return Res; 6042 6043 trySkipToken(AsmToken::Comma); 6044 6045 if (!FormatFound) { 6046 Res = parseSymbolicOrNumericFormat(Format); 6047 if (Res == MatchOperand_ParseFail) 6048 return Res; 6049 if (Res == MatchOperand_Success) { 6050 auto Size = Operands.size(); 6051 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6052 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6053 Op.setImm(Format); 6054 } 6055 return MatchOperand_Success; 6056 } 6057 6058 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6059 Error(getLoc(), "duplicate format"); 6060 return MatchOperand_ParseFail; 6061 } 6062 return MatchOperand_Success; 6063 } 6064 6065 //===----------------------------------------------------------------------===// 6066 // ds 6067 //===----------------------------------------------------------------------===// 6068 6069 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6070 const OperandVector &Operands) { 6071 OptionalImmIndexMap OptionalIdx; 6072 6073 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6074 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6075 6076 // Add the register arguments 6077 if (Op.isReg()) { 6078 Op.addRegOperands(Inst, 1); 6079 continue; 6080 } 6081 6082 // Handle optional arguments 6083 OptionalIdx[Op.getImmTy()] = i; 6084 } 6085 6086 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6087 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6089 6090 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6091 } 6092 6093 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6094 bool IsGdsHardcoded) { 6095 OptionalImmIndexMap OptionalIdx; 6096 6097 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6098 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6099 6100 // Add the register arguments 6101 if (Op.isReg()) { 6102 Op.addRegOperands(Inst, 1); 6103 continue; 6104 } 6105 6106 if (Op.isToken() && Op.getToken() == "gds") { 6107 IsGdsHardcoded = true; 6108 continue; 6109 } 6110 6111 // Handle optional arguments 6112 OptionalIdx[Op.getImmTy()] = i; 6113 } 6114 6115 AMDGPUOperand::ImmTy OffsetType = 6116 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6117 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6118 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6119 AMDGPUOperand::ImmTyOffset; 6120 6121 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6122 6123 if (!IsGdsHardcoded) { 6124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6125 } 6126 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6127 } 6128 6129 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6130 OptionalImmIndexMap OptionalIdx; 6131 6132 unsigned OperandIdx[4]; 6133 unsigned EnMask = 0; 6134 int SrcIdx = 0; 6135 6136 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6137 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6138 6139 // Add the register arguments 6140 if (Op.isReg()) { 6141 assert(SrcIdx < 4); 6142 OperandIdx[SrcIdx] = Inst.size(); 6143 Op.addRegOperands(Inst, 1); 6144 ++SrcIdx; 6145 continue; 6146 } 6147 6148 if (Op.isOff()) { 6149 assert(SrcIdx < 4); 6150 OperandIdx[SrcIdx] = Inst.size(); 6151 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6152 ++SrcIdx; 6153 continue; 6154 } 6155 6156 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6157 Op.addImmOperands(Inst, 1); 6158 continue; 6159 } 6160 6161 if (Op.isToken() && Op.getToken() == "done") 6162 continue; 6163 6164 // Handle optional arguments 6165 OptionalIdx[Op.getImmTy()] = i; 6166 } 6167 6168 assert(SrcIdx == 4); 6169 6170 bool Compr = false; 6171 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6172 Compr = true; 6173 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6174 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6175 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6176 } 6177 6178 for (auto i = 0; i < SrcIdx; ++i) { 6179 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6180 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6181 } 6182 } 6183 6184 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6185 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6186 6187 Inst.addOperand(MCOperand::createImm(EnMask)); 6188 } 6189 6190 //===----------------------------------------------------------------------===// 6191 // s_waitcnt 6192 //===----------------------------------------------------------------------===// 6193 6194 static bool 6195 encodeCnt( 6196 const AMDGPU::IsaVersion ISA, 6197 int64_t &IntVal, 6198 int64_t CntVal, 6199 bool Saturate, 6200 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6201 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6202 { 6203 bool Failed = false; 6204 6205 IntVal = encode(ISA, IntVal, CntVal); 6206 if (CntVal != decode(ISA, IntVal)) { 6207 if (Saturate) { 6208 IntVal = encode(ISA, IntVal, -1); 6209 } else { 6210 Failed = true; 6211 } 6212 } 6213 return Failed; 6214 } 6215 6216 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6217 6218 SMLoc CntLoc = getLoc(); 6219 StringRef CntName = getTokenStr(); 6220 6221 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6222 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6223 return false; 6224 6225 int64_t CntVal; 6226 SMLoc ValLoc = getLoc(); 6227 if (!parseExpr(CntVal)) 6228 return false; 6229 6230 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6231 6232 bool Failed = true; 6233 bool Sat = CntName.endswith("_sat"); 6234 6235 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6236 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6237 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6238 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6239 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6240 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6241 } else { 6242 Error(CntLoc, "invalid counter name " + CntName); 6243 return false; 6244 } 6245 6246 if (Failed) { 6247 Error(ValLoc, "too large value for " + CntName); 6248 return false; 6249 } 6250 6251 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6252 return false; 6253 6254 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6255 if (isToken(AsmToken::EndOfStatement)) { 6256 Error(getLoc(), "expected a counter name"); 6257 return false; 6258 } 6259 } 6260 6261 return true; 6262 } 6263 6264 OperandMatchResultTy 6265 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6266 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6267 int64_t Waitcnt = getWaitcntBitMask(ISA); 6268 SMLoc S = getLoc(); 6269 6270 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6271 while (!isToken(AsmToken::EndOfStatement)) { 6272 if (!parseCnt(Waitcnt)) 6273 return MatchOperand_ParseFail; 6274 } 6275 } else { 6276 if (!parseExpr(Waitcnt)) 6277 return MatchOperand_ParseFail; 6278 } 6279 6280 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6281 return MatchOperand_Success; 6282 } 6283 6284 bool 6285 AMDGPUOperand::isSWaitCnt() const { 6286 return isImm(); 6287 } 6288 6289 //===----------------------------------------------------------------------===// 6290 // hwreg 6291 //===----------------------------------------------------------------------===// 6292 6293 bool 6294 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6295 OperandInfoTy &Offset, 6296 
OperandInfoTy &Width) { 6297 using namespace llvm::AMDGPU::Hwreg; 6298 6299 // The register may be specified by name or using a numeric code 6300 HwReg.Loc = getLoc(); 6301 if (isToken(AsmToken::Identifier) && 6302 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6303 HwReg.IsSymbolic = true; 6304 lex(); // skip register name 6305 } else if (!parseExpr(HwReg.Id, "a register name")) { 6306 return false; 6307 } 6308 6309 if (trySkipToken(AsmToken::RParen)) 6310 return true; 6311 6312 // parse optional params 6313 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6314 return false; 6315 6316 Offset.Loc = getLoc(); 6317 if (!parseExpr(Offset.Id)) 6318 return false; 6319 6320 if (!skipToken(AsmToken::Comma, "expected a comma")) 6321 return false; 6322 6323 Width.Loc = getLoc(); 6324 return parseExpr(Width.Id) && 6325 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6326 } 6327 6328 bool 6329 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6330 const OperandInfoTy &Offset, 6331 const OperandInfoTy &Width) { 6332 6333 using namespace llvm::AMDGPU::Hwreg; 6334 6335 if (HwReg.IsSymbolic) { 6336 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6337 Error(HwReg.Loc, 6338 "specified hardware register is not supported on this GPU"); 6339 return false; 6340 } 6341 } else { 6342 if (!isValidHwreg(HwReg.Id)) { 6343 Error(HwReg.Loc, 6344 "invalid code of hardware register: only 6-bit values are legal"); 6345 return false; 6346 } 6347 } 6348 if (!isValidHwregOffset(Offset.Id)) { 6349 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6350 return false; 6351 } 6352 if (!isValidHwregWidth(Width.Id)) { 6353 Error(Width.Loc, 6354 "invalid bitfield width: only values from 1 to 32 are legal"); 6355 return false; 6356 } 6357 return true; 6358 } 6359 6360 OperandMatchResultTy 6361 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6362 using namespace llvm::AMDGPU::Hwreg; 6363 6364 int64_t ImmVal = 0; 6365 SMLoc Loc = getLoc(); 6366 6367 if (trySkipId("hwreg", AsmToken::LParen)) { 6368 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6369 OperandInfoTy Offset(OFFSET_DEFAULT_); 6370 OperandInfoTy Width(WIDTH_DEFAULT_); 6371 if (parseHwregBody(HwReg, Offset, Width) && 6372 validateHwreg(HwReg, Offset, Width)) { 6373 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6374 } else { 6375 return MatchOperand_ParseFail; 6376 } 6377 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6378 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6379 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6380 return MatchOperand_ParseFail; 6381 } 6382 } else { 6383 return MatchOperand_ParseFail; 6384 } 6385 6386 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6387 return MatchOperand_Success; 6388 } 6389 6390 bool AMDGPUOperand::isHwreg() const { 6391 return isImmTy(ImmTyHwreg); 6392 } 6393 6394 //===----------------------------------------------------------------------===// 6395 // sendmsg 6396 //===----------------------------------------------------------------------===// 6397 6398 bool 6399 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6400 OperandInfoTy &Op, 6401 OperandInfoTy &Stream) { 6402 using namespace llvm::AMDGPU::SendMsg; 6403 6404 Msg.Loc = getLoc(); 6405 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6406 Msg.IsSymbolic = true; 6407 lex(); // skip message name 6408 } else if (!parseExpr(Msg.Id, "a message name")) { 6409 return false; 6410 } 6411 6412 if 
(trySkipToken(AsmToken::Comma)) { 6413 Op.IsDefined = true; 6414 Op.Loc = getLoc(); 6415 if (isToken(AsmToken::Identifier) && 6416 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6417 lex(); // skip operation name 6418 } else if (!parseExpr(Op.Id, "an operation name")) { 6419 return false; 6420 } 6421 6422 if (trySkipToken(AsmToken::Comma)) { 6423 Stream.IsDefined = true; 6424 Stream.Loc = getLoc(); 6425 if (!parseExpr(Stream.Id)) 6426 return false; 6427 } 6428 } 6429 6430 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6431 } 6432 6433 bool 6434 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6435 const OperandInfoTy &Op, 6436 const OperandInfoTy &Stream) { 6437 using namespace llvm::AMDGPU::SendMsg; 6438 6439 // Validation strictness depends on whether message is specified 6440 // in a symbolic or in a numeric form. In the latter case 6441 // only encoding possibility is checked. 6442 bool Strict = Msg.IsSymbolic; 6443 6444 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6445 Error(Msg.Loc, "invalid message id"); 6446 return false; 6447 } 6448 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6449 if (Op.IsDefined) { 6450 Error(Op.Loc, "message does not support operations"); 6451 } else { 6452 Error(Msg.Loc, "missing message operation"); 6453 } 6454 return false; 6455 } 6456 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6457 Error(Op.Loc, "invalid operation id"); 6458 return false; 6459 } 6460 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6461 Error(Stream.Loc, "message operation does not support streams"); 6462 return false; 6463 } 6464 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6465 Error(Stream.Loc, "invalid message stream id"); 6466 return false; 6467 } 6468 return true; 6469 } 6470 6471 OperandMatchResultTy 6472 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6473 using namespace llvm::AMDGPU::SendMsg; 6474 6475 int64_t ImmVal = 0; 6476 SMLoc Loc = getLoc(); 6477 6478 if (trySkipId("sendmsg", AsmToken::LParen)) { 6479 OperandInfoTy Msg(ID_UNKNOWN_); 6480 OperandInfoTy Op(OP_NONE_); 6481 OperandInfoTy Stream(STREAM_ID_NONE_); 6482 if (parseSendMsgBody(Msg, Op, Stream) && 6483 validateSendMsg(Msg, Op, Stream)) { 6484 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6485 } else { 6486 return MatchOperand_ParseFail; 6487 } 6488 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6489 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6490 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6491 return MatchOperand_ParseFail; 6492 } 6493 } else { 6494 return MatchOperand_ParseFail; 6495 } 6496 6497 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6498 return MatchOperand_Success; 6499 } 6500 6501 bool AMDGPUOperand::isSendMsg() const { 6502 return isImmTy(ImmTySendMsg); 6503 } 6504 6505 //===----------------------------------------------------------------------===// 6506 // v_interp 6507 //===----------------------------------------------------------------------===// 6508 6509 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6510 StringRef Str; 6511 SMLoc S = getLoc(); 6512 6513 if (!parseId(Str)) 6514 return MatchOperand_NoMatch; 6515 6516 int Slot = StringSwitch<int>(Str) 6517 .Case("p10", 0) 6518 .Case("p20", 1) 6519 .Case("p0", 2) 6520 .Default(-1); 6521 6522 if (Slot == -1) { 6523 Error(S, "invalid interpolation slot"); 6524 return MatchOperand_ParseFail; 6525 } 6526 6527 
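  // Illustrative (assumed syntax): "v_interp_mov_f32 v0, p10, attr0.x"
  // selects slot 0 via the StringSwitch above.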
Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6528 AMDGPUOperand::ImmTyInterpSlot)); 6529 return MatchOperand_Success; 6530 } 6531 6532 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6533 StringRef Str; 6534 SMLoc S = getLoc(); 6535 6536 if (!parseId(Str)) 6537 return MatchOperand_NoMatch; 6538 6539 if (!Str.startswith("attr")) { 6540 Error(S, "invalid interpolation attribute"); 6541 return MatchOperand_ParseFail; 6542 } 6543 6544 StringRef Chan = Str.take_back(2); 6545 int AttrChan = StringSwitch<int>(Chan) 6546 .Case(".x", 0) 6547 .Case(".y", 1) 6548 .Case(".z", 2) 6549 .Case(".w", 3) 6550 .Default(-1); 6551 if (AttrChan == -1) { 6552 Error(S, "invalid or missing interpolation attribute channel"); 6553 return MatchOperand_ParseFail; 6554 } 6555 6556 Str = Str.drop_back(2).drop_front(4); 6557 6558 uint8_t Attr; 6559 if (Str.getAsInteger(10, Attr)) { 6560 Error(S, "invalid or missing interpolation attribute number"); 6561 return MatchOperand_ParseFail; 6562 } 6563 6564 if (Attr > 63) { 6565 Error(S, "out of bounds interpolation attribute number"); 6566 return MatchOperand_ParseFail; 6567 } 6568 6569 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6570 6571 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6572 AMDGPUOperand::ImmTyInterpAttr)); 6573 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6574 AMDGPUOperand::ImmTyAttrChan)); 6575 return MatchOperand_Success; 6576 } 6577 6578 //===----------------------------------------------------------------------===// 6579 // exp 6580 //===----------------------------------------------------------------------===// 6581 6582 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6583 using namespace llvm::AMDGPU::Exp; 6584 6585 StringRef Str; 6586 SMLoc S = getLoc(); 6587 6588 if (!parseId(Str)) 6589 return MatchOperand_NoMatch; 6590 6591 unsigned Id = getTgtId(Str); 6592 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6593 Error(S, (Id == ET_INVALID) ? 
6594 "invalid exp target" : 6595 "exp target is not supported on this GPU"); 6596 return MatchOperand_ParseFail; 6597 } 6598 6599 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6600 AMDGPUOperand::ImmTyExpTgt)); 6601 return MatchOperand_Success; 6602 } 6603 6604 //===----------------------------------------------------------------------===// 6605 // parser helpers 6606 //===----------------------------------------------------------------------===// 6607 6608 bool 6609 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6610 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6611 } 6612 6613 bool 6614 AMDGPUAsmParser::isId(const StringRef Id) const { 6615 return isId(getToken(), Id); 6616 } 6617 6618 bool 6619 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6620 return getTokenKind() == Kind; 6621 } 6622 6623 bool 6624 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6625 if (isId(Id)) { 6626 lex(); 6627 return true; 6628 } 6629 return false; 6630 } 6631 6632 bool 6633 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6634 if (isToken(AsmToken::Identifier)) { 6635 StringRef Tok = getTokenStr(); 6636 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6637 lex(); 6638 return true; 6639 } 6640 } 6641 return false; 6642 } 6643 6644 bool 6645 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6646 if (isId(Id) && peekToken().is(Kind)) { 6647 lex(); 6648 lex(); 6649 return true; 6650 } 6651 return false; 6652 } 6653 6654 bool 6655 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6656 if (isToken(Kind)) { 6657 lex(); 6658 return true; 6659 } 6660 return false; 6661 } 6662 6663 bool 6664 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6665 const StringRef ErrMsg) { 6666 if (!trySkipToken(Kind)) { 6667 Error(getLoc(), ErrMsg); 6668 return false; 6669 } 6670 return true; 6671 } 6672 6673 bool 6674 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6675 SMLoc S = getLoc(); 6676 6677 const MCExpr *Expr; 6678 if (Parser.parseExpression(Expr)) 6679 return false; 6680 6681 if (Expr->evaluateAsAbsolute(Imm)) 6682 return true; 6683 6684 if (Expected.empty()) { 6685 Error(S, "expected absolute expression"); 6686 } else { 6687 Error(S, Twine("expected ", Expected) + 6688 Twine(" or an absolute expression")); 6689 } 6690 return false; 6691 } 6692 6693 bool 6694 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6695 SMLoc S = getLoc(); 6696 6697 const MCExpr *Expr; 6698 if (Parser.parseExpression(Expr)) 6699 return false; 6700 6701 int64_t IntVal; 6702 if (Expr->evaluateAsAbsolute(IntVal)) { 6703 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6704 } else { 6705 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6706 } 6707 return true; 6708 } 6709 6710 bool 6711 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6712 if (isToken(AsmToken::String)) { 6713 Val = getToken().getStringContents(); 6714 lex(); 6715 return true; 6716 } else { 6717 Error(getLoc(), ErrMsg); 6718 return false; 6719 } 6720 } 6721 6722 bool 6723 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6724 if (isToken(AsmToken::Identifier)) { 6725 Val = getTokenStr(); 6726 lex(); 6727 return true; 6728 } else { 6729 if (!ErrMsg.empty()) 6730 Error(getLoc(), ErrMsg); 6731 return false; 6732 } 6733 } 6734 6735 AsmToken 6736 AMDGPUAsmParser::getToken() const { 6737 return Parser.getTok(); 6738 } 6739 6740 AsmToken 6741 
AMDGPUAsmParser::peekToken() { 6742 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6743 } 6744 6745 void 6746 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6747 auto TokCount = getLexer().peekTokens(Tokens); 6748 6749 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6750 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6751 } 6752 6753 AsmToken::TokenKind 6754 AMDGPUAsmParser::getTokenKind() const { 6755 return getLexer().getKind(); 6756 } 6757 6758 SMLoc 6759 AMDGPUAsmParser::getLoc() const { 6760 return getToken().getLoc(); 6761 } 6762 6763 StringRef 6764 AMDGPUAsmParser::getTokenStr() const { 6765 return getToken().getString(); 6766 } 6767 6768 void 6769 AMDGPUAsmParser::lex() { 6770 Parser.Lex(); 6771 } 6772 6773 SMLoc 6774 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6775 const OperandVector &Operands) const { 6776 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6777 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6778 if (Test(Op)) 6779 return Op.getStartLoc(); 6780 } 6781 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6782 } 6783 6784 SMLoc 6785 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6786 const OperandVector &Operands) const { 6787 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6788 return getOperandLoc(Test, Operands); 6789 } 6790 6791 SMLoc 6792 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6793 const OperandVector &Operands) const { 6794 auto Test = [=](const AMDGPUOperand& Op) { 6795 return Op.isRegKind() && Op.getReg() == Reg; 6796 }; 6797 return getOperandLoc(Test, Operands); 6798 } 6799 6800 SMLoc 6801 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6802 auto Test = [](const AMDGPUOperand& Op) { 6803 return Op.IsImmKindLiteral() || Op.isExpr(); 6804 }; 6805 return getOperandLoc(Test, Operands); 6806 } 6807 6808 SMLoc 6809 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6810 auto Test = [](const AMDGPUOperand& Op) { 6811 return Op.isImmKindConst(); 6812 }; 6813 return getOperandLoc(Test, Operands); 6814 } 6815 6816 //===----------------------------------------------------------------------===// 6817 // swizzle 6818 //===----------------------------------------------------------------------===// 6819 6820 LLVM_READNONE 6821 static unsigned 6822 encodeBitmaskPerm(const unsigned AndMask, 6823 const unsigned OrMask, 6824 const unsigned XorMask) { 6825 using namespace llvm::AMDGPU::Swizzle; 6826 6827 return BITMASK_PERM_ENC | 6828 (AndMask << BITMASK_AND_SHIFT) | 6829 (OrMask << BITMASK_OR_SHIFT) | 6830 (XorMask << BITMASK_XOR_SHIFT); 6831 } 6832 6833 bool 6834 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6835 const unsigned MinVal, 6836 const unsigned MaxVal, 6837 const StringRef ErrMsg, 6838 SMLoc &Loc) { 6839 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6840 return false; 6841 } 6842 Loc = getLoc(); 6843 if (!parseExpr(Op)) { 6844 return false; 6845 } 6846 if (Op < MinVal || Op > MaxVal) { 6847 Error(Loc, ErrMsg); 6848 return false; 6849 } 6850 6851 return true; 6852 } 6853 6854 bool 6855 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6856 const unsigned MinVal, 6857 const unsigned MaxVal, 6858 const StringRef ErrMsg) { 6859 SMLoc Loc; 6860 for (unsigned i = 0; i < OpNum; ++i) { 6861 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6862 return false; 6863 } 6864 6865 return true; 6866 } 6867 6868 bool 6869 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6870 using namespace llvm::AMDGPU::Swizzle; 6871 6872 int64_t Lane[LANE_NUM]; 6873 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6874 "expected a 2-bit lane id")) { 6875 Imm = QUAD_PERM_ENC; 6876 for (unsigned I = 0; I < LANE_NUM; ++I) { 6877 Imm |= Lane[I] << (LANE_SHIFT * I); 6878 } 6879 return true; 6880 } 6881 return false; 6882 } 6883 6884 bool 6885 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6886 using namespace llvm::AMDGPU::Swizzle; 6887 6888 SMLoc Loc; 6889 int64_t GroupSize; 6890 int64_t LaneIdx; 6891 6892 if (!parseSwizzleOperand(GroupSize, 6893 2, 32, 6894 "group size must be in the interval [2,32]", 6895 Loc)) { 6896 return false; 6897 } 6898 if (!isPowerOf2_64(GroupSize)) { 6899 Error(Loc, "group size must be a power of two"); 6900 return false; 6901 } 6902 if (parseSwizzleOperand(LaneIdx, 6903 0, GroupSize - 1, 6904 "lane id must be in the interval [0,group size - 1]", 6905 Loc)) { 6906 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6907 return true; 6908 } 6909 return false; 6910 } 6911 6912 bool 6913 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6914 using namespace llvm::AMDGPU::Swizzle; 6915 6916 SMLoc Loc; 6917 int64_t GroupSize; 6918 6919 if (!parseSwizzleOperand(GroupSize, 6920 2, 32, 6921 "group size must be in the interval [2,32]", 6922 Loc)) { 6923 return false; 6924 } 6925 if (!isPowerOf2_64(GroupSize)) { 6926 Error(Loc, "group size must be a power of two"); 6927 return false; 6928 } 6929 6930 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6931 return true; 6932 } 6933 6934 bool 6935 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6936 using namespace llvm::AMDGPU::Swizzle; 6937 6938 SMLoc Loc; 6939 int64_t GroupSize; 6940 6941 if (!parseSwizzleOperand(GroupSize, 6942 1, 16, 6943 "group size must be in the interval [1,16]", 6944 Loc)) { 6945 return false; 6946 } 6947 if (!isPowerOf2_64(GroupSize)) { 6948 Error(Loc, "group size must be a power of two"); 6949 return false; 6950 } 6951 6952 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6953 return true; 6954 } 6955 6956 bool 6957 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6958 using namespace llvm::AMDGPU::Swizzle; 6959 6960 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6961 return false; 6962 } 6963 6964 StringRef Ctl; 6965 SMLoc StrLoc = getLoc(); 6966 if (!parseString(Ctl)) { 6967 return false; 6968 } 6969 if (Ctl.size() != BITMASK_WIDTH) { 6970 Error(StrLoc, "expected a 5-character mask"); 6971 return false; 6972 } 6973 6974 unsigned AndMask = 0; 6975 unsigned OrMask = 0; 6976 unsigned XorMask = 0; 6977 6978 for (size_t i = 0; i < Ctl.size(); ++i) { 6979 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6980 switch(Ctl[i]) { 6981 default: 6982 Error(StrLoc, "invalid mask"); 6983 return false; 6984 case '0': 6985 break; 6986 case '1': 6987 OrMask |= Mask; 6988 break; 6989 case 'p': 6990 AndMask |= Mask; 6991 break; 6992 case 'i': 6993 AndMask |= Mask; 6994 XorMask |= Mask; 6995 break; 6996 } 6997 } 6998 6999 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7000 return true; 7001 } 7002 7003 bool 7004 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7005 7006 SMLoc OffsetLoc = getLoc(); 7007 7008 if (!parseExpr(Imm, "a swizzle macro")) { 7009 return false; 7010 } 7011 if (!isUInt<16>(Imm)) { 7012 Error(OffsetLoc, "expected a 16-bit offset"); 7013 return false; 7014 } 7015 return true; 7016 } 7017 7018 bool 7019 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7020 using namespace llvm::AMDGPU::Swizzle; 7021 7022 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0) ?
7105 "expected a VGPR index mode or a closing parenthesis" : 7106 "expected a VGPR index mode"); 7107 return UNDEF; 7108 } 7109 7110 if (Imm & Mode) { 7111 Error(S, "duplicate VGPR index mode"); 7112 return UNDEF; 7113 } 7114 Imm |= Mode; 7115 7116 if (trySkipToken(AsmToken::RParen)) 7117 break; 7118 if (!skipToken(AsmToken::Comma, 7119 "expected a comma or a closing parenthesis")) 7120 return UNDEF; 7121 } 7122 7123 return Imm; 7124 } 7125 7126 OperandMatchResultTy 7127 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7128 7129 using namespace llvm::AMDGPU::VGPRIndexMode; 7130 7131 int64_t Imm = 0; 7132 SMLoc S = getLoc(); 7133 7134 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7135 Imm = parseGPRIdxMacro(); 7136 if (Imm == UNDEF) 7137 return MatchOperand_ParseFail; 7138 } else { 7139 if (getParser().parseAbsoluteExpression(Imm)) 7140 return MatchOperand_ParseFail; 7141 if (Imm < 0 || !isUInt<4>(Imm)) { 7142 Error(S, "invalid immediate: only 4-bit values are legal"); 7143 return MatchOperand_ParseFail; 7144 } 7145 } 7146 7147 Operands.push_back( 7148 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7149 return MatchOperand_Success; 7150 } 7151 7152 bool AMDGPUOperand::isGPRIdxMode() const { 7153 return isImmTy(ImmTyGprIdxMode); 7154 } 7155 7156 //===----------------------------------------------------------------------===// 7157 // sopp branch targets 7158 //===----------------------------------------------------------------------===// 7159 7160 OperandMatchResultTy 7161 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7162 7163 // Make sure we are not parsing something 7164 // that looks like a label or an expression but is not. 7165 // This will improve error messages. 7166 if (isRegister() || isModifier()) 7167 return MatchOperand_NoMatch; 7168 7169 if (!parseExpr(Operands)) 7170 return MatchOperand_ParseFail; 7171 7172 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7173 assert(Opr.isImm() || Opr.isExpr()); 7174 SMLoc Loc = Opr.getStartLoc(); 7175 7176 // Currently we do not support arbitrary expressions as branch targets. 7177 // Only labels and absolute expressions are accepted. 
7178 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7179 Error(Loc, "expected an absolute expression or a label"); 7180 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7181 Error(Loc, "expected a 16-bit signed jump offset"); 7182 } 7183 7184 return MatchOperand_Success; 7185 } 7186 7187 //===----------------------------------------------------------------------===// 7188 // Boolean holding registers 7189 //===----------------------------------------------------------------------===// 7190 7191 OperandMatchResultTy 7192 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7193 return parseReg(Operands); 7194 } 7195 7196 //===----------------------------------------------------------------------===// 7197 // mubuf 7198 //===----------------------------------------------------------------------===// 7199 7200 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7201 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7202 } 7203 7204 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7205 const OperandVector &Operands, 7206 bool IsAtomic, 7207 bool IsLds) { 7208 bool IsLdsOpcode = IsLds; 7209 bool HasLdsModifier = false; 7210 OptionalImmIndexMap OptionalIdx; 7211 unsigned FirstOperandIdx = 1; 7212 bool IsAtomicReturn = false; 7213 7214 if (IsAtomic) { 7215 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7216 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7217 if (!Op.isCPol()) 7218 continue; 7219 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7220 break; 7221 } 7222 7223 if (!IsAtomicReturn) { 7224 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7225 if (NewOpc != -1) 7226 Inst.setOpcode(NewOpc); 7227 } 7228 7229 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7230 SIInstrFlags::IsAtomicRet; 7231 } 7232 7233 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7234 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7235 7236 // Add the register arguments 7237 if (Op.isReg()) { 7238 Op.addRegOperands(Inst, 1); 7239 // Insert a tied src for atomic return dst. 7240 // This cannot be postponed as subsequent calls to 7241 // addImmOperands rely on correct number of MC operands. 7242 if (IsAtomicReturn && i == FirstOperandIdx) 7243 Op.addRegOperands(Inst, 1); 7244 continue; 7245 } 7246 7247 // Handle the case where soffset is an immediate 7248 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7249 Op.addImmOperands(Inst, 1); 7250 continue; 7251 } 7252 7253 HasLdsModifier |= Op.isLDS(); 7254 7255 // Handle tokens like 'offen' which are sometimes hard-coded into the 7256 // asm string. There are no MCInst operands for these. 7257 if (Op.isToken()) { 7258 continue; 7259 } 7260 assert(Op.isImm()); 7261 7262 // Handle optional arguments 7263 OptionalIdx[Op.getImmTy()] = i; 7264 } 7265 7266 // This is a workaround for an llvm quirk which may result in an 7267 // incorrect instruction selection. Lds and non-lds versions of 7268 // MUBUF instructions are identical except that lds versions 7269 // have mandatory 'lds' modifier. However this modifier follows 7270 // optional modifiers and llvm asm matcher regards this 'lds' 7271 // modifier as an optional one. As a result, an lds version 7272 // of opcode may be selected even if it has no 'lds' modifier. 7273 if (IsLdsOpcode && !HasLdsModifier) { 7274 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7275 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
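      // Switch back to the non-lds opcode; the matcher may have selected the
      // lds variant only because 'lds' is modelled as an optional operand.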
7276 Inst.setOpcode(NoLdsOpcode); 7277 IsLdsOpcode = false; 7278 } 7279 } 7280 7281 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7283 7284 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7286 } 7287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7288 } 7289 7290 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7291 OptionalImmIndexMap OptionalIdx; 7292 7293 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7294 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7295 7296 // Add the register arguments 7297 if (Op.isReg()) { 7298 Op.addRegOperands(Inst, 1); 7299 continue; 7300 } 7301 7302 // Handle the case where soffset is an immediate 7303 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7304 Op.addImmOperands(Inst, 1); 7305 continue; 7306 } 7307 7308 // Handle tokens like 'offen' which are sometimes hard-coded into the 7309 // asm string. There are no MCInst operands for these. 7310 if (Op.isToken()) { 7311 continue; 7312 } 7313 assert(Op.isImm()); 7314 7315 // Handle optional arguments 7316 OptionalIdx[Op.getImmTy()] = i; 7317 } 7318 7319 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7320 AMDGPUOperand::ImmTyOffset); 7321 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7325 } 7326 7327 //===----------------------------------------------------------------------===// 7328 // mimg 7329 //===----------------------------------------------------------------------===// 7330 7331 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7332 bool IsAtomic) { 7333 unsigned I = 1; 7334 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7335 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7336 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7337 } 7338 7339 if (IsAtomic) { 7340 // Add src, same as dst 7341 assert(Desc.getNumDefs() == 1); 7342 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7343 } 7344 7345 OptionalImmIndexMap OptionalIdx; 7346 7347 for (unsigned E = Operands.size(); I != E; ++I) { 7348 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7349 7350 // Add the register arguments 7351 if (Op.isReg()) { 7352 Op.addRegOperands(Inst, 1); 7353 } else if (Op.isImmModifier()) { 7354 OptionalIdx[Op.getImmTy()] = I; 7355 } else if (!Op.isToken()) { 7356 llvm_unreachable("unexpected operand type"); 7357 } 7358 } 7359 7360 bool IsGFX10Plus = isGFX10Plus(); 7361 7362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7363 if (IsGFX10Plus) 7364 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7365 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7366 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7368 if (IsGFX10Plus) 7369 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7370 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7371 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7372 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7373 if (!IsGFX10Plus) 7374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7376 } 7377 7378 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7379 cvtMIMG(Inst, Operands, true); 7380 } 7381 7382 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7383 OptionalImmIndexMap OptionalIdx; 7384 bool IsAtomicReturn = false; 7385 7386 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7387 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7388 if (!Op.isCPol()) 7389 continue; 7390 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7391 break; 7392 } 7393 7394 if (!IsAtomicReturn) { 7395 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7396 if (NewOpc != -1) 7397 Inst.setOpcode(NewOpc); 7398 } 7399 7400 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7401 SIInstrFlags::IsAtomicRet; 7402 7403 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7404 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7405 7406 // Add the register arguments 7407 if (Op.isReg()) { 7408 Op.addRegOperands(Inst, 1); 7409 if (IsAtomicReturn && i == 1) 7410 Op.addRegOperands(Inst, 1); 7411 continue; 7412 } 7413 7414 // Handle the case where soffset is an immediate 7415 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7416 Op.addImmOperands(Inst, 1); 7417 continue; 7418 } 7419 7420 // Handle tokens like 'offen' which are sometimes hard-coded into the 7421 // asm string. There are no MCInst operands for these. 7422 if (Op.isToken()) { 7423 continue; 7424 } 7425 assert(Op.isImm()); 7426 7427 // Handle optional arguments 7428 OptionalIdx[Op.getImmTy()] = i; 7429 } 7430 7431 if ((int)Inst.getNumOperands() <= 7432 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7434 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7435 } 7436 7437 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7438 const OperandVector &Operands) { 7439 for (unsigned I = 1; I < Operands.size(); ++I) { 7440 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7441 if (Operand.isReg()) 7442 Operand.addRegOperands(Inst, 1); 7443 } 7444 7445 Inst.addOperand(MCOperand::createImm(1)); // a16 7446 } 7447 7448 //===----------------------------------------------------------------------===// 7449 // smrd 7450 //===----------------------------------------------------------------------===// 7451 7452 bool AMDGPUOperand::isSMRDOffset8() const { 7453 return isImm() && isUInt<8>(getImm()); 7454 } 7455 7456 bool AMDGPUOperand::isSMEMOffset() const { 7457 return isImm(); // Offset range is checked later by validator. 7458 } 7459 7460 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7461 // 32-bit literals are only supported on CI and we only want to use them 7462 // when the offset is > 8-bits. 
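  // E.g. (illustrative): an offset of 0x1000 does not fit in 8 bits and must
  // use the 32-bit literal encoding, whereas 0xFF still fits the 8-bit SMRD
  // offset field checked by isSMRDOffset8() above.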
7463 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7464 } 7465 7466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7467 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7468 } 7469 7470 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7471 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7472 } 7473 7474 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7475 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7476 } 7477 7478 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7479 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7480 } 7481 7482 //===----------------------------------------------------------------------===// 7483 // vop3 7484 //===----------------------------------------------------------------------===// 7485 7486 static bool ConvertOmodMul(int64_t &Mul) { 7487 if (Mul != 1 && Mul != 2 && Mul != 4) 7488 return false; 7489 7490 Mul >>= 1; 7491 return true; 7492 } 7493 7494 static bool ConvertOmodDiv(int64_t &Div) { 7495 if (Div == 1) { 7496 Div = 0; 7497 return true; 7498 } 7499 7500 if (Div == 2) { 7501 Div = 3; 7502 return true; 7503 } 7504 7505 return false; 7506 } 7507 7508 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7509 // This is intentional and ensures compatibility with sp3. 7510 // See bug 35397 for details. 7511 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7512 if (BoundCtrl == 0 || BoundCtrl == 1) { 7513 BoundCtrl = 1; 7514 return true; 7515 } 7516 return false; 7517 } 7518 7519 // Note: the order in this table matches the order of operands in AsmString. 7520 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7521 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7522 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7523 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7524 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7525 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7526 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7527 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7528 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7529 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7530 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7531 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7532 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7533 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7534 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7535 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7536 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7537 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7538 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7539 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7540 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7541 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7542 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7543 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7544 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7545 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7546 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7547 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7548 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7549 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7550 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3AndAbove(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomic instructions
  // which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
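  //
  // A minimal sketch of the loop below: retry parseOptionalOpr() up to
  // MAX_OPR_LOOKAHEAD times, consuming a separating comma before each retry,
  // and stop as soon as a parse fails or the end of the statement is reached.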
7590 7591 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7592 if (res != MatchOperand_Success || 7593 isToken(AsmToken::EndOfStatement)) 7594 break; 7595 7596 trySkipToken(AsmToken::Comma); 7597 res = parseOptionalOpr(Operands); 7598 } 7599 7600 return res; 7601 } 7602 7603 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7604 OperandMatchResultTy res; 7605 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7606 // try to parse any optional operand here 7607 if (Op.IsBit) { 7608 res = parseNamedBit(Op.Name, Operands, Op.Type); 7609 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7610 res = parseOModOperand(Operands); 7611 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7612 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7613 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7614 res = parseSDWASel(Operands, Op.Name, Op.Type); 7615 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7616 res = parseSDWADstUnused(Operands); 7617 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7618 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7619 Op.Type == AMDGPUOperand::ImmTyNegLo || 7620 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7621 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7622 Op.ConvertResult); 7623 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7624 res = parseDim(Operands); 7625 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7626 res = parseCPol(Operands); 7627 } else { 7628 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7629 } 7630 if (res != MatchOperand_NoMatch) { 7631 return res; 7632 } 7633 } 7634 return MatchOperand_NoMatch; 7635 } 7636 7637 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7638 StringRef Name = getTokenStr(); 7639 if (Name == "mul") { 7640 return parseIntWithPrefix("mul", Operands, 7641 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7642 } 7643 7644 if (Name == "div") { 7645 return parseIntWithPrefix("div", Operands, 7646 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7647 } 7648 7649 return MatchOperand_NoMatch; 7650 } 7651 7652 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7653 cvtVOP3P(Inst, Operands); 7654 7655 int Opc = Inst.getOpcode(); 7656 7657 int SrcNum; 7658 const int Ops[] = { AMDGPU::OpName::src0, 7659 AMDGPU::OpName::src1, 7660 AMDGPU::OpName::src2 }; 7661 for (SrcNum = 0; 7662 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7663 ++SrcNum); 7664 assert(SrcNum > 0); 7665 7666 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7667 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7668 7669 if ((OpSel & (1 << SrcNum)) != 0) { 7670 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7671 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7672 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7673 } 7674 } 7675 7676 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7677 // 1. This operand is input modifiers 7678 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7679 // 2. This is not last operand 7680 && Desc.NumOperands > (OpNum + 1) 7681 // 3. Next operand is register class 7682 && Desc.OpInfo[OpNum + 1].RegClass != -1 7683 // 4. 
Next register is not tied to any other operand 7684 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7685 } 7686 7687 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7688 { 7689 OptionalImmIndexMap OptionalIdx; 7690 unsigned Opc = Inst.getOpcode(); 7691 7692 unsigned I = 1; 7693 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7694 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7695 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7696 } 7697 7698 for (unsigned E = Operands.size(); I != E; ++I) { 7699 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7700 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7701 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7702 } else if (Op.isInterpSlot() || 7703 Op.isInterpAttr() || 7704 Op.isAttrChan()) { 7705 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7706 } else if (Op.isImmModifier()) { 7707 OptionalIdx[Op.getImmTy()] = I; 7708 } else { 7709 llvm_unreachable("unhandled operand type"); 7710 } 7711 } 7712 7713 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7714 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7715 } 7716 7717 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7719 } 7720 7721 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7723 } 7724 } 7725 7726 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7727 OptionalImmIndexMap &OptionalIdx) { 7728 unsigned Opc = Inst.getOpcode(); 7729 7730 unsigned I = 1; 7731 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7732 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7733 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7734 } 7735 7736 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7737 // This instruction has src modifiers 7738 for (unsigned E = Operands.size(); I != E; ++I) { 7739 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7740 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7741 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7742 } else if (Op.isImmModifier()) { 7743 OptionalIdx[Op.getImmTy()] = I; 7744 } else if (Op.isRegOrImm()) { 7745 Op.addRegOrImmOperands(Inst, 1); 7746 } else { 7747 llvm_unreachable("unhandled operand type"); 7748 } 7749 } 7750 } else { 7751 // No src modifiers 7752 for (unsigned E = Operands.size(); I != E; ++I) { 7753 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7754 if (Op.isMod()) { 7755 OptionalIdx[Op.getImmTy()] = I; 7756 } else { 7757 Op.addRegOrImmOperands(Inst, 1); 7758 } 7759 } 7760 } 7761 7762 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7763 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7764 } 7765 7766 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7768 } 7769 7770 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7771 // it has src2 register operand that is tied to dst operand 7772 // we don't allow modifiers for this operand in assembler so src2_modifiers 7773 // should be 0. 
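  // Sketch of the fixup below: insert an explicit src2_modifiers immediate of
  // 0, then re-insert a copy of the dst operand as the tied src2.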
7774 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7775 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7776 Opc == AMDGPU::V_MAC_F32_e64_vi || 7777 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7778 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7779 Opc == AMDGPU::V_MAC_F16_e64_vi || 7780 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7781 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7782 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7783 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7784 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7785 auto it = Inst.begin(); 7786 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7787 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7788 ++it; 7789 // Copy the operand to ensure it's not invalidated when Inst grows. 7790 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7791 } 7792 } 7793 7794 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7795 OptionalImmIndexMap OptionalIdx; 7796 cvtVOP3(Inst, Operands, OptionalIdx); 7797 } 7798 7799 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7800 OptionalImmIndexMap &OptIdx) { 7801 const int Opc = Inst.getOpcode(); 7802 const MCInstrDesc &Desc = MII.get(Opc); 7803 7804 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7805 7806 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7807 assert(!IsPacked); 7808 Inst.addOperand(Inst.getOperand(0)); 7809 } 7810 7811 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7812 // instruction, and then figure out where to actually put the modifiers 7813 7814 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7815 if (OpSelIdx != -1) { 7816 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7817 } 7818 7819 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7820 if (OpSelHiIdx != -1) { 7821 int DefaultVal = IsPacked ? 
-1 : 0; 7822 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7823 DefaultVal); 7824 } 7825 7826 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7827 if (NegLoIdx != -1) { 7828 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7829 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7830 } 7831 7832 const int Ops[] = { AMDGPU::OpName::src0, 7833 AMDGPU::OpName::src1, 7834 AMDGPU::OpName::src2 }; 7835 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7836 AMDGPU::OpName::src1_modifiers, 7837 AMDGPU::OpName::src2_modifiers }; 7838 7839 unsigned OpSel = 0; 7840 unsigned OpSelHi = 0; 7841 unsigned NegLo = 0; 7842 unsigned NegHi = 0; 7843 7844 if (OpSelIdx != -1) 7845 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7846 7847 if (OpSelHiIdx != -1) 7848 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7849 7850 if (NegLoIdx != -1) { 7851 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7852 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7853 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7854 } 7855 7856 for (int J = 0; J < 3; ++J) { 7857 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7858 if (OpIdx == -1) 7859 break; 7860 7861 uint32_t ModVal = 0; 7862 7863 if ((OpSel & (1 << J)) != 0) 7864 ModVal |= SISrcMods::OP_SEL_0; 7865 7866 if ((OpSelHi & (1 << J)) != 0) 7867 ModVal |= SISrcMods::OP_SEL_1; 7868 7869 if ((NegLo & (1 << J)) != 0) 7870 ModVal |= SISrcMods::NEG; 7871 7872 if ((NegHi & (1 << J)) != 0) 7873 ModVal |= SISrcMods::NEG_HI; 7874 7875 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7876 7877 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7878 } 7879 } 7880 7881 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7882 OptionalImmIndexMap OptIdx; 7883 cvtVOP3(Inst, Operands, OptIdx); 7884 cvtVOP3P(Inst, Operands, OptIdx); 7885 } 7886 7887 //===----------------------------------------------------------------------===// 7888 // dpp 7889 //===----------------------------------------------------------------------===// 7890 7891 bool AMDGPUOperand::isDPP8() const { 7892 return isImmTy(ImmTyDPP8); 7893 } 7894 7895 bool AMDGPUOperand::isDPPCtrl() const { 7896 using namespace AMDGPU::DPP; 7897 7898 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7899 if (result) { 7900 int64_t Imm = getImm(); 7901 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7902 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7903 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7904 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7905 (Imm == DppCtrl::WAVE_SHL1) || 7906 (Imm == DppCtrl::WAVE_ROL1) || 7907 (Imm == DppCtrl::WAVE_SHR1) || 7908 (Imm == DppCtrl::WAVE_ROR1) || 7909 (Imm == DppCtrl::ROW_MIRROR) || 7910 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7911 (Imm == DppCtrl::BCAST15) || 7912 (Imm == DppCtrl::BCAST31) || 7913 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7914 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7915 } 7916 return false; 7917 } 7918 7919 //===----------------------------------------------------------------------===// 7920 // mAI 7921 //===----------------------------------------------------------------------===// 7922 7923 bool AMDGPUOperand::isBLGP() const { 7924 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7925 } 7926 7927 bool 
AMDGPUOperand::isCBSZ() const { 7928 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7929 } 7930 7931 bool AMDGPUOperand::isABID() const { 7932 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7933 } 7934 7935 bool AMDGPUOperand::isS16Imm() const { 7936 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7937 } 7938 7939 bool AMDGPUOperand::isU16Imm() const { 7940 return isImm() && isUInt<16>(getImm()); 7941 } 7942 7943 //===----------------------------------------------------------------------===// 7944 // dim 7945 //===----------------------------------------------------------------------===// 7946 7947 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7948 // We want to allow "dim:1D" etc., 7949 // but the initial 1 is tokenized as an integer. 7950 std::string Token; 7951 if (isToken(AsmToken::Integer)) { 7952 SMLoc Loc = getToken().getEndLoc(); 7953 Token = std::string(getTokenStr()); 7954 lex(); 7955 if (getLoc() != Loc) 7956 return false; 7957 } 7958 7959 StringRef Suffix; 7960 if (!parseId(Suffix)) 7961 return false; 7962 Token += Suffix; 7963 7964 StringRef DimId = Token; 7965 if (DimId.startswith("SQ_RSRC_IMG_")) 7966 DimId = DimId.drop_front(12); 7967 7968 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7969 if (!DimInfo) 7970 return false; 7971 7972 Encoding = DimInfo->Encoding; 7973 return true; 7974 } 7975 7976 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7977 if (!isGFX10Plus()) 7978 return MatchOperand_NoMatch; 7979 7980 SMLoc S = getLoc(); 7981 7982 if (!trySkipId("dim", AsmToken::Colon)) 7983 return MatchOperand_NoMatch; 7984 7985 unsigned Encoding; 7986 SMLoc Loc = getLoc(); 7987 if (!parseDimId(Encoding)) { 7988 Error(Loc, "invalid dim value"); 7989 return MatchOperand_ParseFail; 7990 } 7991 7992 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7993 AMDGPUOperand::ImmTyDim)); 7994 return MatchOperand_Success; 7995 } 7996 7997 //===----------------------------------------------------------------------===// 7998 // dpp 7999 //===----------------------------------------------------------------------===// 8000 8001 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8002 SMLoc S = getLoc(); 8003 8004 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8005 return MatchOperand_NoMatch; 8006 8007 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8008 8009 int64_t Sels[8]; 8010 8011 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8012 return MatchOperand_ParseFail; 8013 8014 for (size_t i = 0; i < 8; ++i) { 8015 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8016 return MatchOperand_ParseFail; 8017 8018 SMLoc Loc = getLoc(); 8019 if (getParser().parseAbsoluteExpression(Sels[i])) 8020 return MatchOperand_ParseFail; 8021 if (0 > Sels[i] || 7 < Sels[i]) { 8022 Error(Loc, "expected a 3-bit value"); 8023 return MatchOperand_ParseFail; 8024 } 8025 } 8026 8027 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8028 return MatchOperand_ParseFail; 8029 8030 unsigned DPP8 = 0; 8031 for (size_t i = 0; i < 8; ++i) 8032 DPP8 |= (Sels[i] << (i * 3)); 8033 8034 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8035 return MatchOperand_Success; 8036 } 8037 8038 bool 8039 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8040 const OperandVector &Operands) { 8041 if (Ctrl == "row_newbcast") 8042 return isGFX90A(); 8043 8044 if (Ctrl == "row_share" || 8045 Ctrl 
== "row_xmask") 8046 return isGFX10Plus(); 8047 8048 if (Ctrl == "wave_shl" || 8049 Ctrl == "wave_shr" || 8050 Ctrl == "wave_rol" || 8051 Ctrl == "wave_ror" || 8052 Ctrl == "row_bcast") 8053 return isVI() || isGFX9(); 8054 8055 return Ctrl == "row_mirror" || 8056 Ctrl == "row_half_mirror" || 8057 Ctrl == "quad_perm" || 8058 Ctrl == "row_shl" || 8059 Ctrl == "row_shr" || 8060 Ctrl == "row_ror"; 8061 } 8062 8063 int64_t 8064 AMDGPUAsmParser::parseDPPCtrlPerm() { 8065 // quad_perm:[%d,%d,%d,%d] 8066 8067 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8068 return -1; 8069 8070 int64_t Val = 0; 8071 for (int i = 0; i < 4; ++i) { 8072 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8073 return -1; 8074 8075 int64_t Temp; 8076 SMLoc Loc = getLoc(); 8077 if (getParser().parseAbsoluteExpression(Temp)) 8078 return -1; 8079 if (Temp < 0 || Temp > 3) { 8080 Error(Loc, "expected a 2-bit value"); 8081 return -1; 8082 } 8083 8084 Val += (Temp << i * 2); 8085 } 8086 8087 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8088 return -1; 8089 8090 return Val; 8091 } 8092 8093 int64_t 8094 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8095 using namespace AMDGPU::DPP; 8096 8097 // sel:%d 8098 8099 int64_t Val; 8100 SMLoc Loc = getLoc(); 8101 8102 if (getParser().parseAbsoluteExpression(Val)) 8103 return -1; 8104 8105 struct DppCtrlCheck { 8106 int64_t Ctrl; 8107 int Lo; 8108 int Hi; 8109 }; 8110 8111 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8112 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8113 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8114 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8115 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8116 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8117 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8118 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8119 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8120 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8121 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8122 .Default({-1, 0, 0}); 8123 8124 bool Valid; 8125 if (Check.Ctrl == -1) { 8126 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8127 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8128 } else { 8129 Valid = Check.Lo <= Val && Val <= Check.Hi; 8130 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8131 } 8132 8133 if (!Valid) { 8134 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8135 return -1; 8136 } 8137 8138 return Val; 8139 } 8140 8141 OperandMatchResultTy 8142 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8143 using namespace AMDGPU::DPP; 8144 8145 if (!isToken(AsmToken::Identifier) || 8146 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8147 return MatchOperand_NoMatch; 8148 8149 SMLoc S = getLoc(); 8150 int64_t Val = -1; 8151 StringRef Ctrl; 8152 8153 parseId(Ctrl); 8154 8155 if (Ctrl == "row_mirror") { 8156 Val = DppCtrl::ROW_MIRROR; 8157 } else if (Ctrl == "row_half_mirror") { 8158 Val = DppCtrl::ROW_HALF_MIRROR; 8159 } else { 8160 if (skipToken(AsmToken::Colon, "expected a colon")) { 8161 if (Ctrl == "quad_perm") { 8162 Val = parseDPPCtrlPerm(); 8163 } else { 8164 Val = parseDPPCtrlSel(Ctrl); 8165 } 8166 } 8167 } 8168 8169 if (Val == -1) 8170 return MatchOperand_ParseFail; 8171 8172 Operands.push_back( 8173 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8174 return MatchOperand_Success; 8175 } 8176 8177 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8178 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8179 } 8180 8181 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8182 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8183 } 8184 8185 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8186 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8187 } 8188 8189 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8190 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8191 } 8192 8193 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8194 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8195 } 8196 8197 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8198 OptionalImmIndexMap OptionalIdx; 8199 8200 unsigned Opc = Inst.getOpcode(); 8201 bool HasModifiers = 8202 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8203 unsigned I = 1; 8204 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8205 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8206 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8207 } 8208 8209 int Fi = 0; 8210 for (unsigned E = Operands.size(); I != E; ++I) { 8211 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8212 MCOI::TIED_TO); 8213 if (TiedTo != -1) { 8214 assert((unsigned)TiedTo < Inst.getNumOperands()); 8215 // handle tied old or src2 for MAC instructions 8216 Inst.addOperand(Inst.getOperand(TiedTo)); 8217 } 8218 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8219 // Add the register arguments 8220 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8221 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8222 // Skip it. 
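      // (The "vcc" token parsed for VOP2b carry in/out has no explicit
      //  MCInst operand in the DPP form, so it is simply dropped here.)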
8223 continue; 8224 } 8225 8226 if (IsDPP8) { 8227 if (Op.isDPP8()) { 8228 Op.addImmOperands(Inst, 1); 8229 } else if (HasModifiers && 8230 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8231 Op.addRegWithFPInputModsOperands(Inst, 2); 8232 } else if (Op.isFI()) { 8233 Fi = Op.getImm(); 8234 } else if (Op.isReg()) { 8235 Op.addRegOperands(Inst, 1); 8236 } else { 8237 llvm_unreachable("Invalid operand type"); 8238 } 8239 } else { 8240 if (HasModifiers && 8241 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8242 Op.addRegWithFPInputModsOperands(Inst, 2); 8243 } else if (Op.isReg()) { 8244 Op.addRegOperands(Inst, 1); 8245 } else if (Op.isDPPCtrl()) { 8246 Op.addImmOperands(Inst, 1); 8247 } else if (Op.isImm()) { 8248 // Handle optional arguments 8249 OptionalIdx[Op.getImmTy()] = I; 8250 } else { 8251 llvm_unreachable("Invalid operand type"); 8252 } 8253 } 8254 } 8255 8256 if (IsDPP8) { 8257 using namespace llvm::AMDGPU::DPP; 8258 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8259 } else { 8260 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8263 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8264 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8265 } 8266 } 8267 } 8268 8269 //===----------------------------------------------------------------------===// 8270 // sdwa 8271 //===----------------------------------------------------------------------===// 8272 8273 OperandMatchResultTy 8274 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8275 AMDGPUOperand::ImmTy Type) { 8276 using namespace llvm::AMDGPU::SDWA; 8277 8278 SMLoc S = getLoc(); 8279 StringRef Value; 8280 OperandMatchResultTy res; 8281 8282 SMLoc StringLoc; 8283 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8284 if (res != MatchOperand_Success) { 8285 return res; 8286 } 8287 8288 int64_t Int; 8289 Int = StringSwitch<int64_t>(Value) 8290 .Case("BYTE_0", SdwaSel::BYTE_0) 8291 .Case("BYTE_1", SdwaSel::BYTE_1) 8292 .Case("BYTE_2", SdwaSel::BYTE_2) 8293 .Case("BYTE_3", SdwaSel::BYTE_3) 8294 .Case("WORD_0", SdwaSel::WORD_0) 8295 .Case("WORD_1", SdwaSel::WORD_1) 8296 .Case("DWORD", SdwaSel::DWORD) 8297 .Default(0xffffffff); 8298 8299 if (Int == 0xffffffff) { 8300 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8301 return MatchOperand_ParseFail; 8302 } 8303 8304 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8305 return MatchOperand_Success; 8306 } 8307 8308 OperandMatchResultTy 8309 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8310 using namespace llvm::AMDGPU::SDWA; 8311 8312 SMLoc S = getLoc(); 8313 StringRef Value; 8314 OperandMatchResultTy res; 8315 8316 SMLoc StringLoc; 8317 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8318 if (res != MatchOperand_Success) { 8319 return res; 8320 } 8321 8322 int64_t Int; 8323 Int = StringSwitch<int64_t>(Value) 8324 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8325 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8326 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8327 .Default(0xffffffff); 8328 8329 if (Int == 0xffffffff) { 8330 Error(StringLoc, "invalid dst_unused value"); 8331 return MatchOperand_ParseFail; 8332 } 8333 8334 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8335 return MatchOperand_Success; 8336 } 8337 8338 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8339 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8340 } 8341 8342 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8343 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8344 } 8345 8346 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8347 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8348 } 8349 8350 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8351 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8352 } 8353 8354 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8355 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8356 } 8357 8358 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8359 uint64_t BasicInstType, 8360 bool SkipDstVcc, 8361 bool SkipSrcVcc) { 8362 using namespace llvm::AMDGPU::SDWA; 8363 8364 OptionalImmIndexMap OptionalIdx; 8365 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8366 bool SkippedVcc = false; 8367 8368 unsigned I = 1; 8369 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8370 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8371 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8372 } 8373 8374 for (unsigned E = Operands.size(); I != E; ++I) { 8375 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8376 if (SkipVcc && !SkippedVcc && Op.isReg() && 8377 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8378 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8379 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8380 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8381 // Skip VCC only if we didn't skip it on previous iteration. 8382 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
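    // In MCInst terms (sketch of the checks below): 1 operand already added
    // means the next "vcc" is the dst-vcc slot, 5 operands (dst plus two
    // {modifiers, reg} pairs) means it is the src-vcc slot; for VOPC the
    // dst-vcc comes first, i.e. 0 operands added so far.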
8383 if (BasicInstType == SIInstrFlags::VOP2 && 8384 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8385 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8386 SkippedVcc = true; 8387 continue; 8388 } else if (BasicInstType == SIInstrFlags::VOPC && 8389 Inst.getNumOperands() == 0) { 8390 SkippedVcc = true; 8391 continue; 8392 } 8393 } 8394 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8395 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8396 } else if (Op.isImm()) { 8397 // Handle optional arguments 8398 OptionalIdx[Op.getImmTy()] = I; 8399 } else { 8400 llvm_unreachable("Invalid operand type"); 8401 } 8402 SkippedVcc = false; 8403 } 8404 8405 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8406 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8407 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8408 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8409 switch (BasicInstType) { 8410 case SIInstrFlags::VOP1: 8411 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8412 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8413 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8414 } 8415 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8417 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8418 break; 8419 8420 case SIInstrFlags::VOP2: 8421 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8422 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8423 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8424 } 8425 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8429 break; 8430 8431 case SIInstrFlags::VOPC: 8432 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8434 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8435 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8436 break; 8437 8438 default: 8439 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8440 } 8441 } 8442 8443 // special case v_mac_{f16, f32}: 8444 // it has src2 register operand that is tied to dst operand 8445 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8446 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8447 auto it = Inst.begin(); 8448 std::advance( 8449 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8450 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8451 } 8452 } 8453 8454 //===----------------------------------------------------------------------===// 8455 // mAI 8456 //===----------------------------------------------------------------------===// 8457 8458 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8459 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8460 } 8461 8462 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8463 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8464 } 8465 8466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8467 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8468 } 8469 8470 /// Force static initialization. 8471 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8472 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8473 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8474 } 8475 8476 #define GET_REGISTER_MATCHER 8477 #define GET_MATCHER_IMPLEMENTATION 8478 #define GET_MNEMONIC_SPELL_CHECKER 8479 #define GET_MNEMONIC_CHECKER 8480 #include "AMDGPUGenAsmMatcher.inc" 8481 8482 // This function should be defined after auto-generated include so that we have 8483 // MatchClassKind enum defined 8484 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8485 unsigned Kind) { 8486 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8487 // But MatchInstructionImpl() expects to meet token and fails to validate 8488 // operand. This method checks if we are given immediate operand but expect to 8489 // get corresponding token. 8490 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8491 switch (Kind) { 8492 case MCK_addr64: 8493 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8494 case MCK_gds: 8495 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8496 case MCK_lds: 8497 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8498 case MCK_idxen: 8499 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8500 case MCK_offen: 8501 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8502 case MCK_SSrcB32: 8503 // When operands have expression values, they will return true for isToken, 8504 // because it is not possible to distinguish between a token and an 8505 // expression at parse time. MatchInstructionImpl() will always try to 8506 // match an operand as a token, when isToken returns true, and when the 8507 // name of the expression is not a valid token, the match will fail, 8508 // so we need to handle it here. 8509 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8510 case MCK_SSrcF32: 8511 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8512 case MCK_SoppBrTarget: 8513 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8514 case MCK_VReg32OrOff: 8515 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8516 case MCK_InterpSlot: 8517 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8518 case MCK_Attr: 8519 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8520 case MCK_AttrChan: 8521 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8522 case MCK_ImmSMEMOffset: 8523 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8524 case MCK_SReg_64: 8525 case MCK_SReg_64_XEXEC: 8526 // Null is defined as a 32-bit register but 8527 // it should also be enabled with 64-bit operands. 8528 // The following code enables it for SReg_64 operands 8529 // used as source and destination. Remaining source 8530 // operands are handled in isInlinableImm. 8531 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8532 default: 8533 return Match_InvalidOperand; 8534 } 8535 } 8536 8537 //===----------------------------------------------------------------------===// 8538 // endpgm 8539 //===----------------------------------------------------------------------===// 8540 8541 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8542 SMLoc S = getLoc(); 8543 int64_t Imm = 0; 8544 8545 if (!parseExpr(Imm)) { 8546 // The operand is optional, if not present default to 0 8547 Imm = 0; 8548 } 8549 8550 if (!isUInt<16>(Imm)) { 8551 Error(S, "expected a 16-bit value"); 8552 return MatchOperand_ParseFail; 8553 } 8554 8555 Operands.push_back( 8556 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8557 return MatchOperand_Success; 8558 } 8559 8560 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8561