//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
206 return isSymbolRefExpr(); 207 } 208 209 bool isSymbolRefExpr() const { 210 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 211 } 212 213 bool isImm() const override { 214 return Kind == Immediate; 215 } 216 217 void setImmKindNone() const { 218 assert(isImm()); 219 Imm.Kind = ImmKindTyNone; 220 } 221 222 void setImmKindLiteral() const { 223 assert(isImm()); 224 Imm.Kind = ImmKindTyLiteral; 225 } 226 227 void setImmKindConst() const { 228 assert(isImm()); 229 Imm.Kind = ImmKindTyConst; 230 } 231 232 bool IsImmKindLiteral() const { 233 return isImm() && Imm.Kind == ImmKindTyLiteral; 234 } 235 236 bool isImmKindConst() const { 237 return isImm() && Imm.Kind == ImmKindTyConst; 238 } 239 240 bool isInlinableImm(MVT type) const; 241 bool isLiteralImm(MVT type) const; 242 243 bool isRegKind() const { 244 return Kind == Register; 245 } 246 247 bool isReg() const override { 248 return isRegKind() && !hasModifiers(); 249 } 250 251 bool isRegOrInline(unsigned RCID, MVT type) const { 252 return isRegClass(RCID) || isInlinableImm(type); 253 } 254 255 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 256 return isRegOrInline(RCID, type) || isLiteralImm(type); 257 } 258 259 bool isRegOrImmWithInt16InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 261 } 262 263 bool isRegOrImmWithInt32InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 265 } 266 267 bool isRegOrImmWithInt64InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 269 } 270 271 bool isRegOrImmWithFP16InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 273 } 274 275 bool isRegOrImmWithFP32InputMods() const { 276 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 277 } 278 279 bool isRegOrImmWithFP64InputMods() const { 280 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 281 } 282 283 bool isVReg() const { 284 return isRegClass(AMDGPU::VGPR_32RegClassID) || 285 isRegClass(AMDGPU::VReg_64RegClassID) || 286 isRegClass(AMDGPU::VReg_96RegClassID) || 287 isRegClass(AMDGPU::VReg_128RegClassID) || 288 isRegClass(AMDGPU::VReg_160RegClassID) || 289 isRegClass(AMDGPU::VReg_192RegClassID) || 290 isRegClass(AMDGPU::VReg_256RegClassID) || 291 isRegClass(AMDGPU::VReg_512RegClassID) || 292 isRegClass(AMDGPU::VReg_1024RegClassID); 293 } 294 295 bool isVReg32() const { 296 return isRegClass(AMDGPU::VGPR_32RegClassID); 297 } 298 299 bool isVReg32OrOff() const { 300 return isOff() || isVReg32(); 301 } 302 303 bool isNull() const { 304 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 305 } 306 307 bool isVRegWithInputMods() const; 308 309 bool isSDWAOperand(MVT type) const; 310 bool isSDWAFP16Operand() const; 311 bool isSDWAFP32Operand() const; 312 bool isSDWAInt16Operand() const; 313 bool isSDWAInt32Operand() const; 314 315 bool isImmTy(ImmTy ImmT) const { 316 return isImm() && Imm.Type == ImmT; 317 } 318 319 bool isImmModifier() const { 320 return isImm() && Imm.Type != ImmTyNone; 321 } 322 323 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 324 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 325 bool isDMask() const { return isImmTy(ImmTyDMask); } 326 bool isDim() const { return isImmTy(ImmTyDim); } 327 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 328 bool isDA() const { return isImmTy(ImmTyDA); } 329 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 330 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 331 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 332 bool isOff() const { return isImmTy(ImmTyOff); } 333 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 334 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 335 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 336 bool isOffen() const { return isImmTy(ImmTyOffen); } 337 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 338 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 339 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 340 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 341 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 342 343 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 344 bool isGDS() const { return isImmTy(ImmTyGDS); } 345 bool isLDS() const { return isImmTy(ImmTyLDS); } 346 bool isCPol() const { return isImmTy(ImmTyCPol); } 347 bool isSWZ() const { return isImmTy(ImmTySWZ); } 348 bool isTFE() const { return isImmTy(ImmTyTFE); } 349 bool isD16() const { return isImmTy(ImmTyD16); } 350 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 351 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 352 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 353 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 354 bool isFI() const { return isImmTy(ImmTyDppFi); } 355 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 356 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 357 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 358 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 359 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 360 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 361 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 362 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 363 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 364 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 365 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 366 bool isHigh() const { return isImmTy(ImmTyHigh); } 367 368 bool isMod() const { 369 return isClampSI() || isOModSI(); 370 } 371 372 bool isRegOrImm() const { 373 return isReg() || isImm(); 374 } 375 376 bool isRegClass(unsigned RCID) const; 377 378 bool isInlineValue() const; 379 380 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 381 return isRegOrInline(RCID, type) && !hasModifiers(); 382 } 383 384 bool isSCSrcB16() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 386 } 387 388 bool isSCSrcV2B16() const { 389 return isSCSrcB16(); 390 } 391 392 bool isSCSrcB32() const { 393 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 394 } 395 396 bool isSCSrcB64() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 398 } 399 400 bool isBoolReg() const; 401 402 bool isSCSrcF16() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 404 } 405 406 bool isSCSrcV2F16() const { 407 return isSCSrcF16(); 408 } 409 410 bool isSCSrcF32() const { 411 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 412 } 413 414 bool isSCSrcF64() const { 415 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 416 } 417 418 bool isSSrcB32() const { 419 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 420 } 421 422 bool isSSrcB16() const { 423 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 424 } 425 426 bool isSSrcV2B16() const { 427 llvm_unreachable("cannot happen"); 428 return isSSrcB16(); 429 } 430 431 bool isSSrcB64() const { 432 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 433 // See isVSrc64(). 434 return isSCSrcB64() || isLiteralImm(MVT::i64); 435 } 436 437 bool isSSrcF32() const { 438 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 439 } 440 441 bool isSSrcF64() const { 442 return isSCSrcB64() || isLiteralImm(MVT::f64); 443 } 444 445 bool isSSrcF16() const { 446 return isSCSrcB16() || isLiteralImm(MVT::f16); 447 } 448 449 bool isSSrcV2F16() const { 450 llvm_unreachable("cannot happen"); 451 return isSSrcF16(); 452 } 453 454 bool isSSrcV2FP32() const { 455 llvm_unreachable("cannot happen"); 456 return isSSrcF32(); 457 } 458 459 bool isSCSrcV2FP32() const { 460 llvm_unreachable("cannot happen"); 461 return isSCSrcF32(); 462 } 463 464 bool isSSrcV2INT32() const { 465 llvm_unreachable("cannot happen"); 466 return isSSrcB32(); 467 } 468 469 bool isSCSrcV2INT32() const { 470 llvm_unreachable("cannot happen"); 471 return isSCSrcB32(); 472 } 473 474 bool isSSrcOrLdsB32() const { 475 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 476 isLiteralImm(MVT::i32) || isExpr(); 477 } 478 479 bool isVCSrcB32() const { 480 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 481 } 482 483 bool isVCSrcB64() const { 484 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 485 } 486 487 bool isVCSrcB16() const { 488 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 489 } 490 491 bool isVCSrcV2B16() const { 492 return isVCSrcB16(); 493 } 494 495 bool isVCSrcF32() const { 496 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 497 } 498 499 bool isVCSrcF64() const { 500 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 501 } 502 503 bool isVCSrcF16() const { 504 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 505 } 506 507 bool isVCSrcV2F16() const { 508 return isVCSrcF16(); 509 } 510 511 bool isVSrcB32() const { 512 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 513 } 514 515 bool isVSrcB64() const { 516 return isVCSrcF64() || isLiteralImm(MVT::i64); 517 } 518 519 bool isVSrcB16() const { 520 return isVCSrcB16() || isLiteralImm(MVT::i16); 521 } 522 523 bool isVSrcV2B16() const { 524 return isVSrcB16() || isLiteralImm(MVT::v2i16); 525 } 526 527 bool isVCSrcV2FP32() const { 528 return isVCSrcF64(); 529 } 530 531 bool isVSrcV2FP32() const { 532 return isVSrcF64() || isLiteralImm(MVT::v2f32); 533 } 534 535 bool isVCSrcV2INT32() const { 536 return isVCSrcB64(); 537 } 538 539 bool isVSrcV2INT32() const { 540 return isVSrcB64() || isLiteralImm(MVT::v2i32); 541 } 542 543 bool isVSrcF32() const { 544 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 545 } 546 547 bool isVSrcF64() const { 548 return isVCSrcF64() || isLiteralImm(MVT::f64); 549 } 550 551 bool isVSrcF16() const { 552 return isVCSrcF16() || isLiteralImm(MVT::f16); 553 } 554 555 bool isVSrcV2F16() const { 556 return isVSrcF16() || isLiteralImm(MVT::v2f16); 557 } 558 559 bool isVISrcB32() const { 560 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 561 } 562 563 bool isVISrcB16() const { 564 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 565 } 566 567 bool isVISrcV2B16() const { 568 return isVISrcB16(); 569 } 570 571 bool isVISrcF32() const { 572 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 573 } 574 575 
bool isVISrcF16() const { 576 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 577 } 578 579 bool isVISrcV2F16() const { 580 return isVISrcF16() || isVISrcB32(); 581 } 582 583 bool isVISrc_64B64() const { 584 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 585 } 586 587 bool isVISrc_64F64() const { 588 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 589 } 590 591 bool isVISrc_64V2FP32() const { 592 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 593 } 594 595 bool isVISrc_64V2INT32() const { 596 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 597 } 598 599 bool isVISrc_256B64() const { 600 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 601 } 602 603 bool isVISrc_256F64() const { 604 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 605 } 606 607 bool isVISrc_128B16() const { 608 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 609 } 610 611 bool isVISrc_128V2B16() const { 612 return isVISrc_128B16(); 613 } 614 615 bool isVISrc_128B32() const { 616 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 617 } 618 619 bool isVISrc_128F32() const { 620 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 621 } 622 623 bool isVISrc_256V2FP32() const { 624 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 625 } 626 627 bool isVISrc_256V2INT32() const { 628 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 629 } 630 631 bool isVISrc_512B32() const { 632 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 633 } 634 635 bool isVISrc_512B16() const { 636 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 637 } 638 639 bool isVISrc_512V2B16() const { 640 return isVISrc_512B16(); 641 } 642 643 bool isVISrc_512F32() const { 644 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 645 } 646 647 bool isVISrc_512F16() const { 648 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 649 } 650 651 bool isVISrc_512V2F16() const { 652 return isVISrc_512F16() || isVISrc_512B32(); 653 } 654 655 bool isVISrc_1024B32() const { 656 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 657 } 658 659 bool isVISrc_1024B16() const { 660 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 661 } 662 663 bool isVISrc_1024V2B16() const { 664 return isVISrc_1024B16(); 665 } 666 667 bool isVISrc_1024F32() const { 668 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 669 } 670 671 bool isVISrc_1024F16() const { 672 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 673 } 674 675 bool isVISrc_1024V2F16() const { 676 return isVISrc_1024F16() || isVISrc_1024B32(); 677 } 678 679 bool isAISrcB32() const { 680 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 681 } 682 683 bool isAISrcB16() const { 684 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 685 } 686 687 bool isAISrcV2B16() const { 688 return isAISrcB16(); 689 } 690 691 bool isAISrcF32() const { 692 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 693 } 694 695 bool isAISrcF16() const { 696 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 697 } 698 699 bool isAISrcV2F16() const { 700 return isAISrcF16() || isAISrcB32(); 701 } 702 703 bool isAISrc_64B64() const { 704 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 705 } 706 707 bool isAISrc_64F64() const { 708 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 709 } 710 711 bool isAISrc_128B32() const { 712 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 713 } 714 715 bool isAISrc_128B16() const { 716 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 717 } 718 719 bool isAISrc_128V2B16() const { 720 return isAISrc_128B16(); 721 } 722 723 bool isAISrc_128F32() const { 724 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 725 } 726 727 bool isAISrc_128F16() const { 728 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 729 } 730 731 bool isAISrc_128V2F16() const { 732 return isAISrc_128F16() || isAISrc_128B32(); 733 } 734 735 bool isVISrc_128F16() const { 736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 737 } 738 739 bool isVISrc_128V2F16() const { 740 return isVISrc_128F16() || isVISrc_128B32(); 741 } 742 743 bool isAISrc_256B64() const { 744 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 745 } 746 747 bool isAISrc_256F64() const { 748 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 749 } 750 751 bool isAISrc_512B32() const { 752 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 753 } 754 755 bool isAISrc_512B16() const { 756 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 757 } 758 759 bool isAISrc_512V2B16() const { 760 return isAISrc_512B16(); 761 } 762 763 bool isAISrc_512F32() const { 764 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 765 } 766 767 bool isAISrc_512F16() const { 768 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 769 } 770 771 bool isAISrc_512V2F16() const { 772 return isAISrc_512F16() || isAISrc_512B32(); 773 } 774 775 bool isAISrc_1024B32() const { 776 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 777 } 778 779 bool isAISrc_1024B16() const { 780 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 781 } 782 783 bool isAISrc_1024V2B16() const { 784 return isAISrc_1024B16(); 785 } 786 787 bool isAISrc_1024F32() const { 788 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 789 } 790 791 bool isAISrc_1024F16() const { 792 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 793 } 794 795 bool isAISrc_1024V2F16() const { 796 return isAISrc_1024F16() || isAISrc_1024B32(); 797 } 798 799 bool isKImmFP32() const { 800 return isLiteralImm(MVT::f32); 801 } 802 803 bool isKImmFP16() const { 804 return isLiteralImm(MVT::f16); 805 } 806 807 bool isMem() const override { 808 return false; 809 } 810 811 bool isExpr() const { 812 return Kind == Expression; 813 } 814 815 bool isSoppBrTarget() const { 816 return isExpr() || isImm(); 817 } 818 819 bool isSWaitCnt() const; 820 bool isHwreg() const; 821 bool isSendMsg() const; 822 bool isSwizzle() const; 823 bool isSMRDOffset8() const; 824 bool isSMEMOffset() const; 825 bool isSMRDLiteralOffset() const; 826 bool isDPP8() const; 827 bool isDPPCtrl() const; 828 bool isBLGP() const; 829 bool isCBSZ() const; 830 bool isABID() const; 831 bool isGPRIdxMode() const; 832 bool isS16Imm() const; 833 bool isU16Imm() const; 834 bool isEndpgm() const; 835 836 StringRef getExpressionAsToken() const { 837 assert(isExpr()); 838 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 839 return S->getSymbol().getName(); 840 } 841 842 StringRef getToken() const { 843 assert(isToken()); 844 845 if (Kind == Expression) 846 return getExpressionAsToken(); 847 848 return StringRef(Tok.Data, Tok.Length); 
849 } 850 851 int64_t getImm() const { 852 assert(isImm()); 853 return Imm.Val; 854 } 855 856 void setImm(int64_t Val) { 857 assert(isImm()); 858 Imm.Val = Val; 859 } 860 861 ImmTy getImmTy() const { 862 assert(isImm()); 863 return Imm.Type; 864 } 865 866 unsigned getReg() const override { 867 assert(isRegKind()); 868 return Reg.RegNo; 869 } 870 871 SMLoc getStartLoc() const override { 872 return StartLoc; 873 } 874 875 SMLoc getEndLoc() const override { 876 return EndLoc; 877 } 878 879 SMRange getLocRange() const { 880 return SMRange(StartLoc, EndLoc); 881 } 882 883 Modifiers getModifiers() const { 884 assert(isRegKind() || isImmTy(ImmTyNone)); 885 return isRegKind() ? Reg.Mods : Imm.Mods; 886 } 887 888 void setModifiers(Modifiers Mods) { 889 assert(isRegKind() || isImmTy(ImmTyNone)); 890 if (isRegKind()) 891 Reg.Mods = Mods; 892 else 893 Imm.Mods = Mods; 894 } 895 896 bool hasModifiers() const { 897 return getModifiers().hasModifiers(); 898 } 899 900 bool hasFPModifiers() const { 901 return getModifiers().hasFPModifiers(); 902 } 903 904 bool hasIntModifiers() const { 905 return getModifiers().hasIntModifiers(); 906 } 907 908 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 909 910 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 911 912 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 913 914 template <unsigned Bitwidth> 915 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 916 917 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 918 addKImmFPOperands<16>(Inst, N); 919 } 920 921 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 922 addKImmFPOperands<32>(Inst, N); 923 } 924 925 void addRegOperands(MCInst &Inst, unsigned N) const; 926 927 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 928 addRegOperands(Inst, N); 929 } 930 931 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 932 if (isRegKind()) 933 addRegOperands(Inst, N); 934 else if (isExpr()) 935 Inst.addOperand(MCOperand::createExpr(Expr)); 936 else 937 addImmOperands(Inst, N); 938 } 939 940 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 941 Modifiers Mods = getModifiers(); 942 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 943 if (isRegKind()) { 944 addRegOperands(Inst, N); 945 } else { 946 addImmOperands(Inst, N, false); 947 } 948 } 949 950 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 951 assert(!hasIntModifiers()); 952 addRegOrImmWithInputModsOperands(Inst, N); 953 } 954 955 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 956 assert(!hasFPModifiers()); 957 addRegOrImmWithInputModsOperands(Inst, N); 958 } 959 960 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 961 Modifiers Mods = getModifiers(); 962 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 963 assert(isRegKind()); 964 addRegOperands(Inst, N); 965 } 966 967 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 968 assert(!hasIntModifiers()); 969 addRegWithInputModsOperands(Inst, N); 970 } 971 972 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 973 assert(!hasFPModifiers()); 974 addRegWithInputModsOperands(Inst, N); 975 } 976 977 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 978 if (isImm()) 979 addImmOperands(Inst, N); 980 else { 981 assert(isExpr()); 982 Inst.addOperand(MCOperand::createExpr(Expr)); 983 } 984 } 985 986 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 987 switch (Type) { 988 case ImmTyNone: OS << "None"; break; 989 case ImmTyGDS: OS << "GDS"; break; 990 case ImmTyLDS: OS << "LDS"; break; 991 case ImmTyOffen: OS << "Offen"; break; 992 case ImmTyIdxen: OS << "Idxen"; break; 993 case ImmTyAddr64: OS << "Addr64"; break; 994 case ImmTyOffset: OS << "Offset"; break; 995 case ImmTyInstOffset: OS << "InstOffset"; break; 996 case ImmTyOffset0: OS << "Offset0"; break; 997 case ImmTyOffset1: OS << "Offset1"; break; 998 case ImmTyCPol: OS << "CPol"; break; 999 case ImmTySWZ: OS << "SWZ"; break; 1000 case ImmTyTFE: OS << "TFE"; break; 1001 case ImmTyD16: OS << "D16"; break; 1002 case ImmTyFORMAT: OS << "FORMAT"; break; 1003 case ImmTyClampSI: OS << "ClampSI"; break; 1004 case ImmTyOModSI: OS << "OModSI"; break; 1005 case ImmTyDPP8: OS << "DPP8"; break; 1006 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1007 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1008 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1009 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1010 case ImmTyDppFi: OS << "FI"; break; 1011 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1012 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1013 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1014 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1015 case ImmTyDMask: OS << "DMask"; break; 1016 case ImmTyDim: OS << "Dim"; break; 1017 case ImmTyUNorm: OS << "UNorm"; break; 1018 case ImmTyDA: OS << "DA"; break; 1019 case ImmTyR128A16: OS << "R128A16"; break; 1020 case ImmTyA16: OS << "A16"; break; 1021 case ImmTyLWE: OS << "LWE"; break; 1022 case ImmTyOff: OS << "Off"; break; 1023 case ImmTyExpTgt: OS << "ExpTgt"; break; 1024 case ImmTyExpCompr: OS << "ExpCompr"; break; 1025 case ImmTyExpVM: OS << "ExpVM"; break; 1026 case ImmTyHwreg: OS << "Hwreg"; break; 1027 case ImmTySendMsg: OS << "SendMsg"; break; 1028 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1029 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1030 case ImmTyAttrChan: OS << "AttrChan"; break; 1031 case ImmTyOpSel: OS << "OpSel"; break; 1032 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1033 case ImmTyNegLo: OS << "NegLo"; break; 1034 case ImmTyNegHi: OS << "NegHi"; break; 1035 case ImmTySwizzle: OS << "Swizzle"; break; 1036 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1037 case ImmTyHigh: OS << "High"; break; 1038 case ImmTyBLGP: OS << "BLGP"; break; 1039 case ImmTyCBSZ: OS << "CBSZ"; break; 1040 case ImmTyABID: OS << "ABID"; break; 1041 case ImmTyEndpgm: OS << "Endpgm"; break; 1042 } 1043 } 1044 1045 void print(raw_ostream &OS) const override { 1046 switch (Kind) { 1047 case Register: 1048 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1049 break; 1050 case Immediate: 1051 OS << '<' << getImm(); 1052 if (getImmTy() != ImmTyNone) { 1053 OS << " type: "; printImmTy(OS, getImmTy()); 1054 } 1055 OS << " mods: " << Imm.Mods << '>'; 1056 break; 1057 case Token: 1058 OS << '\'' << getToken() << '\''; 1059 break; 1060 case Expression: 1061 OS << "<expr " << *Expr << '>'; 1062 break; 1063 } 1064 } 1065 1066 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1067 int64_t Val, SMLoc Loc, 1068 ImmTy Type = ImmTyNone, 1069 bool IsFPImm = false) { 1070 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1071 Op->Imm.Val = Val; 1072 Op->Imm.IsFPImm = IsFPImm; 1073 Op->Imm.Kind = ImmKindTyNone; 1074 Op->Imm.Type = Type; 1075 Op->Imm.Mods = Modifiers(); 1076 Op->StartLoc = Loc; 1077 Op->EndLoc = Loc; 1078 return Op; 1079 } 1080 1081 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1082 StringRef Str, SMLoc Loc, 1083 bool HasExplicitEncodingSize = true) { 1084 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1085 Res->Tok.Data = Str.data(); 1086 Res->Tok.Length = Str.size(); 1087 Res->StartLoc = Loc; 1088 Res->EndLoc = Loc; 1089 return Res; 1090 } 1091 1092 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1093 unsigned RegNo, SMLoc S, 1094 SMLoc E) { 1095 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1096 Op->Reg.RegNo = RegNo; 1097 Op->Reg.Mods = Modifiers(); 1098 Op->StartLoc = S; 1099 Op->EndLoc = E; 1100 return Op; 1101 } 1102 1103 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1104 const class MCExpr *Expr, SMLoc S) { 1105 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1106 Op->Expr = Expr; 1107 Op->StartLoc = S; 1108 Op->EndLoc = S; 1109 return Op; 1110 } 1111 }; 1112 1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1114 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1115 return OS; 1116 } 1117 1118 //===----------------------------------------------------------------------===// 1119 // AsmParser 1120 //===----------------------------------------------------------------------===// 1121 1122 // Holds info related to the current kernel, e.g. count of SGPRs used. 1123 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1124 // .amdgpu_hsa_kernel or at EOF. 1125 class KernelScopeInfo { 1126 int SgprIndexUnusedMin = -1; 1127 int VgprIndexUnusedMin = -1; 1128 int AgprIndexUnusedMin = -1; 1129 MCContext *Ctx = nullptr; 1130 MCSubtargetInfo const *MSTI = nullptr; 1131 1132 void usesSgprAt(int i) { 1133 if (i >= SgprIndexUnusedMin) { 1134 SgprIndexUnusedMin = ++i; 1135 if (Ctx) { 1136 MCSymbol* const Sym = 1137 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1138 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1139 } 1140 } 1141 } 1142 1143 void usesVgprAt(int i) { 1144 if (i >= VgprIndexUnusedMin) { 1145 VgprIndexUnusedMin = ++i; 1146 if (Ctx) { 1147 MCSymbol* const Sym = 1148 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1149 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1150 VgprIndexUnusedMin); 1151 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1152 } 1153 } 1154 } 1155 1156 void usesAgprAt(int i) { 1157 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1158 if (!hasMAIInsts(*MSTI)) 1159 return; 1160 1161 if (i >= AgprIndexUnusedMin) { 1162 AgprIndexUnusedMin = ++i; 1163 if (Ctx) { 1164 MCSymbol* const Sym = 1165 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1166 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1167 1168 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1169 MCSymbol* const vSym = 1170 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1171 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1172 VgprIndexUnusedMin); 1173 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1174 } 1175 } 1176 } 1177 1178 public: 1179 KernelScopeInfo() = default; 1180 1181 void initialize(MCContext &Context) { 1182 Ctx = &Context; 1183 MSTI = Ctx->getSubtargetInfo(); 1184 1185 usesSgprAt(SgprIndexUnusedMin = -1); 1186 usesVgprAt(VgprIndexUnusedMin = -1); 1187 if (hasMAIInsts(*MSTI)) { 1188 usesAgprAt(AgprIndexUnusedMin = -1); 1189 } 1190 } 1191 1192 void 
usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1193 switch (RegKind) { 1194 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1195 case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break; 1196 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1197 default: break; 1198 } 1199 } 1200 }; 1201 1202 class AMDGPUAsmParser : public MCTargetAsmParser { 1203 MCAsmParser &Parser; 1204 1205 // Number of extra operands parsed after the first optional operand. 1206 // This may be necessary to skip hardcoded mandatory operands. 1207 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1208 1209 unsigned ForcedEncodingSize = 0; 1210 bool ForcedDPP = false; 1211 bool ForcedSDWA = false; 1212 KernelScopeInfo KernelScope; 1213 unsigned CPolSeen; 1214 1215 /// @name Auto-generated Match Functions 1216 /// { 1217 1218 #define GET_ASSEMBLER_HEADER 1219 #include "AMDGPUGenAsmMatcher.inc" 1220 1221 /// } 1222 1223 private: 1224 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1225 bool OutOfRangeError(SMRange Range); 1226 /// Calculate VGPR/SGPR blocks required for given target, reserved 1227 /// registers, and user-specified NextFreeXGPR values. 1228 /// 1229 /// \param Features [in] Target features, used for bug corrections. 1230 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1231 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1232 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1233 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1234 /// descriptor field, if valid. 1235 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1236 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1237 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1238 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1239 /// \param VGPRBlocks [out] Result VGPR block count. 1240 /// \param SGPRBlocks [out] Result SGPR block count. 1241 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1242 bool FlatScrUsed, bool XNACKUsed, 1243 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1244 SMRange VGPRRange, unsigned NextFreeSGPR, 1245 SMRange SGPRRange, unsigned &VGPRBlocks, 1246 unsigned &SGPRBlocks); 1247 bool ParseDirectiveAMDGCNTarget(); 1248 bool ParseDirectiveAMDHSAKernel(); 1249 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1250 bool ParseDirectiveHSACodeObjectVersion(); 1251 bool ParseDirectiveHSACodeObjectISA(); 1252 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1253 bool ParseDirectiveAMDKernelCodeT(); 1254 // TODO: Possibly make subtargetHasRegister const. 1255 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1256 bool ParseDirectiveAMDGPUHsaKernel(); 1257 1258 bool ParseDirectiveISAVersion(); 1259 bool ParseDirectiveHSAMetadata(); 1260 bool ParseDirectivePALMetadataBegin(); 1261 bool ParseDirectivePALMetadata(); 1262 bool ParseDirectiveAMDGPULDS(); 1263 1264 /// Common code to parse out a block of text (typically YAML) between start and 1265 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1332 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1333 MCContext &Ctx = getContext(); 1334 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1335 MCSymbol *Sym = 1336 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1337 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1338 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1339 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1340 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1341 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1342 } else { 1343 MCSymbol *Sym = 1344 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1345 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1346 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1347 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1348 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1349 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1350 } 1351 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1352 initializeGprCountSymbol(IS_VGPR); 1353 initializeGprCountSymbol(IS_SGPR); 1354 } else 1355 KernelScope.initialize(getContext()); 1356 } 1357 } 1358 1359 bool hasMIMG_R128() const { 1360 return AMDGPU::hasMIMG_R128(getSTI()); 1361 } 1362 1363 bool hasPackedD16() const { 1364 return AMDGPU::hasPackedD16(getSTI()); 1365 } 1366 1367 bool hasGFX10A16() const { 1368 return AMDGPU::hasGFX10A16(getSTI()); 1369 } 1370 1371 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1372 1373 bool isSI() const { 1374 return AMDGPU::isSI(getSTI()); 1375 } 1376 1377 bool isCI() const { 1378 return AMDGPU::isCI(getSTI()); 1379 } 1380 1381 bool isVI() const { 1382 return AMDGPU::isVI(getSTI()); 1383 } 1384 1385 bool isGFX9() const { 1386 return AMDGPU::isGFX9(getSTI()); 1387 } 1388 1389 // TODO: isGFX90A is also true for GFX940. We need to clean it. 1390 bool isGFX90A() const { 1391 return AMDGPU::isGFX90A(getSTI()); 1392 } 1393 1394 bool isGFX940() const { 1395 return AMDGPU::isGFX940(getSTI()); 1396 } 1397 1398 bool isGFX9Plus() const { 1399 return AMDGPU::isGFX9Plus(getSTI()); 1400 } 1401 1402 bool isGFX10() const { 1403 return AMDGPU::isGFX10(getSTI()); 1404 } 1405 1406 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1407 1408 bool isGFX10_BEncoding() const { 1409 return AMDGPU::isGFX10_BEncoding(getSTI()); 1410 } 1411 1412 bool hasInv2PiInlineImm() const { 1413 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1414 } 1415 1416 bool hasFlatOffsets() const { 1417 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1418 } 1419 1420 bool hasArchitectedFlatScratch() const { 1421 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1422 } 1423 1424 bool hasSGPR102_SGPR103() const { 1425 return !isVI() && !isGFX9(); 1426 } 1427 1428 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1429 1430 bool hasIntClamp() const { 1431 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1432 } 1433 1434 AMDGPUTargetStreamer &getTargetStreamer() { 1435 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1436 return static_cast<AMDGPUTargetStreamer &>(TS); 1437 } 1438 1439 const MCRegisterInfo *getMRI() const { 1440 // We need this const_cast because for some reason getContext() is not const 1441 // in MCAsmParser. 
1442 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1443 } 1444 1445 const MCInstrInfo *getMII() const { 1446 return &MII; 1447 } 1448 1449 const FeatureBitset &getFeatureBits() const { 1450 return getSTI().getFeatureBits(); 1451 } 1452 1453 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1454 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1455 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1456 1457 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1458 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1459 bool isForcedDPP() const { return ForcedDPP; } 1460 bool isForcedSDWA() const { return ForcedSDWA; } 1461 ArrayRef<unsigned> getMatchedVariants() const; 1462 StringRef getMatchedVariantName() const; 1463 1464 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1465 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1466 bool RestoreOnFailure); 1467 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1468 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1469 SMLoc &EndLoc) override; 1470 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1471 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1472 unsigned Kind) override; 1473 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1474 OperandVector &Operands, MCStreamer &Out, 1475 uint64_t &ErrorInfo, 1476 bool MatchingInlineAsm) override; 1477 bool ParseDirective(AsmToken DirectiveID) override; 1478 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1479 OperandMode Mode = OperandMode_Default); 1480 StringRef parseMnemonicSuffix(StringRef Name); 1481 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1482 SMLoc NameLoc, OperandVector &Operands) override; 1483 //bool ProcessInstruction(MCInst &Inst); 1484 1485 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1486 1487 OperandMatchResultTy 1488 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1489 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1490 bool (*ConvertResult)(int64_t &) = nullptr); 1491 1492 OperandMatchResultTy 1493 parseOperandArrayWithPrefix(const char *Prefix, 1494 OperandVector &Operands, 1495 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1496 bool (*ConvertResult)(int64_t&) = nullptr); 1497 1498 OperandMatchResultTy 1499 parseNamedBit(StringRef Name, OperandVector &Operands, 1500 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1501 OperandMatchResultTy parseCPol(OperandVector &Operands); 1502 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1503 StringRef &Value, 1504 SMLoc &StringLoc); 1505 1506 bool isModifier(); 1507 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1508 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1509 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1510 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1511 bool parseSP3NegModifier(); 1512 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1513 OperandMatchResultTy parseReg(OperandVector &Operands); 1514 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1515 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1516 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1517 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1518 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1519 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1520 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1521 OperandMatchResultTy parseUfmt(int64_t &Format); 1522 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1523 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1524 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1525 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1526 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1527 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1528 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1529 1530 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1531 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1532 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1533 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1534 1535 bool parseCnt(int64_t &IntVal); 1536 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1537 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1538 1539 private: 1540 struct OperandInfoTy { 1541 SMLoc Loc; 1542 int64_t Id; 1543 bool IsSymbolic = false; 1544 bool IsDefined = false; 1545 StringRef Name; 1546 1547 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1548 }; 1549 1550 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1551 bool validateSendMsg(const OperandInfoTy &Msg, 1552 const OperandInfoTy &Op, 1553 const OperandInfoTy &Stream); 1554 1555 bool parseHwregBody(OperandInfoTy &HwReg, 1556 OperandInfoTy &Offset, 1557 OperandInfoTy &Width); 1558 bool validateHwreg(const OperandInfoTy &HwReg, 1559 const OperandInfoTy &Offset, 1560 const OperandInfoTy &Width); 1561 1562 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1563 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1564 1565 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1566 const OperandVector &Operands) const; 1567 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1568 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1569 SMLoc getLitLoc(const OperandVector &Operands) const; 1570 SMLoc getConstLoc(const OperandVector &Operands) const; 1571 1572 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1573 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1574 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1575 bool validateSOPLiteral(const MCInst &Inst) const; 1576 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1577 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1578 bool validateIntClampSupported(const MCInst &Inst); 1579 bool validateMIMGAtomicDMask(const MCInst &Inst); 1580 bool validateMIMGGatherDMask(const MCInst &Inst); 1581 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1582 bool validateMIMGDataSize(const MCInst &Inst); 1583 bool 
validateMIMGAddrSize(const MCInst &Inst); 1584 bool validateMIMGD16(const MCInst &Inst); 1585 bool validateMIMGDim(const MCInst &Inst); 1586 bool validateMIMGMSAA(const MCInst &Inst); 1587 bool validateOpSel(const MCInst &Inst); 1588 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1589 bool validateVccOperand(unsigned Reg) const; 1590 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1591 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1592 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1593 bool validateAGPRLdSt(const MCInst &Inst) const; 1594 bool validateVGPRAlign(const MCInst &Inst) const; 1595 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1596 bool validateDivScale(const MCInst &Inst); 1597 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1598 const SMLoc &IDLoc); 1599 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1600 unsigned getConstantBusLimit(unsigned Opcode) const; 1601 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1602 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1603 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1604 1605 bool isSupportedMnemo(StringRef Mnemo, 1606 const FeatureBitset &FBS); 1607 bool isSupportedMnemo(StringRef Mnemo, 1608 const FeatureBitset &FBS, 1609 ArrayRef<unsigned> Variants); 1610 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1611 1612 bool isId(const StringRef Id) const; 1613 bool isId(const AsmToken &Token, const StringRef Id) const; 1614 bool isToken(const AsmToken::TokenKind Kind) const; 1615 bool trySkipId(const StringRef Id); 1616 bool trySkipId(const StringRef Pref, const StringRef Id); 1617 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1618 bool trySkipToken(const AsmToken::TokenKind Kind); 1619 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1620 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1621 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1622 1623 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1624 AsmToken::TokenKind getTokenKind() const; 1625 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1626 bool parseExpr(OperandVector &Operands); 1627 StringRef getTokenStr() const; 1628 AsmToken peekToken(); 1629 AsmToken getToken() const; 1630 SMLoc getLoc() const; 1631 void lex(); 1632 1633 public: 1634 void onBeginOfFile() override; 1635 1636 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1637 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1638 1639 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1640 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1641 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1642 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1643 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1644 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1645 1646 bool parseSwizzleOperand(int64_t &Op, 1647 const unsigned MinVal, 1648 const unsigned MaxVal, 1649 const StringRef ErrMsg, 1650 SMLoc &Loc); 1651 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1652 const unsigned MinVal, 1653 const unsigned MaxVal, 1654 const StringRef ErrMsg); 1655 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1656 bool parseSwizzleOffset(int64_t &Imm); 1657 bool 
parseSwizzleMacro(int64_t &Imm); 1658 bool parseSwizzleQuadPerm(int64_t &Imm); 1659 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1660 bool parseSwizzleBroadcast(int64_t &Imm); 1661 bool parseSwizzleSwap(int64_t &Imm); 1662 bool parseSwizzleReverse(int64_t &Imm); 1663 1664 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1665 int64_t parseGPRIdxMacro(); 1666 1667 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1668 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1669 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1670 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1671 1672 AMDGPUOperand::Ptr defaultCPol() const; 1673 1674 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1675 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1676 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1677 AMDGPUOperand::Ptr defaultFlatOffset() const; 1678 1679 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1680 1681 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1682 OptionalImmIndexMap &OptionalIdx); 1683 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1684 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1685 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1686 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1687 OptionalImmIndexMap &OptionalIdx); 1688 1689 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1690 1691 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1692 bool IsAtomic = false); 1693 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1694 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1695 1696 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1697 1698 bool parseDimId(unsigned &Encoding); 1699 OperandMatchResultTy parseDim(OperandVector &Operands); 1700 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1701 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1702 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1703 int64_t parseDPPCtrlSel(StringRef Ctrl); 1704 int64_t parseDPPCtrlPerm(); 1705 AMDGPUOperand::Ptr defaultRowMask() const; 1706 AMDGPUOperand::Ptr defaultBankMask() const; 1707 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1708 AMDGPUOperand::Ptr defaultFI() const; 1709 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1710 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1711 1712 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1713 AMDGPUOperand::ImmTy Type); 1714 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1715 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1716 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1717 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1718 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1719 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1720 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1721 uint64_t BasicInstType, 1722 bool SkipDstVcc = false, 1723 bool SkipSrcVcc = false); 1724 1725 AMDGPUOperand::Ptr defaultBLGP() const; 1726 AMDGPUOperand::Ptr defaultCBSZ() const; 1727 AMDGPUOperand::Ptr defaultABID() const; 1728 1729 OperandMatchResultTy parseEndpgmOp(OperandVector 
&Operands); 1730 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1731 }; 1732 1733 struct OptionalOperand { 1734 const char *Name; 1735 AMDGPUOperand::ImmTy Type; 1736 bool IsBit; 1737 bool (*ConvertResult)(int64_t&); 1738 }; 1739 1740 } // end anonymous namespace 1741 1742 // May be called with integer type with equivalent bitwidth. 1743 static const fltSemantics *getFltSemantics(unsigned Size) { 1744 switch (Size) { 1745 case 4: 1746 return &APFloat::IEEEsingle(); 1747 case 8: 1748 return &APFloat::IEEEdouble(); 1749 case 2: 1750 return &APFloat::IEEEhalf(); 1751 default: 1752 llvm_unreachable("unsupported fp type"); 1753 } 1754 } 1755 1756 static const fltSemantics *getFltSemantics(MVT VT) { 1757 return getFltSemantics(VT.getSizeInBits() / 8); 1758 } 1759 1760 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1761 switch (OperandType) { 1762 case AMDGPU::OPERAND_REG_IMM_INT32: 1763 case AMDGPU::OPERAND_REG_IMM_FP32: 1764 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1765 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1766 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1767 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1768 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1769 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1770 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1771 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1772 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1773 case AMDGPU::OPERAND_KIMM32: 1774 return &APFloat::IEEEsingle(); 1775 case AMDGPU::OPERAND_REG_IMM_INT64: 1776 case AMDGPU::OPERAND_REG_IMM_FP64: 1777 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1778 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1779 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1780 return &APFloat::IEEEdouble(); 1781 case AMDGPU::OPERAND_REG_IMM_INT16: 1782 case AMDGPU::OPERAND_REG_IMM_FP16: 1783 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1784 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1785 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1786 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1787 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1788 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1789 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1790 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1791 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1792 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1793 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1794 case AMDGPU::OPERAND_KIMM16: 1795 return &APFloat::IEEEhalf(); 1796 default: 1797 llvm_unreachable("unsupported fp type"); 1798 } 1799 } 1800 1801 //===----------------------------------------------------------------------===// 1802 // Operand 1803 //===----------------------------------------------------------------------===// 1804 1805 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1806 bool Lost; 1807 1808 // Convert literal to single precision 1809 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1810 APFloat::rmNearestTiesToEven, 1811 &Lost); 1812 // We allow precision lost but not overflow or underflow 1813 if (Status != APFloat::opOK && 1814 Lost && 1815 ((Status & APFloat::opOverflow) != 0 || 1816 (Status & APFloat::opUnderflow) != 0)) { 1817 return false; 1818 } 1819 1820 return true; 1821 } 1822 1823 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1824 return isUIntN(Size, Val) || isIntN(Size, Val); 1825 } 1826 1827 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1828 if (VT.getScalarType() == MVT::i16) { 1829 // FP immediate values are broken. 1830 return isInlinableIntLiteral(Val); 1831 } 1832 1833 // f16/v2f16 operands work correctly for all values. 
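// Illustrative note (not an exhaustive spec): for f16-typed operands the
// inlinable values are the integers -16..64 plus a small set of FP constants
// (0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) when HasInv2Pi is set), so
// e.g. 1.0 is inlinable here while 1.5 is not and must be encoded as a literal.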
1834 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1835 } 1836 1837 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1838 1839 // This is a hack to enable named inline values like 1840 // shared_base with both 32-bit and 64-bit operands. 1841 // Note that these values are defined as 1842 // 32-bit operands only. 1843 if (isInlineValue()) { 1844 return true; 1845 } 1846 1847 if (!isImmTy(ImmTyNone)) { 1848 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1849 return false; 1850 } 1851 // TODO: We should avoid using host float here. It would be better to 1852 // check the float bit values which is what a few other places do. 1853 // We've had bot failures before due to weird NaN support on mips hosts. 1854 1855 APInt Literal(64, Imm.Val); 1856 1857 if (Imm.IsFPImm) { // We got fp literal token 1858 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1859 return AMDGPU::isInlinableLiteral64(Imm.Val, 1860 AsmParser->hasInv2PiInlineImm()); 1861 } 1862 1863 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1864 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1865 return false; 1866 1867 if (type.getScalarSizeInBits() == 16) { 1868 return isInlineableLiteralOp16( 1869 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1870 type, AsmParser->hasInv2PiInlineImm()); 1871 } 1872 1873 // Check if single precision literal is inlinable 1874 return AMDGPU::isInlinableLiteral32( 1875 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1876 AsmParser->hasInv2PiInlineImm()); 1877 } 1878 1879 // We got int literal token. 1880 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1881 return AMDGPU::isInlinableLiteral64(Imm.Val, 1882 AsmParser->hasInv2PiInlineImm()); 1883 } 1884 1885 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1886 return false; 1887 } 1888 1889 if (type.getScalarSizeInBits() == 16) { 1890 return isInlineableLiteralOp16( 1891 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1892 type, AsmParser->hasInv2PiInlineImm()); 1893 } 1894 1895 return AMDGPU::isInlinableLiteral32( 1896 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1897 AsmParser->hasInv2PiInlineImm()); 1898 } 1899 1900 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1901 // Check that this immediate can be added as a literal 1902 if (!isImmTy(ImmTyNone)) { 1903 return false; 1904 } 1905 1906 if (!Imm.IsFPImm) { 1907 // We got int literal token. 1908 1909 if (type == MVT::f64 && hasFPModifiers()) { 1910 // Cannot apply fp modifiers to int literals preserving the same semantics 1911 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1912 // disable these cases. 1913 return false; 1914 } 1915 1916 unsigned Size = type.getSizeInBits(); 1917 if (Size == 64) 1918 Size = 32; 1919 1920 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1921 // types. 1922 return isSafeTruncation(Imm.Val, Size); 1923 } 1924 1925 // We got fp literal token 1926 if (type == MVT::f64) { // Expected 64-bit fp operand 1927 // We would set the low 64 bits of the literal to zeroes, but we accept such literals 1928 return true; 1929 } 1930 1931 if (type == MVT::i64) { // Expected 64-bit int operand 1932 // We don't allow fp literals in 64-bit integer instructions. It is 1933 // unclear how we should encode them.
1934 return false; 1935 } 1936 1937 // We allow fp literals with f16x2 operands assuming that the specified 1938 // literal goes into the lower half and the upper half is zero. We also 1939 // require that the literal may be losslessly converted to f16. 1940 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1941 (type == MVT::v2i16)? MVT::i16 : 1942 (type == MVT::v2f32)? MVT::f32 : type; 1943 1944 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1945 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1946 } 1947 1948 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1949 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1950 } 1951 1952 bool AMDGPUOperand::isVRegWithInputMods() const { 1953 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1954 // GFX90A allows DPP on 64-bit operands. 1955 (isRegClass(AMDGPU::VReg_64RegClassID) && 1956 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1957 } 1958 1959 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1960 if (AsmParser->isVI()) 1961 return isVReg32(); 1962 else if (AsmParser->isGFX9Plus()) 1963 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1964 else 1965 return false; 1966 } 1967 1968 bool AMDGPUOperand::isSDWAFP16Operand() const { 1969 return isSDWAOperand(MVT::f16); 1970 } 1971 1972 bool AMDGPUOperand::isSDWAFP32Operand() const { 1973 return isSDWAOperand(MVT::f32); 1974 } 1975 1976 bool AMDGPUOperand::isSDWAInt16Operand() const { 1977 return isSDWAOperand(MVT::i16); 1978 } 1979 1980 bool AMDGPUOperand::isSDWAInt32Operand() const { 1981 return isSDWAOperand(MVT::i32); 1982 } 1983 1984 bool AMDGPUOperand::isBoolReg() const { 1985 auto FB = AsmParser->getFeatureBits(); 1986 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1987 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1988 } 1989 1990 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1991 { 1992 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1993 assert(Size == 2 || Size == 4 || Size == 8); 1994 1995 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1996 1997 if (Imm.Mods.Abs) { 1998 Val &= ~FpSignMask; 1999 } 2000 if (Imm.Mods.Neg) { 2001 Val ^= FpSignMask; 2002 } 2003 2004 return Val; 2005 } 2006 2007 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2008 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2009 Inst.getNumOperands())) { 2010 addLiteralImmOperand(Inst, Imm.Val, 2011 ApplyModifiers & 2012 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2013 } else { 2014 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2015 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2016 setImmKindNone(); 2017 } 2018 } 2019 2020 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2021 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2022 auto OpNum = Inst.getNumOperands(); 2023 // Check that this operand accepts literals 2024 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2025 2026 if (ApplyModifiers) { 2027 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2028 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2029 Val = applyInputFPModifiers(Val, Size); 2030 } 2031 2032 APInt Literal(64, Val); 2033 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2034 2035 if (Imm.IsFPImm) { // We got fp literal token 2036 switch (OpTy) { 2037 case AMDGPU::OPERAND_REG_IMM_INT64: 2038 case AMDGPU::OPERAND_REG_IMM_FP64: 2039 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2040 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2041 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2042 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2043 AsmParser->hasInv2PiInlineImm())) { 2044 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2045 setImmKindConst(); 2046 return; 2047 } 2048 2049 // Non-inlineable 2050 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2051 // For fp operands we check if low 32 bits are zeros 2052 if (Literal.getLoBits(32) != 0) { 2053 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2054 "Can't encode literal as exact 64-bit floating-point operand. " 2055 "Low 32-bits will be set to zero"); 2056 } 2057 2058 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2059 setImmKindLiteral(); 2060 return; 2061 } 2062 2063 // We don't allow fp literals in 64-bit integer instructions. It is 2064 // unclear how we should encode them. This case should be checked earlier 2065 // in predicate methods (isLiteralImm()) 2066 llvm_unreachable("fp literal in 64-bit integer instruction."); 2067 2068 case AMDGPU::OPERAND_REG_IMM_INT32: 2069 case AMDGPU::OPERAND_REG_IMM_FP32: 2070 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2071 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2072 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2073 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2074 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2075 case AMDGPU::OPERAND_REG_IMM_INT16: 2076 case AMDGPU::OPERAND_REG_IMM_FP16: 2077 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2078 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2079 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2080 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2081 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2082 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2083 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2085 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2086 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2087 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2088 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2089 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2090 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2091 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2092 case AMDGPU::OPERAND_KIMM32: 2093 case AMDGPU::OPERAND_KIMM16: { 2094 bool lost; 2095 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2096 // Convert literal to single precision 2097 FPLiteral.convert(*getOpFltSemantics(OpTy), 2098 APFloat::rmNearestTiesToEven, &lost); 2099 // We allow precision lost but not overflow or underflow. This should be 2100 // checked earlier in isLiteralImm() 2101 2102 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2103 Inst.addOperand(MCOperand::createImm(ImmVal)); 2104 setImmKindLiteral(); 2105 return; 2106 } 2107 default: 2108 llvm_unreachable("invalid operand size"); 2109 } 2110 2111 return; 2112 } 2113 2114 // We got int literal token. 2115 // Only sign extend inline immediates. 
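// Worked example (illustration only): with a 32-bit operand, an integer token
// such as -1 is an inline constant and is emitted as-is by the cases below,
// whereas 0x12345678 is not inlinable and is emitted as a 32-bit literal
// (the value truncated to its low 32 bits).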
2116 switch (OpTy) { 2117 case AMDGPU::OPERAND_REG_IMM_INT32: 2118 case AMDGPU::OPERAND_REG_IMM_FP32: 2119 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2120 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2121 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2124 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2125 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2126 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2127 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2128 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2129 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2130 if (isSafeTruncation(Val, 32) && 2131 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2132 AsmParser->hasInv2PiInlineImm())) { 2133 Inst.addOperand(MCOperand::createImm(Val)); 2134 setImmKindConst(); 2135 return; 2136 } 2137 2138 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2139 setImmKindLiteral(); 2140 return; 2141 2142 case AMDGPU::OPERAND_REG_IMM_INT64: 2143 case AMDGPU::OPERAND_REG_IMM_FP64: 2144 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2145 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2146 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2147 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2148 Inst.addOperand(MCOperand::createImm(Val)); 2149 setImmKindConst(); 2150 return; 2151 } 2152 2153 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2154 setImmKindLiteral(); 2155 return; 2156 2157 case AMDGPU::OPERAND_REG_IMM_INT16: 2158 case AMDGPU::OPERAND_REG_IMM_FP16: 2159 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2163 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2164 if (isSafeTruncation(Val, 16) && 2165 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2166 AsmParser->hasInv2PiInlineImm())) { 2167 Inst.addOperand(MCOperand::createImm(Val)); 2168 setImmKindConst(); 2169 return; 2170 } 2171 2172 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2173 setImmKindLiteral(); 2174 return; 2175 2176 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2177 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2179 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2180 assert(isSafeTruncation(Val, 16)); 2181 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2182 AsmParser->hasInv2PiInlineImm())); 2183 2184 Inst.addOperand(MCOperand::createImm(Val)); 2185 return; 2186 } 2187 case AMDGPU::OPERAND_KIMM32: 2188 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2189 setImmKindNone(); 2190 return; 2191 case AMDGPU::OPERAND_KIMM16: 2192 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2193 setImmKindNone(); 2194 return; 2195 default: 2196 llvm_unreachable("invalid operand size"); 2197 } 2198 } 2199 2200 template <unsigned Bitwidth> 2201 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2202 APInt Literal(64, Imm.Val); 2203 setImmKindNone(); 2204 2205 if (!Imm.IsFPImm) { 2206 // We got int literal token. 
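// For example (illustrative), with Bitwidth == 16 an integer token like 0x3C00
// is passed through unchanged as the low 16 bits of the literal.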
2207 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2208 return; 2209 } 2210 2211 bool Lost; 2212 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2213 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2214 APFloat::rmNearestTiesToEven, &Lost); 2215 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2216 } 2217 2218 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2219 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2220 } 2221 2222 static bool isInlineValue(unsigned Reg) { 2223 switch (Reg) { 2224 case AMDGPU::SRC_SHARED_BASE: 2225 case AMDGPU::SRC_SHARED_LIMIT: 2226 case AMDGPU::SRC_PRIVATE_BASE: 2227 case AMDGPU::SRC_PRIVATE_LIMIT: 2228 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2229 return true; 2230 case AMDGPU::SRC_VCCZ: 2231 case AMDGPU::SRC_EXECZ: 2232 case AMDGPU::SRC_SCC: 2233 return true; 2234 case AMDGPU::SGPR_NULL: 2235 return true; 2236 default: 2237 return false; 2238 } 2239 } 2240 2241 bool AMDGPUOperand::isInlineValue() const { 2242 return isRegKind() && ::isInlineValue(getReg()); 2243 } 2244 2245 //===----------------------------------------------------------------------===// 2246 // AsmParser 2247 //===----------------------------------------------------------------------===// 2248 2249 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2250 if (Is == IS_VGPR) { 2251 switch (RegWidth) { 2252 default: return -1; 2253 case 1: return AMDGPU::VGPR_32RegClassID; 2254 case 2: return AMDGPU::VReg_64RegClassID; 2255 case 3: return AMDGPU::VReg_96RegClassID; 2256 case 4: return AMDGPU::VReg_128RegClassID; 2257 case 5: return AMDGPU::VReg_160RegClassID; 2258 case 6: return AMDGPU::VReg_192RegClassID; 2259 case 7: return AMDGPU::VReg_224RegClassID; 2260 case 8: return AMDGPU::VReg_256RegClassID; 2261 case 16: return AMDGPU::VReg_512RegClassID; 2262 case 32: return AMDGPU::VReg_1024RegClassID; 2263 } 2264 } else if (Is == IS_TTMP) { 2265 switch (RegWidth) { 2266 default: return -1; 2267 case 1: return AMDGPU::TTMP_32RegClassID; 2268 case 2: return AMDGPU::TTMP_64RegClassID; 2269 case 4: return AMDGPU::TTMP_128RegClassID; 2270 case 8: return AMDGPU::TTMP_256RegClassID; 2271 case 16: return AMDGPU::TTMP_512RegClassID; 2272 } 2273 } else if (Is == IS_SGPR) { 2274 switch (RegWidth) { 2275 default: return -1; 2276 case 1: return AMDGPU::SGPR_32RegClassID; 2277 case 2: return AMDGPU::SGPR_64RegClassID; 2278 case 3: return AMDGPU::SGPR_96RegClassID; 2279 case 4: return AMDGPU::SGPR_128RegClassID; 2280 case 5: return AMDGPU::SGPR_160RegClassID; 2281 case 6: return AMDGPU::SGPR_192RegClassID; 2282 case 7: return AMDGPU::SGPR_224RegClassID; 2283 case 8: return AMDGPU::SGPR_256RegClassID; 2284 case 16: return AMDGPU::SGPR_512RegClassID; 2285 } 2286 } else if (Is == IS_AGPR) { 2287 switch (RegWidth) { 2288 default: return -1; 2289 case 1: return AMDGPU::AGPR_32RegClassID; 2290 case 2: return AMDGPU::AReg_64RegClassID; 2291 case 3: return AMDGPU::AReg_96RegClassID; 2292 case 4: return AMDGPU::AReg_128RegClassID; 2293 case 5: return AMDGPU::AReg_160RegClassID; 2294 case 6: return AMDGPU::AReg_192RegClassID; 2295 case 7: return AMDGPU::AReg_224RegClassID; 2296 case 8: return AMDGPU::AReg_256RegClassID; 2297 case 16: return AMDGPU::AReg_512RegClassID; 2298 case 32: return AMDGPU::AReg_1024RegClassID; 2299 } 2300 } 2301 return -1; 2302 } 2303 2304 static unsigned getSpecialRegForName(StringRef RegName) { 2305 return StringSwitch<unsigned>(RegName) 2306 .Case("exec", 
AMDGPU::EXEC) 2307 .Case("vcc", AMDGPU::VCC) 2308 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2309 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2310 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2311 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2312 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2313 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2314 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2315 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2316 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2317 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2318 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2319 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2320 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2321 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2322 .Case("m0", AMDGPU::M0) 2323 .Case("vccz", AMDGPU::SRC_VCCZ) 2324 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2325 .Case("execz", AMDGPU::SRC_EXECZ) 2326 .Case("src_execz", AMDGPU::SRC_EXECZ) 2327 .Case("scc", AMDGPU::SRC_SCC) 2328 .Case("src_scc", AMDGPU::SRC_SCC) 2329 .Case("tba", AMDGPU::TBA) 2330 .Case("tma", AMDGPU::TMA) 2331 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2332 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2333 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2334 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2335 .Case("vcc_lo", AMDGPU::VCC_LO) 2336 .Case("vcc_hi", AMDGPU::VCC_HI) 2337 .Case("exec_lo", AMDGPU::EXEC_LO) 2338 .Case("exec_hi", AMDGPU::EXEC_HI) 2339 .Case("tma_lo", AMDGPU::TMA_LO) 2340 .Case("tma_hi", AMDGPU::TMA_HI) 2341 .Case("tba_lo", AMDGPU::TBA_LO) 2342 .Case("tba_hi", AMDGPU::TBA_HI) 2343 .Case("pc", AMDGPU::PC_REG) 2344 .Case("null", AMDGPU::SGPR_NULL) 2345 .Default(AMDGPU::NoRegister); 2346 } 2347 2348 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2349 SMLoc &EndLoc, bool RestoreOnFailure) { 2350 auto R = parseRegister(); 2351 if (!R) return true; 2352 assert(R->isReg()); 2353 RegNo = R->getReg(); 2354 StartLoc = R->getStartLoc(); 2355 EndLoc = R->getEndLoc(); 2356 return false; 2357 } 2358 2359 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2360 SMLoc &EndLoc) { 2361 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2362 } 2363 2364 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2365 SMLoc &StartLoc, 2366 SMLoc &EndLoc) { 2367 bool Result = 2368 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2369 bool PendingErrors = getParser().hasPendingError(); 2370 getParser().clearPendingErrors(); 2371 if (PendingErrors) 2372 return MatchOperand_ParseFail; 2373 if (Result) 2374 return MatchOperand_NoMatch; 2375 return MatchOperand_Success; 2376 } 2377 2378 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2379 RegisterKind RegKind, unsigned Reg1, 2380 SMLoc Loc) { 2381 switch (RegKind) { 2382 case IS_SPECIAL: 2383 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2384 Reg = AMDGPU::EXEC; 2385 RegWidth = 2; 2386 return true; 2387 } 2388 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2389 Reg = AMDGPU::FLAT_SCR; 2390 RegWidth = 2; 2391 return true; 2392 } 2393 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2394 Reg = AMDGPU::XNACK_MASK; 2395 RegWidth = 2; 2396 return true; 2397 } 2398 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2399 Reg = AMDGPU::VCC; 2400 RegWidth = 2; 2401 return true; 2402 } 2403 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2404 Reg = 
AMDGPU::TBA; 2405 RegWidth = 2; 2406 return true; 2407 } 2408 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2409 Reg = AMDGPU::TMA; 2410 RegWidth = 2; 2411 return true; 2412 } 2413 Error(Loc, "register does not fit in the list"); 2414 return false; 2415 case IS_VGPR: 2416 case IS_SGPR: 2417 case IS_AGPR: 2418 case IS_TTMP: 2419 if (Reg1 != Reg + RegWidth) { 2420 Error(Loc, "registers in a list must have consecutive indices"); 2421 return false; 2422 } 2423 RegWidth++; 2424 return true; 2425 default: 2426 llvm_unreachable("unexpected register kind"); 2427 } 2428 } 2429 2430 struct RegInfo { 2431 StringLiteral Name; 2432 RegisterKind Kind; 2433 }; 2434 2435 static constexpr RegInfo RegularRegisters[] = { 2436 {{"v"}, IS_VGPR}, 2437 {{"s"}, IS_SGPR}, 2438 {{"ttmp"}, IS_TTMP}, 2439 {{"acc"}, IS_AGPR}, 2440 {{"a"}, IS_AGPR}, 2441 }; 2442 2443 static bool isRegularReg(RegisterKind Kind) { 2444 return Kind == IS_VGPR || 2445 Kind == IS_SGPR || 2446 Kind == IS_TTMP || 2447 Kind == IS_AGPR; 2448 } 2449 2450 static const RegInfo* getRegularRegInfo(StringRef Str) { 2451 for (const RegInfo &Reg : RegularRegisters) 2452 if (Str.startswith(Reg.Name)) 2453 return &Reg; 2454 return nullptr; 2455 } 2456 2457 static bool getRegNum(StringRef Str, unsigned& Num) { 2458 return !Str.getAsInteger(10, Num); 2459 } 2460 2461 bool 2462 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2463 const AsmToken &NextToken) const { 2464 2465 // A list of consecutive registers: [s0,s1,s2,s3] 2466 if (Token.is(AsmToken::LBrac)) 2467 return true; 2468 2469 if (!Token.is(AsmToken::Identifier)) 2470 return false; 2471 2472 // A single register like s0 or a range of registers like s[0:1] 2473 2474 StringRef Str = Token.getString(); 2475 const RegInfo *Reg = getRegularRegInfo(Str); 2476 if (Reg) { 2477 StringRef RegName = Reg->Name; 2478 StringRef RegSuffix = Str.substr(RegName.size()); 2479 if (!RegSuffix.empty()) { 2480 unsigned Num; 2481 // A single register with an index: rXX 2482 if (getRegNum(RegSuffix, Num)) 2483 return true; 2484 } else { 2485 // A range of registers: r[XX:YY]. 2486 if (NextToken.is(AsmToken::LBrac)) 2487 return true; 2488 } 2489 } 2490 2491 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2492 } 2493 2494 bool 2495 AMDGPUAsmParser::isRegister() 2496 { 2497 return isRegister(getToken(), peekToken()); 2498 } 2499 2500 unsigned 2501 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2502 unsigned RegNum, 2503 unsigned RegWidth, 2504 SMLoc Loc) { 2505 2506 assert(isRegularReg(RegKind)); 2507 2508 unsigned AlignSize = 1; 2509 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2510 // SGPR and TTMP registers must be aligned. 2511 // Max required alignment is 4 dwords. 
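// For instance (illustration only), a 64-bit SGPR pair must start at an even
// index: s[2:3] is accepted, while s[1:2] is rejected below with
// "invalid register alignment".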
2512 AlignSize = std::min(RegWidth, 4u); 2513 } 2514 2515 if (RegNum % AlignSize != 0) { 2516 Error(Loc, "invalid register alignment"); 2517 return AMDGPU::NoRegister; 2518 } 2519 2520 unsigned RegIdx = RegNum / AlignSize; 2521 int RCID = getRegClass(RegKind, RegWidth); 2522 if (RCID == -1) { 2523 Error(Loc, "invalid or unsupported register size"); 2524 return AMDGPU::NoRegister; 2525 } 2526 2527 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2528 const MCRegisterClass RC = TRI->getRegClass(RCID); 2529 if (RegIdx >= RC.getNumRegs()) { 2530 Error(Loc, "register index is out of range"); 2531 return AMDGPU::NoRegister; 2532 } 2533 2534 return RC.getRegister(RegIdx); 2535 } 2536 2537 bool 2538 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2539 int64_t RegLo, RegHi; 2540 if (!skipToken(AsmToken::LBrac, "missing register index")) 2541 return false; 2542 2543 SMLoc FirstIdxLoc = getLoc(); 2544 SMLoc SecondIdxLoc; 2545 2546 if (!parseExpr(RegLo)) 2547 return false; 2548 2549 if (trySkipToken(AsmToken::Colon)) { 2550 SecondIdxLoc = getLoc(); 2551 if (!parseExpr(RegHi)) 2552 return false; 2553 } else { 2554 RegHi = RegLo; 2555 } 2556 2557 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2558 return false; 2559 2560 if (!isUInt<32>(RegLo)) { 2561 Error(FirstIdxLoc, "invalid register index"); 2562 return false; 2563 } 2564 2565 if (!isUInt<32>(RegHi)) { 2566 Error(SecondIdxLoc, "invalid register index"); 2567 return false; 2568 } 2569 2570 if (RegLo > RegHi) { 2571 Error(FirstIdxLoc, "first register index should not exceed second index"); 2572 return false; 2573 } 2574 2575 Num = static_cast<unsigned>(RegLo); 2576 Width = (RegHi - RegLo) + 1; 2577 return true; 2578 } 2579 2580 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2581 unsigned &RegNum, unsigned &RegWidth, 2582 SmallVectorImpl<AsmToken> &Tokens) { 2583 assert(isToken(AsmToken::Identifier)); 2584 unsigned Reg = getSpecialRegForName(getTokenStr()); 2585 if (Reg) { 2586 RegNum = 0; 2587 RegWidth = 1; 2588 RegKind = IS_SPECIAL; 2589 Tokens.push_back(getToken()); 2590 lex(); // skip register name 2591 } 2592 return Reg; 2593 } 2594 2595 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2596 unsigned &RegNum, unsigned &RegWidth, 2597 SmallVectorImpl<AsmToken> &Tokens) { 2598 assert(isToken(AsmToken::Identifier)); 2599 StringRef RegName = getTokenStr(); 2600 auto Loc = getLoc(); 2601 2602 const RegInfo *RI = getRegularRegInfo(RegName); 2603 if (!RI) { 2604 Error(Loc, "invalid register name"); 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 Tokens.push_back(getToken()); 2609 lex(); // skip register name 2610 2611 RegKind = RI->Kind; 2612 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2613 if (!RegSuffix.empty()) { 2614 // Single 32-bit register: vXX. 2615 if (!getRegNum(RegSuffix, RegNum)) { 2616 Error(Loc, "invalid register index"); 2617 return AMDGPU::NoRegister; 2618 } 2619 RegWidth = 1; 2620 } else { 2621 // Range of registers: v[XX:YY]. ":YY" is optional. 
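// For example (illustrative), "v[8:11]" yields RegNum = 8 and RegWidth = 4,
// which getRegularReg() maps to a register in the VReg_128 class.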
2622 if (!ParseRegRange(RegNum, RegWidth)) 2623 return AMDGPU::NoRegister; 2624 } 2625 2626 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2627 } 2628 2629 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2630 unsigned &RegWidth, 2631 SmallVectorImpl<AsmToken> &Tokens) { 2632 unsigned Reg = AMDGPU::NoRegister; 2633 auto ListLoc = getLoc(); 2634 2635 if (!skipToken(AsmToken::LBrac, 2636 "expected a register or a list of registers")) { 2637 return AMDGPU::NoRegister; 2638 } 2639 2640 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2641 2642 auto Loc = getLoc(); 2643 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2644 return AMDGPU::NoRegister; 2645 if (RegWidth != 1) { 2646 Error(Loc, "expected a single 32-bit register"); 2647 return AMDGPU::NoRegister; 2648 } 2649 2650 for (; trySkipToken(AsmToken::Comma); ) { 2651 RegisterKind NextRegKind; 2652 unsigned NextReg, NextRegNum, NextRegWidth; 2653 Loc = getLoc(); 2654 2655 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2656 NextRegNum, NextRegWidth, 2657 Tokens)) { 2658 return AMDGPU::NoRegister; 2659 } 2660 if (NextRegWidth != 1) { 2661 Error(Loc, "expected a single 32-bit register"); 2662 return AMDGPU::NoRegister; 2663 } 2664 if (NextRegKind != RegKind) { 2665 Error(Loc, "registers in a list must be of the same kind"); 2666 return AMDGPU::NoRegister; 2667 } 2668 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2669 return AMDGPU::NoRegister; 2670 } 2671 2672 if (!skipToken(AsmToken::RBrac, 2673 "expected a comma or a closing square bracket")) { 2674 return AMDGPU::NoRegister; 2675 } 2676 2677 if (isRegularReg(RegKind)) 2678 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2679 2680 return Reg; 2681 } 2682 2683 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2684 unsigned &RegNum, unsigned &RegWidth, 2685 SmallVectorImpl<AsmToken> &Tokens) { 2686 auto Loc = getLoc(); 2687 Reg = AMDGPU::NoRegister; 2688 2689 if (isToken(AsmToken::Identifier)) { 2690 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2691 if (Reg == AMDGPU::NoRegister) 2692 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2693 } else { 2694 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2695 } 2696 2697 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2698 if (Reg == AMDGPU::NoRegister) { 2699 assert(Parser.hasPendingError()); 2700 return false; 2701 } 2702 2703 if (!subtargetHasRegister(*TRI, Reg)) { 2704 if (Reg == AMDGPU::SGPR_NULL) { 2705 Error(Loc, "'null' operand is not supported on this GPU"); 2706 } else { 2707 Error(Loc, "register not available on this GPU"); 2708 } 2709 return false; 2710 } 2711 2712 return true; 2713 } 2714 2715 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2716 unsigned &RegNum, unsigned &RegWidth, 2717 bool RestoreOnFailure /*=false*/) { 2718 Reg = AMDGPU::NoRegister; 2719 2720 SmallVector<AsmToken, 1> Tokens; 2721 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2722 if (RestoreOnFailure) { 2723 while (!Tokens.empty()) { 2724 getLexer().UnLex(Tokens.pop_back_val()); 2725 } 2726 } 2727 return true; 2728 } 2729 return false; 2730 } 2731 2732 Optional<StringRef> 2733 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2734 switch (RegKind) { 2735 case IS_VGPR: 2736 return StringRef(".amdgcn.next_free_vgpr"); 2737 case IS_SGPR: 2738 return StringRef(".amdgcn.next_free_sgpr"); 2739 default: 2740 return None; 2741 } 2742 } 2743 2744 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2745 auto SymbolName = getGprCountSymbolName(RegKind); 2746 assert(SymbolName && "initializing invalid register kind"); 2747 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2748 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2749 } 2750 2751 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2752 unsigned DwordRegIndex, 2753 unsigned RegWidth) { 2754 // Symbols are only defined for GCN targets 2755 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2756 return true; 2757 2758 auto SymbolName = getGprCountSymbolName(RegKind); 2759 if (!SymbolName) 2760 return true; 2761 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2762 2763 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2764 int64_t OldCount; 2765 2766 if (!Sym->isVariable()) 2767 return !Error(getLoc(), 2768 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2769 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2770 return !Error( 2771 getLoc(), 2772 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2773 2774 if (OldCount <= NewMax) 2775 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2776 2777 return true; 2778 } 2779 2780 std::unique_ptr<AMDGPUOperand> 2781 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2782 const auto &Tok = getToken(); 2783 SMLoc StartLoc = Tok.getLoc(); 2784 SMLoc EndLoc = Tok.getEndLoc(); 2785 RegisterKind RegKind; 2786 unsigned Reg, RegNum, RegWidth; 2787 2788 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2789 return nullptr; 2790 } 2791 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2792 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2793 return nullptr; 2794 } else 2795 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2796 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2797 } 2798 2799 OperandMatchResultTy 2800 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2801 // TODO: add syntactic sugar for 1/(2*PI) 2802 2803 assert(!isRegister()); 2804 assert(!isModifier()); 2805 2806 const auto& Tok = getToken(); 2807 const auto& NextTok = peekToken(); 2808 bool IsReal = Tok.is(AsmToken::Real); 2809 SMLoc S = getLoc(); 2810 bool Negate = false; 2811 2812 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2813 lex(); 2814 IsReal = true; 2815 Negate = true; 2816 } 2817 2818 if (IsReal) { 2819 // Floating-point expressions are not supported. 2820 // Can only allow floating-point literals with an 2821 // optional sign. 2822 2823 StringRef Num = getTokenStr(); 2824 lex(); 2825 2826 APFloat RealVal(APFloat::IEEEdouble()); 2827 auto roundMode = APFloat::rmNearestTiesToEven; 2828 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2829 return MatchOperand_ParseFail; 2830 } 2831 if (Negate) 2832 RealVal.changeSign(); 2833 2834 Operands.push_back( 2835 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2836 AMDGPUOperand::ImmTyNone, true)); 2837 2838 return MatchOperand_Success; 2839 2840 } else { 2841 int64_t IntVal; 2842 const MCExpr *Expr; 2843 SMLoc S = getLoc(); 2844 2845 if (HasSP3AbsModifier) { 2846 // This is a workaround for handling expressions 2847 // as arguments of SP3 'abs' modifier, for example: 2848 // |1.0| 2849 // |-1| 2850 // |1+x| 2851 // This syntax is not compatible with syntax of standard 2852 // MC expressions (due to the trailing '|'). 
2853 SMLoc EndLoc; 2854 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2855 return MatchOperand_ParseFail; 2856 } else { 2857 if (Parser.parseExpression(Expr)) 2858 return MatchOperand_ParseFail; 2859 } 2860 2861 if (Expr->evaluateAsAbsolute(IntVal)) { 2862 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2863 } else { 2864 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2865 } 2866 2867 return MatchOperand_Success; 2868 } 2869 2870 return MatchOperand_NoMatch; 2871 } 2872 2873 OperandMatchResultTy 2874 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2875 if (!isRegister()) 2876 return MatchOperand_NoMatch; 2877 2878 if (auto R = parseRegister()) { 2879 assert(R->isReg()); 2880 Operands.push_back(std::move(R)); 2881 return MatchOperand_Success; 2882 } 2883 return MatchOperand_ParseFail; 2884 } 2885 2886 OperandMatchResultTy 2887 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2888 auto res = parseReg(Operands); 2889 if (res != MatchOperand_NoMatch) { 2890 return res; 2891 } else if (isModifier()) { 2892 return MatchOperand_NoMatch; 2893 } else { 2894 return parseImm(Operands, HasSP3AbsMod); 2895 } 2896 } 2897 2898 bool 2899 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2900 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2901 const auto &str = Token.getString(); 2902 return str == "abs" || str == "neg" || str == "sext"; 2903 } 2904 return false; 2905 } 2906 2907 bool 2908 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2909 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2910 } 2911 2912 bool 2913 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2914 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2915 } 2916 2917 bool 2918 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2919 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2920 } 2921 2922 // Check if this is an operand modifier or an opcode modifier 2923 // which may look like an expression but it is not. We should 2924 // avoid parsing these modifiers as expressions. Currently 2925 // recognized sequences are: 2926 // |...| 2927 // abs(...) 2928 // neg(...) 2929 // sext(...) 2930 // -reg 2931 // -|...| 2932 // -abs(...) 2933 // name:... 2934 // Note that simple opcode modifiers like 'gds' may be parsed as 2935 // expressions; this is a special case. See getExpressionAsToken. 2936 // 2937 bool 2938 AMDGPUAsmParser::isModifier() { 2939 2940 AsmToken Tok = getToken(); 2941 AsmToken NextToken[2]; 2942 peekTokens(NextToken); 2943 2944 return isOperandModifier(Tok, NextToken[0]) || 2945 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2946 isOpcodeModifierWithVal(Tok, NextToken[0]); 2947 } 2948 2949 // Check if the current token is an SP3 'neg' modifier. 2950 // Currently this modifier is allowed in the following context: 2951 // 2952 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2953 // 2. Before an 'abs' modifier: -abs(...) 2954 // 3. Before an SP3 'abs' modifier: -|...| 2955 // 2956 // In all other cases "-" is handled as a part 2957 // of an expression that follows the sign. 
2958 // 2959 // Note: When "-" is followed by an integer literal, 2960 // this is interpreted as integer negation rather 2961 // than a floating-point NEG modifier applied to the literal. 2962 // Besides being counter-intuitive, such use of a floating-point 2963 // NEG modifier would have resulted in a different meaning 2964 // of integer literals used with VOP1/2/C and VOP3, 2965 // for example: 2966 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2967 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2968 // Negative fp literals with a preceding "-" are 2969 // handled likewise for uniformity. 2970 // 2971 bool 2972 AMDGPUAsmParser::parseSP3NegModifier() { 2973 2974 AsmToken NextToken[2]; 2975 peekTokens(NextToken); 2976 2977 if (isToken(AsmToken::Minus) && 2978 (isRegister(NextToken[0], NextToken[1]) || 2979 NextToken[0].is(AsmToken::Pipe) || 2980 isId(NextToken[0], "abs"))) { 2981 lex(); 2982 return true; 2983 } 2984 2985 return false; 2986 } 2987 2988 OperandMatchResultTy 2989 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2990 bool AllowImm) { 2991 bool Neg, SP3Neg; 2992 bool Abs, SP3Abs; 2993 SMLoc Loc; 2994 2995 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2996 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2997 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2998 return MatchOperand_ParseFail; 2999 } 3000 3001 SP3Neg = parseSP3NegModifier(); 3002 3003 Loc = getLoc(); 3004 Neg = trySkipId("neg"); 3005 if (Neg && SP3Neg) { 3006 Error(Loc, "expected register or immediate"); 3007 return MatchOperand_ParseFail; 3008 } 3009 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3010 return MatchOperand_ParseFail; 3011 3012 Abs = trySkipId("abs"); 3013 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3014 return MatchOperand_ParseFail; 3015 3016 Loc = getLoc(); 3017 SP3Abs = trySkipToken(AsmToken::Pipe); 3018 if (Abs && SP3Abs) { 3019 Error(Loc, "expected register or immediate"); 3020 return MatchOperand_ParseFail; 3021 } 3022 3023 OperandMatchResultTy Res; 3024 if (AllowImm) { 3025 Res = parseRegOrImm(Operands, SP3Abs); 3026 } else { 3027 Res = parseReg(Operands); 3028 } 3029 if (Res != MatchOperand_Success) { 3030 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3031 } 3032 3033 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3034 return MatchOperand_ParseFail; 3035 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3036 return MatchOperand_ParseFail; 3037 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3038 return MatchOperand_ParseFail; 3039 3040 AMDGPUOperand::Modifiers Mods; 3041 Mods.Abs = Abs || SP3Abs; 3042 Mods.Neg = Neg || SP3Neg; 3043 3044 if (Mods.hasFPModifiers()) { 3045 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3046 if (Op.isExpr()) { 3047 Error(Op.getStartLoc(), "expected an absolute expression"); 3048 return MatchOperand_ParseFail; 3049 } 3050 Op.setModifiers(Mods); 3051 } 3052 return MatchOperand_Success; 3053 } 3054 3055 OperandMatchResultTy 3056 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3057 bool AllowImm) { 3058 bool Sext = trySkipId("sext"); 3059 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3060 return MatchOperand_ParseFail; 3061 3062 OperandMatchResultTy Res; 3063 if (AllowImm) { 3064 Res = parseRegOrImm(Operands); 3065 } else { 3066 Res = parseReg(Operands); 3067 } 3068 if (Res != MatchOperand_Success) { 3069 return Sext? MatchOperand_ParseFail : Res; 3070 } 3071 3072 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3073 return MatchOperand_ParseFail; 3074 3075 AMDGPUOperand::Modifiers Mods; 3076 Mods.Sext = Sext; 3077 3078 if (Mods.hasIntModifiers()) { 3079 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3080 if (Op.isExpr()) { 3081 Error(Op.getStartLoc(), "expected an absolute expression"); 3082 return MatchOperand_ParseFail; 3083 } 3084 Op.setModifiers(Mods); 3085 } 3086 3087 return MatchOperand_Success; 3088 } 3089 3090 OperandMatchResultTy 3091 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3092 return parseRegOrImmWithFPInputMods(Operands, false); 3093 } 3094 3095 OperandMatchResultTy 3096 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3097 return parseRegOrImmWithIntInputMods(Operands, false); 3098 } 3099 3100 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3101 auto Loc = getLoc(); 3102 if (trySkipId("off")) { 3103 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3104 AMDGPUOperand::ImmTyOff, false)); 3105 return MatchOperand_Success; 3106 } 3107 3108 if (!isRegister()) 3109 return MatchOperand_NoMatch; 3110 3111 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3112 if (Reg) { 3113 Operands.push_back(std::move(Reg)); 3114 return MatchOperand_Success; 3115 } 3116 3117 return MatchOperand_ParseFail; 3118 3119 } 3120 3121 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3122 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3123 3124 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3125 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3126 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3127 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3128 return Match_InvalidOperand; 3129 3130 if ((TSFlags & SIInstrFlags::VOP3) && 3131 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3132 getForcedEncodingSize() != 64) 3133 return Match_PreferE32; 3134 3135 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3136 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3137 // v_mac_f32/16 allow only dst_sel == DWORD; 3138 auto OpNum = 3139 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3140 const auto &Op = Inst.getOperand(OpNum); 3141 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3142 return Match_InvalidOperand; 3143 } 3144 } 3145 3146 return Match_Success; 3147 } 3148 3149 static ArrayRef<unsigned> getAllVariants() { 3150 static const unsigned Variants[] = { 3151 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3152 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3153 }; 3154 3155 return makeArrayRef(Variants); 3156 } 3157 3158 // Which asm variants we should check. 3159 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3160 if (getForcedEncodingSize() == 32) { 3161 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3162 return makeArrayRef(Variants); 3163 } 3164 3165 if (isForcedVOP3()) { 3166 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3167 return makeArrayRef(Variants); 3168 } 3169 3170 if (isForcedSDWA()) { 3171 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3172 AMDGPUAsmVariants::SDWA9}; 3173 return makeArrayRef(Variants); 3174 } 3175 3176 if (isForcedDPP()) { 3177 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3178 return makeArrayRef(Variants); 3179 } 3180 3181 return getAllVariants(); 3182 } 3183 3184 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3185 if (getForcedEncodingSize() == 32) 3186 return "e32"; 3187 3188 if (isForcedVOP3()) 3189 return "e64"; 3190 3191 if (isForcedSDWA()) 3192 return "sdwa"; 3193 3194 if (isForcedDPP()) 3195 return "dpp"; 3196 3197 return ""; 3198 } 3199 3200 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3201 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3202 const unsigned Num = Desc.getNumImplicitUses(); 3203 for (unsigned i = 0; i < Num; ++i) { 3204 unsigned Reg = Desc.ImplicitUses[i]; 3205 switch (Reg) { 3206 case AMDGPU::FLAT_SCR: 3207 case AMDGPU::VCC: 3208 case AMDGPU::VCC_LO: 3209 case AMDGPU::VCC_HI: 3210 case AMDGPU::M0: 3211 return Reg; 3212 default: 3213 break; 3214 } 3215 } 3216 return AMDGPU::NoRegister; 3217 } 3218 3219 // NB: This code is correct only when used to check constant 3220 // bus limitations because GFX7 supports no f16 inline constants. 3221 // Note that there are no cases when a GFX7 opcode violates 3222 // constant bus limitations due to the use of an f16 constant.
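// As an illustration (not exhaustive): for a 4-byte source operand the
// immediate -1 is treated as inline and does not consume a constant bus slot,
// whereas 0x11223344 is not inlinable and is counted as a literal by the
// constant bus check.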
3223 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3224 unsigned OpIdx) const { 3225 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3226 3227 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3228 return false; 3229 } 3230 3231 const MCOperand &MO = Inst.getOperand(OpIdx); 3232 3233 int64_t Val = MO.getImm(); 3234 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3235 3236 switch (OpSize) { // expected operand size 3237 case 8: 3238 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3239 case 4: 3240 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3241 case 2: { 3242 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3243 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3244 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3245 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3246 return AMDGPU::isInlinableIntLiteral(Val); 3247 3248 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3249 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3250 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3251 return AMDGPU::isInlinableIntLiteralV216(Val); 3252 3253 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3254 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3255 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3256 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3257 3258 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3259 } 3260 default: 3261 llvm_unreachable("invalid operand size"); 3262 } 3263 } 3264 3265 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3266 if (!isGFX10Plus()) 3267 return 1; 3268 3269 switch (Opcode) { 3270 // 64-bit shift instructions can use only one scalar value input 3271 case AMDGPU::V_LSHLREV_B64_e64: 3272 case AMDGPU::V_LSHLREV_B64_gfx10: 3273 case AMDGPU::V_LSHRREV_B64_e64: 3274 case AMDGPU::V_LSHRREV_B64_gfx10: 3275 case AMDGPU::V_ASHRREV_I64_e64: 3276 case AMDGPU::V_ASHRREV_I64_gfx10: 3277 case AMDGPU::V_LSHL_B64_e64: 3278 case AMDGPU::V_LSHR_B64_e64: 3279 case AMDGPU::V_ASHR_I64_e64: 3280 return 1; 3281 default: 3282 return 2; 3283 } 3284 } 3285 3286 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3287 const MCOperand &MO = Inst.getOperand(OpIdx); 3288 if (MO.isImm()) { 3289 return !isInlineConstant(Inst, OpIdx); 3290 } else if (MO.isReg()) { 3291 auto Reg = MO.getReg(); 3292 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3293 auto PReg = mc2PseudoReg(Reg); 3294 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3295 } else { 3296 return true; 3297 } 3298 } 3299 3300 bool 3301 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3302 const OperandVector &Operands) { 3303 const unsigned Opcode = Inst.getOpcode(); 3304 const MCInstrDesc &Desc = MII.get(Opcode); 3305 unsigned LastSGPR = AMDGPU::NoRegister; 3306 unsigned ConstantBusUseCount = 0; 3307 unsigned NumLiterals = 0; 3308 unsigned LiteralSize; 3309 3310 if (Desc.TSFlags & 3311 (SIInstrFlags::VOPC | 3312 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3313 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3314 SIInstrFlags::SDWA)) { 3315 // Check special imm operands (used by madmk, etc) 3316 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3317 ++NumLiterals; 3318 LiteralSize = 4; 3319 } 3320 3321 SmallDenseSet<unsigned> SGPRsUsed; 3322 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3323 if (SGPRUsed != AMDGPU::NoRegister) { 3324 SGPRsUsed.insert(SGPRUsed); 3325 ++ConstantBusUseCount; 3326 } 3327 3328 
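// Worked example (illustration only): on targets where getConstantBusLimit()
// returns 1, an instruction such as "v_add_f32 v0, s0, s1" reads two distinct
// SGPRs over the constant bus and is rejected below, while
// "v_add_f32 v0, s0, v1" is accepted.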
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3329 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3330 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3331 3332 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3333 3334 for (int OpIdx : OpIndices) { 3335 if (OpIdx == -1) break; 3336 3337 const MCOperand &MO = Inst.getOperand(OpIdx); 3338 if (usesConstantBus(Inst, OpIdx)) { 3339 if (MO.isReg()) { 3340 LastSGPR = mc2PseudoReg(MO.getReg()); 3341 // Pairs of registers with a partial intersection like these 3342 // s0, s[0:1] 3343 // flat_scratch_lo, flat_scratch 3344 // flat_scratch_lo, flat_scratch_hi 3345 // are theoretically valid but they are disabled anyway. 3346 // Note that this code mimics SIInstrInfo::verifyInstruction. 3347 if (!SGPRsUsed.count(LastSGPR)) { 3348 SGPRsUsed.insert(LastSGPR); 3349 ++ConstantBusUseCount; 3350 } 3351 } else { // Expression or a literal 3352 3353 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3354 continue; // special operand like VINTERP attr_chan 3355 3356 // An instruction may use only one literal. 3357 // This has been validated in a previous step. 3358 // See validateVOPLiteral. 3359 // This literal may be used as more than one operand. 3360 // If all these operands are of the same size, 3361 // this literal counts as one scalar value. 3362 // Otherwise it counts as 2 scalar values. 3363 // See "GFX10 Shader Programming", section 3.6.2.3. 3364 3365 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3366 if (Size < 4) Size = 4; 3367 3368 if (NumLiterals == 0) { 3369 NumLiterals = 1; 3370 LiteralSize = Size; 3371 } else if (LiteralSize != Size) { 3372 NumLiterals = 2; 3373 } 3374 } 3375 } 3376 } 3377 } 3378 ConstantBusUseCount += NumLiterals; 3379 3380 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3381 return true; 3382 3383 SMLoc LitLoc = getLitLoc(Operands); 3384 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3385 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3386 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3387 return false; 3388 } 3389 3390 bool 3391 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3392 const OperandVector &Operands) { 3393 const unsigned Opcode = Inst.getOpcode(); 3394 const MCInstrDesc &Desc = MII.get(Opcode); 3395 3396 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3397 if (DstIdx == -1 || 3398 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3399 return true; 3400 } 3401 3402 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3403 3404 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3405 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3406 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3407 3408 assert(DstIdx != -1); 3409 const MCOperand &Dst = Inst.getOperand(DstIdx); 3410 assert(Dst.isReg()); 3411 3412 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3413 3414 for (int SrcIdx : SrcIndices) { 3415 if (SrcIdx == -1) break; 3416 const MCOperand &Src = Inst.getOperand(SrcIdx); 3417 if (Src.isReg()) { 3418 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3419 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3420 Error(getRegLoc(SrcReg, Operands), 3421 "destination must be different than all sources"); 3422 return false; 3423 } 3424 } 3425 } 3426 3427 return true; 3428 } 3429 3430 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3431 3432 const unsigned Opc = Inst.getOpcode(); 3433 const MCInstrDesc &Desc = MII.get(Opc); 3434 3435 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3436 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3437 assert(ClampIdx != -1); 3438 return Inst.getOperand(ClampIdx).getImm() == 0; 3439 } 3440 3441 return true; 3442 } 3443 3444 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3445 3446 const unsigned Opc = Inst.getOpcode(); 3447 const MCInstrDesc &Desc = MII.get(Opc); 3448 3449 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3450 return true; 3451 3452 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3453 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3454 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3455 3456 assert(VDataIdx != -1); 3457 3458 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3459 return true; 3460 3461 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3462 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3463 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3464 if (DMask == 0) 3465 DMask = 1; 3466 3467 unsigned DataSize = 3468 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3469 if (hasPackedD16()) { 3470 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3471 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3472 DataSize = (DataSize + 1) / 2; 3473 } 3474 3475 return (VDataSize / 4) == DataSize + TFESize; 3476 } 3477 3478 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3479 const unsigned Opc = Inst.getOpcode(); 3480 const MCInstrDesc &Desc = MII.get(Opc); 3481 3482 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3483 return true; 3484 3485 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3486 3487 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3488 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3489 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3490 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3491 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3492 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3493 3494 assert(VAddr0Idx != -1); 3495 assert(SrsrcIdx != -1); 3496 assert(SrsrcIdx > VAddr0Idx); 3497 3498 if (DimIdx == -1) 3499 return true; // intersect_ray 3500 3501 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3502 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3503 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3504 unsigned ActualAddrSize = 3505 IsNSA ? SrsrcIdx - VAddr0Idx 3506 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3507 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3508 3509 unsigned ExpectedAddrSize = 3510 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3511 3512 if (!IsNSA) { 3513 if (ExpectedAddrSize > 8) 3514 ExpectedAddrSize = 16; 3515 3516 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3517 // This provides backward compatibility for assembly created 3518 // before 160b/192b/224b types were directly supported. 3519 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3520 return true; 3521 } 3522 3523 return ActualAddrSize == ExpectedAddrSize; 3524 } 3525 3526 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3527 3528 const unsigned Opc = Inst.getOpcode(); 3529 const MCInstrDesc &Desc = MII.get(Opc); 3530 3531 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3532 return true; 3533 if (!Desc.mayLoad() || !Desc.mayStore()) 3534 return true; // Not atomic 3535 3536 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3537 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3538 3539 // This is an incomplete check because image_atomic_cmpswap 3540 // may only use 0x3 and 0xf while other atomic operations 3541 // may use 0x1 and 0x3. However these limitations are 3542 // verified when we check that dmask matches dst size. 3543 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3544 } 3545 3546 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3547 3548 const unsigned Opc = Inst.getOpcode(); 3549 const MCInstrDesc &Desc = MII.get(Opc); 3550 3551 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3552 return true; 3553 3554 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3555 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3556 3557 // GATHER4 instructions use dmask in a different fashion compared to 3558 // other MIMG instructions. The only useful DMASK values are 3559 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3560 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3561 // this. 3562 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3563 } 3564 3565 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3566 const unsigned Opc = Inst.getOpcode(); 3567 const MCInstrDesc &Desc = MII.get(Opc); 3568 3569 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3570 return true; 3571 3572 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3573 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3574 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3575 3576 if (!BaseOpcode->MSAA) 3577 return true; 3578 3579 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3580 assert(DimIdx != -1); 3581 3582 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3583 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3584 3585 return DimInfo->MSAA; 3586 } 3587 3588 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3589 { 3590 switch (Opcode) { 3591 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3592 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3593 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3594 return true; 3595 default: 3596 return false; 3597 } 3598 } 3599 3600 // movrels* opcodes should only allow VGPRS as src0. 3601 // This is specified in .td description for vop1/vop3, 3602 // but sdwa is handled differently. See isSDWAOperand. 3603 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3604 const OperandVector &Operands) { 3605 3606 const unsigned Opc = Inst.getOpcode(); 3607 const MCInstrDesc &Desc = MII.get(Opc); 3608 3609 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3610 return true; 3611 3612 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3613 assert(Src0Idx != -1); 3614 3615 SMLoc ErrLoc; 3616 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3617 if (Src0.isReg()) { 3618 auto Reg = mc2PseudoReg(Src0.getReg()); 3619 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3620 if (!isSGPR(Reg, TRI)) 3621 return true; 3622 ErrLoc = getRegLoc(Reg, Operands); 3623 } else { 3624 ErrLoc = getConstLoc(Operands); 3625 } 3626 3627 Error(ErrLoc, "source operand must be a VGPR"); 3628 return false; 3629 } 3630 3631 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3632 const OperandVector &Operands) { 3633 3634 const unsigned Opc = Inst.getOpcode(); 3635 3636 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3637 return true; 3638 3639 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3640 assert(Src0Idx != -1); 3641 3642 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3643 if (!Src0.isReg()) 3644 return true; 3645 3646 auto Reg = mc2PseudoReg(Src0.getReg()); 3647 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3648 if (isSGPR(Reg, TRI)) { 3649 Error(getRegLoc(Reg, Operands), 3650 "source operand must be either a VGPR or an inline constant"); 3651 return false; 3652 } 3653 3654 return true; 3655 } 3656 3657 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3658 const OperandVector &Operands) { 3659 const unsigned Opc = Inst.getOpcode(); 3660 const MCInstrDesc &Desc = MII.get(Opc); 3661 3662 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3663 return true; 3664 3665 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3666 if (Src2Idx == -1) 3667 return true; 3668 3669 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3670 if (!Src2.isReg()) 3671 return true; 3672 3673 MCRegister Src2Reg = Src2.getReg(); 3674 MCRegister DstReg = Inst.getOperand(0).getReg(); 3675 if 
(Src2Reg == DstReg)
    return true;

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  default:
    return true;
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    break;
  }

  // TODO: Check that src0 = src1 or src2.

  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
            .getImm() &
        SISrcMods::ABS) {
      return false;
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case
AMDGPU::V_SUBREV_U16_e64_vi: 3796 3797 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3798 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3799 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3800 3801 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3802 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3803 3804 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3805 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3806 3807 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3808 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3809 3810 case AMDGPU::V_LSHRREV_B32_e32: 3811 case AMDGPU::V_LSHRREV_B32_e64: 3812 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3813 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3814 case AMDGPU::V_LSHRREV_B32_e32_vi: 3815 case AMDGPU::V_LSHRREV_B32_e64_vi: 3816 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3817 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3818 3819 case AMDGPU::V_ASHRREV_I32_e32: 3820 case AMDGPU::V_ASHRREV_I32_e64: 3821 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3822 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3823 case AMDGPU::V_ASHRREV_I32_e32_vi: 3824 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3825 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3826 case AMDGPU::V_ASHRREV_I32_e64_vi: 3827 3828 case AMDGPU::V_LSHLREV_B32_e32: 3829 case AMDGPU::V_LSHLREV_B32_e64: 3830 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3831 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3832 case AMDGPU::V_LSHLREV_B32_e32_vi: 3833 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3834 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3835 case AMDGPU::V_LSHLREV_B32_e64_vi: 3836 3837 case AMDGPU::V_LSHLREV_B16_e32: 3838 case AMDGPU::V_LSHLREV_B16_e64: 3839 case AMDGPU::V_LSHLREV_B16_e32_vi: 3840 case AMDGPU::V_LSHLREV_B16_e64_vi: 3841 case AMDGPU::V_LSHLREV_B16_gfx10: 3842 3843 case AMDGPU::V_LSHRREV_B16_e32: 3844 case AMDGPU::V_LSHRREV_B16_e64: 3845 case AMDGPU::V_LSHRREV_B16_e32_vi: 3846 case AMDGPU::V_LSHRREV_B16_e64_vi: 3847 case AMDGPU::V_LSHRREV_B16_gfx10: 3848 3849 case AMDGPU::V_ASHRREV_I16_e32: 3850 case AMDGPU::V_ASHRREV_I16_e64: 3851 case AMDGPU::V_ASHRREV_I16_e32_vi: 3852 case AMDGPU::V_ASHRREV_I16_e64_vi: 3853 case AMDGPU::V_ASHRREV_I16_gfx10: 3854 3855 case AMDGPU::V_LSHLREV_B64_e64: 3856 case AMDGPU::V_LSHLREV_B64_gfx10: 3857 case AMDGPU::V_LSHLREV_B64_vi: 3858 3859 case AMDGPU::V_LSHRREV_B64_e64: 3860 case AMDGPU::V_LSHRREV_B64_gfx10: 3861 case AMDGPU::V_LSHRREV_B64_vi: 3862 3863 case AMDGPU::V_ASHRREV_I64_e64: 3864 case AMDGPU::V_ASHRREV_I64_gfx10: 3865 case AMDGPU::V_ASHRREV_I64_vi: 3866 3867 case AMDGPU::V_PK_LSHLREV_B16: 3868 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3869 case AMDGPU::V_PK_LSHLREV_B16_vi: 3870 3871 case AMDGPU::V_PK_LSHRREV_B16: 3872 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3873 case AMDGPU::V_PK_LSHRREV_B16_vi: 3874 case AMDGPU::V_PK_ASHRREV_I16: 3875 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3876 case AMDGPU::V_PK_ASHRREV_I16_vi: 3877 return true; 3878 default: 3879 return false; 3880 } 3881 } 3882 3883 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3884 3885 using namespace SIInstrFlags; 3886 const unsigned Opcode = Inst.getOpcode(); 3887 const MCInstrDesc &Desc = MII.get(Opcode); 3888 3889 // lds_direct register is defined so that it can be used 3890 // with 9-bit operands only. Ignore encodings which do not accept these. 
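  // For example (illustrative): lds_direct used as src0 of a plain VOP1/VOP2
  // instruction is accepted, while passing it as src1/src2, using it with an
  // SDWA encoding, or with a *rev* opcode is rejected by the checks below.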
3891 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3892 if ((Desc.TSFlags & Enc) == 0) 3893 return None; 3894 3895 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3896 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3897 if (SrcIdx == -1) 3898 break; 3899 const auto &Src = Inst.getOperand(SrcIdx); 3900 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3901 3902 if (isGFX90A()) 3903 return StringRef("lds_direct is not supported on this GPU"); 3904 3905 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3906 return StringRef("lds_direct cannot be used with this instruction"); 3907 3908 if (SrcName != OpName::src0) 3909 return StringRef("lds_direct may be used as src0 only"); 3910 } 3911 } 3912 3913 return None; 3914 } 3915 3916 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3917 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3918 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3919 if (Op.isFlatOffset()) 3920 return Op.getStartLoc(); 3921 } 3922 return getLoc(); 3923 } 3924 3925 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3926 const OperandVector &Operands) { 3927 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3928 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3929 return true; 3930 3931 auto Opcode = Inst.getOpcode(); 3932 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3933 assert(OpNum != -1); 3934 3935 const auto &Op = Inst.getOperand(OpNum); 3936 if (!hasFlatOffsets() && Op.getImm() != 0) { 3937 Error(getFlatOffsetLoc(Operands), 3938 "flat offset modifier is not supported on this GPU"); 3939 return false; 3940 } 3941 3942 // For FLAT segment the offset must be positive; 3943 // MSB is ignored and forced to zero. 3944 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3945 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3946 if (!isIntN(OffsetSize, Op.getImm())) { 3947 Error(getFlatOffsetLoc(Operands), 3948 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3949 return false; 3950 } 3951 } else { 3952 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3953 if (!isUIntN(OffsetSize, Op.getImm())) { 3954 Error(getFlatOffsetLoc(Operands), 3955 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3956 return false; 3957 } 3958 } 3959 3960 return true; 3961 } 3962 3963 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3964 // Start with second operand because SMEM Offset cannot be dst or src0. 
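  // (Operands[0] is the mnemonic token, not a machine operand.)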
3965 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3966 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3967 if (Op.isSMEMOffset()) 3968 return Op.getStartLoc(); 3969 } 3970 return getLoc(); 3971 } 3972 3973 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3974 const OperandVector &Operands) { 3975 if (isCI() || isSI()) 3976 return true; 3977 3978 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3979 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3980 return true; 3981 3982 auto Opcode = Inst.getOpcode(); 3983 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3984 if (OpNum == -1) 3985 return true; 3986 3987 const auto &Op = Inst.getOperand(OpNum); 3988 if (!Op.isImm()) 3989 return true; 3990 3991 uint64_t Offset = Op.getImm(); 3992 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3993 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3994 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3995 return true; 3996 3997 Error(getSMEMOffsetLoc(Operands), 3998 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3999 "expected a 21-bit signed offset"); 4000 4001 return false; 4002 } 4003 4004 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4005 unsigned Opcode = Inst.getOpcode(); 4006 const MCInstrDesc &Desc = MII.get(Opcode); 4007 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4008 return true; 4009 4010 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4011 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4012 4013 const int OpIndices[] = { Src0Idx, Src1Idx }; 4014 4015 unsigned NumExprs = 0; 4016 unsigned NumLiterals = 0; 4017 uint32_t LiteralValue; 4018 4019 for (int OpIdx : OpIndices) { 4020 if (OpIdx == -1) break; 4021 4022 const MCOperand &MO = Inst.getOperand(OpIdx); 4023 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4024 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4025 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4026 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4027 if (NumLiterals == 0 || LiteralValue != Value) { 4028 LiteralValue = Value; 4029 ++NumLiterals; 4030 } 4031 } else if (MO.isExpr()) { 4032 ++NumExprs; 4033 } 4034 } 4035 } 4036 4037 return NumLiterals + NumExprs <= 1; 4038 } 4039 4040 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4041 const unsigned Opc = Inst.getOpcode(); 4042 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4043 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4044 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4045 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4046 4047 if (OpSel & ~3) 4048 return false; 4049 } 4050 4051 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4052 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4053 if (OpSelIdx != -1) { 4054 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4055 return false; 4056 } 4057 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4058 if (OpSelHiIdx != -1) { 4059 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4060 return false; 4061 } 4062 } 4063 4064 return true; 4065 } 4066 4067 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4068 const OperandVector &Operands) { 4069 const unsigned Opc = Inst.getOpcode(); 4070 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4071 if (DppCtrlIdx < 0) 4072 return true; 4073 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4074 4075 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4076 // DPP64 is supported for row_newbcast only. 4077 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4078 if (Src0Idx >= 0 && 4079 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4080 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4081 Error(S, "64 bit dpp only supports row_newbcast"); 4082 return false; 4083 } 4084 } 4085 4086 return true; 4087 } 4088 4089 // Check if VCC register matches wavefront size 4090 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4091 auto FB = getFeatureBits(); 4092 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4093 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4094 } 4095 4096 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4097 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4098 const OperandVector &Operands) { 4099 unsigned Opcode = Inst.getOpcode(); 4100 const MCInstrDesc &Desc = MII.get(Opcode); 4101 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4102 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4103 ImmIdx == -1) 4104 return true; 4105 4106 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4107 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4108 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4109 4110 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4111 4112 unsigned NumExprs = 0; 4113 unsigned NumLiterals = 0; 4114 uint32_t LiteralValue; 4115 4116 for (int OpIdx : OpIndices) { 4117 if (OpIdx == -1) 4118 continue; 4119 4120 const MCOperand &MO = Inst.getOperand(OpIdx); 4121 if (!MO.isImm() && !MO.isExpr()) 4122 continue; 4123 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4124 continue; 4125 4126 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4127 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4128 Error(getConstLoc(Operands), 4129 "inline constants are not allowed for this operand"); 4130 return false; 4131 } 4132 4133 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4134 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4135 if (NumLiterals == 0 || LiteralValue != Value) { 4136 LiteralValue = Value; 4137 ++NumLiterals; 4138 } 4139 } else if (MO.isExpr()) { 4140 ++NumExprs; 4141 } 4142 } 4143 NumLiterals += NumExprs; 4144 4145 if (!NumLiterals) 4146 return true; 4147 4148 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4149 Error(getLitLoc(Operands), "literal operands are not supported"); 4150 return false; 4151 } 4152 4153 if (NumLiterals > 1) { 4154 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4155 return false; 4156 } 4157 4158 return true; 4159 } 4160 4161 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4162 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4163 const MCRegisterInfo *MRI) { 4164 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4165 if (OpIdx < 0) 4166 return -1; 4167 4168 const MCOperand &Op = Inst.getOperand(OpIdx); 4169 if (!Op.isReg()) 4170 return -1; 4171 4172 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4173 auto Reg = Sub ? Sub : Op.getReg(); 4174 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4175 return AGPR32.contains(Reg) ? 
1 : 0; 4176 } 4177 4178 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4179 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4180 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4181 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4182 SIInstrFlags::DS)) == 0) 4183 return true; 4184 4185 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4186 : AMDGPU::OpName::vdata; 4187 4188 const MCRegisterInfo *MRI = getMRI(); 4189 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4190 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4191 4192 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4193 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4194 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4195 return false; 4196 } 4197 4198 auto FB = getFeatureBits(); 4199 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4200 if (DataAreg < 0 || DstAreg < 0) 4201 return true; 4202 return DstAreg == DataAreg; 4203 } 4204 4205 return DstAreg < 1 && DataAreg < 1; 4206 } 4207 4208 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4209 auto FB = getFeatureBits(); 4210 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4211 return true; 4212 4213 const MCRegisterInfo *MRI = getMRI(); 4214 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4215 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4216 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4217 const MCOperand &Op = Inst.getOperand(I); 4218 if (!Op.isReg()) 4219 continue; 4220 4221 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4222 if (!Sub) 4223 continue; 4224 4225 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4226 return false; 4227 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4228 return false; 4229 } 4230 4231 return true; 4232 } 4233 4234 // gfx90a has an undocumented limitation: 4235 // DS_GWS opcodes must use even aligned registers. 4236 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4237 const OperandVector &Operands) { 4238 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4239 return true; 4240 4241 int Opc = Inst.getOpcode(); 4242 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4243 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4244 return true; 4245 4246 const MCRegisterInfo *MRI = getMRI(); 4247 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4248 int Data0Pos = 4249 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4250 assert(Data0Pos != -1); 4251 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4252 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4253 if (RegIdx & 1) { 4254 SMLoc RegLoc = getRegLoc(Reg, Operands); 4255 Error(RegLoc, "vgpr must be even aligned"); 4256 return false; 4257 } 4258 4259 return true; 4260 } 4261 4262 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4263 const OperandVector &Operands, 4264 const SMLoc &IDLoc) { 4265 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4266 AMDGPU::OpName::cpol); 4267 if (CPolPos == -1) 4268 return true; 4269 4270 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4271 4272 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4273 if ((TSFlags & (SIInstrFlags::SMRD)) && 4274 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4275 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4276 return false; 4277 } 4278 4279 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4280 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4281 StringRef CStr(S.getPointer()); 4282 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4283 Error(S, "scc is not supported on this GPU"); 4284 return false; 4285 } 4286 4287 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4288 return true; 4289 4290 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4291 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4292 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4293 : "instruction must use glc"); 4294 return false; 4295 } 4296 } else { 4297 if (CPol & CPol::GLC) { 4298 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4299 StringRef CStr(S.getPointer()); 4300 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4301 Error(S, isGFX940() ? "instruction must not use sc0" 4302 : "instruction must not use glc"); 4303 return false; 4304 } 4305 } 4306 4307 return true; 4308 } 4309 4310 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4311 const SMLoc &IDLoc, 4312 const OperandVector &Operands) { 4313 if (auto ErrMsg = validateLdsDirect(Inst)) { 4314 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4315 return false; 4316 } 4317 if (!validateSOPLiteral(Inst)) { 4318 Error(getLitLoc(Operands), 4319 "only one literal operand is allowed"); 4320 return false; 4321 } 4322 if (!validateVOPLiteral(Inst, Operands)) { 4323 return false; 4324 } 4325 if (!validateConstantBusLimitations(Inst, Operands)) { 4326 return false; 4327 } 4328 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4329 return false; 4330 } 4331 if (!validateIntClampSupported(Inst)) { 4332 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4333 "integer clamping is not supported on this GPU"); 4334 return false; 4335 } 4336 if (!validateOpSel(Inst)) { 4337 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4338 "invalid op_sel operand"); 4339 return false; 4340 } 4341 if (!validateDPP(Inst, Operands)) { 4342 return false; 4343 } 4344 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst)) {
    Error(IDLoc, "dim modifier is required on this GPU");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
          "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAddrSize(Inst)) {
    Error(IDLoc,
          "image address size does not match dim and a16");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateFlatOffset(Inst, Operands)) {
    return false;
  }
  if (!validateSMEMOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
      ? "invalid register class: data and dst should be all VGPR or AGPR"
      : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
          "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateGWS(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }

  return true;
}

static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
      return true;
  }

  return false;
}

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
4462 // Check if any other variants are supported. 4463 StringRef VariantName = getMatchedVariantName(); 4464 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4465 return Error(IDLoc, 4466 Twine(VariantName, 4467 " variant of this instruction is not supported")); 4468 } 4469 4470 // Finally check if this instruction is supported on any other GPU. 4471 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4472 return Error(IDLoc, "instruction not supported on this GPU"); 4473 } 4474 4475 // Instruction not supported on any GPU. Probably a typo. 4476 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4477 return Error(IDLoc, "invalid instruction" + Suggestion); 4478 } 4479 4480 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4481 OperandVector &Operands, 4482 MCStreamer &Out, 4483 uint64_t &ErrorInfo, 4484 bool MatchingInlineAsm) { 4485 MCInst Inst; 4486 unsigned Result = Match_Success; 4487 for (auto Variant : getMatchedVariants()) { 4488 uint64_t EI; 4489 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4490 Variant); 4491 // We order match statuses from least to most specific. We use most specific 4492 // status as resulting 4493 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4494 if ((R == Match_Success) || 4495 (R == Match_PreferE32) || 4496 (R == Match_MissingFeature && Result != Match_PreferE32) || 4497 (R == Match_InvalidOperand && Result != Match_MissingFeature 4498 && Result != Match_PreferE32) || 4499 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4500 && Result != Match_MissingFeature 4501 && Result != Match_PreferE32)) { 4502 Result = R; 4503 ErrorInfo = EI; 4504 } 4505 if (R == Match_Success) 4506 break; 4507 } 4508 4509 if (Result == Match_Success) { 4510 if (!validateInstruction(Inst, IDLoc, Operands)) { 4511 return true; 4512 } 4513 Inst.setLoc(IDLoc); 4514 Out.emitInstruction(Inst, getSTI()); 4515 return false; 4516 } 4517 4518 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4519 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4520 return true; 4521 } 4522 4523 switch (Result) { 4524 default: break; 4525 case Match_MissingFeature: 4526 // It has been verified that the specified instruction 4527 // mnemonic is valid. A match was found but it requires 4528 // features which are not supported on this GPU. 
4529 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4530 4531 case Match_InvalidOperand: { 4532 SMLoc ErrorLoc = IDLoc; 4533 if (ErrorInfo != ~0ULL) { 4534 if (ErrorInfo >= Operands.size()) { 4535 return Error(IDLoc, "too few operands for instruction"); 4536 } 4537 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4538 if (ErrorLoc == SMLoc()) 4539 ErrorLoc = IDLoc; 4540 } 4541 return Error(ErrorLoc, "invalid operand for instruction"); 4542 } 4543 4544 case Match_PreferE32: 4545 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4546 "should be encoded as e32"); 4547 case Match_MnemonicFail: 4548 llvm_unreachable("Invalid instructions should have been handled already"); 4549 } 4550 llvm_unreachable("Implement any new match types added!"); 4551 } 4552 4553 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4554 int64_t Tmp = -1; 4555 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4556 return true; 4557 } 4558 if (getParser().parseAbsoluteExpression(Tmp)) { 4559 return true; 4560 } 4561 Ret = static_cast<uint32_t>(Tmp); 4562 return false; 4563 } 4564 4565 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4566 uint32_t &Minor) { 4567 if (ParseAsAbsoluteExpression(Major)) 4568 return TokError("invalid major version"); 4569 4570 if (!trySkipToken(AsmToken::Comma)) 4571 return TokError("minor version number required, comma expected"); 4572 4573 if (ParseAsAbsoluteExpression(Minor)) 4574 return TokError("invalid minor version"); 4575 4576 return false; 4577 } 4578 4579 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4580 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4581 return TokError("directive only supported for amdgcn architecture"); 4582 4583 std::string TargetIDDirective; 4584 SMLoc TargetStart = getTok().getLoc(); 4585 if (getParser().parseEscapedString(TargetIDDirective)) 4586 return true; 4587 4588 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4589 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4590 return getParser().Error(TargetRange.Start, 4591 (Twine(".amdgcn_target directive's target id ") + 4592 Twine(TargetIDDirective) + 4593 Twine(" does not match the specified target id ") + 4594 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4595 4596 return false; 4597 } 4598 4599 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4600 return Error(Range.Start, "value out of range", Range); 4601 } 4602 4603 bool AMDGPUAsmParser::calculateGPRBlocks( 4604 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4605 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4606 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4607 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4608 // TODO(scott.linder): These calculations are duplicated from 4609 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
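  // Note (informal): a "block" is the hardware allocation granule of the
  // register file; the helpers used below return granule counts (minus one,
  // as encoded in COMPUTE_PGM_RSRC1) rather than raw register counts.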
4610 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4611 4612 unsigned NumVGPRs = NextFreeVGPR; 4613 unsigned NumSGPRs = NextFreeSGPR; 4614 4615 if (Version.Major >= 10) 4616 NumSGPRs = 0; 4617 else { 4618 unsigned MaxAddressableNumSGPRs = 4619 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4620 4621 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4622 NumSGPRs > MaxAddressableNumSGPRs) 4623 return OutOfRangeError(SGPRRange); 4624 4625 NumSGPRs += 4626 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4627 4628 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4629 NumSGPRs > MaxAddressableNumSGPRs) 4630 return OutOfRangeError(SGPRRange); 4631 4632 if (Features.test(FeatureSGPRInitBug)) 4633 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4634 } 4635 4636 VGPRBlocks = 4637 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4638 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4639 4640 return false; 4641 } 4642 4643 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4644 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4645 return TokError("directive only supported for amdgcn architecture"); 4646 4647 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4648 return TokError("directive only supported for amdhsa OS"); 4649 4650 StringRef KernelName; 4651 if (getParser().parseIdentifier(KernelName)) 4652 return true; 4653 4654 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4655 4656 StringSet<> Seen; 4657 4658 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4659 4660 SMRange VGPRRange; 4661 uint64_t NextFreeVGPR = 0; 4662 uint64_t AccumOffset = 0; 4663 uint64_t SharedVGPRCount = 0; 4664 SMRange SGPRRange; 4665 uint64_t NextFreeSGPR = 0; 4666 4667 // Count the number of user SGPRs implied from the enabled feature bits. 4668 unsigned ImpliedUserSGPRCount = 0; 4669 4670 // Track if the asm explicitly contains the directive for the user SGPR 4671 // count. 
4672 Optional<unsigned> ExplicitUserSGPRCount; 4673 bool ReserveVCC = true; 4674 bool ReserveFlatScr = true; 4675 Optional<bool> EnableWavefrontSize32; 4676 4677 while (true) { 4678 while (trySkipToken(AsmToken::EndOfStatement)); 4679 4680 StringRef ID; 4681 SMRange IDRange = getTok().getLocRange(); 4682 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4683 return true; 4684 4685 if (ID == ".end_amdhsa_kernel") 4686 break; 4687 4688 if (Seen.find(ID) != Seen.end()) 4689 return TokError(".amdhsa_ directives cannot be repeated"); 4690 Seen.insert(ID); 4691 4692 SMLoc ValStart = getLoc(); 4693 int64_t IVal; 4694 if (getParser().parseAbsoluteExpression(IVal)) 4695 return true; 4696 SMLoc ValEnd = getLoc(); 4697 SMRange ValRange = SMRange(ValStart, ValEnd); 4698 4699 if (IVal < 0) 4700 return OutOfRangeError(ValRange); 4701 4702 uint64_t Val = IVal; 4703 4704 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4705 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4706 return OutOfRangeError(RANGE); \ 4707 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4708 4709 if (ID == ".amdhsa_group_segment_fixed_size") { 4710 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4711 return OutOfRangeError(ValRange); 4712 KD.group_segment_fixed_size = Val; 4713 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4714 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4715 return OutOfRangeError(ValRange); 4716 KD.private_segment_fixed_size = Val; 4717 } else if (ID == ".amdhsa_kernarg_size") { 4718 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4719 return OutOfRangeError(ValRange); 4720 KD.kernarg_size = Val; 4721 } else if (ID == ".amdhsa_user_sgpr_count") { 4722 ExplicitUserSGPRCount = Val; 4723 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4724 if (hasArchitectedFlatScratch()) 4725 return Error(IDRange.Start, 4726 "directive is not supported with architected flat scratch", 4727 IDRange); 4728 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4729 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4730 Val, ValRange); 4731 if (Val) 4732 ImpliedUserSGPRCount += 4; 4733 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4734 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4735 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4736 ValRange); 4737 if (Val) 4738 ImpliedUserSGPRCount += 2; 4739 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4740 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4741 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4742 ValRange); 4743 if (Val) 4744 ImpliedUserSGPRCount += 2; 4745 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4746 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4747 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4748 Val, ValRange); 4749 if (Val) 4750 ImpliedUserSGPRCount += 2; 4751 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4752 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4753 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4754 ValRange); 4755 if (Val) 4756 ImpliedUserSGPRCount += 2; 4757 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4758 if (hasArchitectedFlatScratch()) 4759 return Error(IDRange.Start, 4760 "directive is not supported with architected flat scratch", 4761 IDRange); 4762 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4763 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4764 ValRange); 4765 if (Val) 4766 ImpliedUserSGPRCount += 2; 4767 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4768 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4769 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4770 Val, ValRange); 4771 if (Val) 4772 ImpliedUserSGPRCount += 1; 4773 } else if (ID == ".amdhsa_wavefront_size32") { 4774 if (IVersion.Major < 10) 4775 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4776 EnableWavefrontSize32 = Val; 4777 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4778 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4779 Val, ValRange); 4780 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4781 if (hasArchitectedFlatScratch()) 4782 return Error(IDRange.Start, 4783 "directive is not supported with architected flat scratch", 4784 IDRange); 4785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4786 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4787 } else if (ID == ".amdhsa_enable_private_segment") { 4788 if (!hasArchitectedFlatScratch()) 4789 return Error( 4790 IDRange.Start, 4791 "directive is not supported without architected flat scratch", 4792 IDRange); 4793 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4794 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4795 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4796 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4797 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4798 ValRange); 4799 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4800 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4801 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4802 ValRange); 4803 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4804 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4805 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4806 ValRange); 4807 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4808 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4809 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4810 ValRange); 4811 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4812 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4813 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4814 ValRange); 4815 } else if (ID == ".amdhsa_next_free_vgpr") { 4816 VGPRRange = ValRange; 4817 NextFreeVGPR = Val; 4818 } else if (ID == ".amdhsa_next_free_sgpr") { 4819 SGPRRange = ValRange; 4820 NextFreeSGPR = Val; 4821 } else if (ID == ".amdhsa_accum_offset") { 4822 if (!isGFX90A()) 4823 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4824 AccumOffset = Val; 4825 } else if (ID == ".amdhsa_reserve_vcc") { 4826 if (!isUInt<1>(Val)) 4827 return OutOfRangeError(ValRange); 4828 ReserveVCC = Val; 4829 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4830 if (IVersion.Major < 7) 4831 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4832 if (hasArchitectedFlatScratch()) 4833 return Error(IDRange.Start, 4834 "directive is not supported with architected flat scratch", 4835 IDRange); 4836 if (!isUInt<1>(Val)) 4837 return OutOfRangeError(ValRange); 4838 ReserveFlatScr = Val; 4839 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4840 if (IVersion.Major < 8) 4841 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4842 if (!isUInt<1>(Val)) 4843 return OutOfRangeError(ValRange); 4844 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4845 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4846 IDRange); 4847 } else if (ID == ".amdhsa_float_round_mode_32") { 4848 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4849 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4850 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4852 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4853 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4854 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4855 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4856 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4857 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4858 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4859 ValRange); 4860 } else if (ID == ".amdhsa_dx10_clamp") { 4861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4862 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4863 } else if (ID == ".amdhsa_ieee_mode") { 4864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4865 Val, ValRange); 4866 } else if (ID == ".amdhsa_fp16_overflow") { 4867 if (IVersion.Major < 9) 4868 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4869 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4870 ValRange); 4871 } else if (ID == ".amdhsa_tg_split") { 4872 if (!isGFX90A()) 4873 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4874 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4875 ValRange); 4876 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4877 if (IVersion.Major < 10) 4878 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4879 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4880 ValRange); 4881 } else if (ID == ".amdhsa_memory_ordered") { 4882 if (IVersion.Major < 10) 4883 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4884 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4885 ValRange); 4886 } else if (ID == ".amdhsa_forward_progress") { 4887 if (IVersion.Major < 10) 4888 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4889 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4890 ValRange); 4891 } else if (ID == ".amdhsa_shared_vgpr_count") { 4892 if (IVersion.Major < 10) 4893 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4894 SharedVGPRCount = Val; 4895 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4896 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4897 ValRange); 4898 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4899 PARSE_BITS_ENTRY( 4900 KD.compute_pgm_rsrc2, 4901 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4902 ValRange); 4903 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4904 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4905 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4906 Val, ValRange); 4907 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4908 PARSE_BITS_ENTRY( 4909 KD.compute_pgm_rsrc2, 4910 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4911 ValRange); 4912 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4913 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4914 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4915 Val, ValRange); 4916 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4917 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4918 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4919 Val, ValRange); 4920 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4921 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4922 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4923 Val, ValRange); 4924 } else if (ID == ".amdhsa_exception_int_div_zero") { 4925 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4926 
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdhsa_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  unsigned UserSGPRCount =
      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  if (isGFX90A()) {
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }

  if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
    if (SharedVGPRCount && EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }
    if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no
arguments, then use the ISA version for the 5025 // targeted GPU. 5026 if (isToken(AsmToken::EndOfStatement)) { 5027 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5028 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5029 ISA.Stepping, 5030 "AMD", "AMDGPU"); 5031 return false; 5032 } 5033 5034 if (ParseDirectiveMajorMinor(Major, Minor)) 5035 return true; 5036 5037 if (!trySkipToken(AsmToken::Comma)) 5038 return TokError("stepping version number required, comma expected"); 5039 5040 if (ParseAsAbsoluteExpression(Stepping)) 5041 return TokError("invalid stepping version"); 5042 5043 if (!trySkipToken(AsmToken::Comma)) 5044 return TokError("vendor name required, comma expected"); 5045 5046 if (!parseString(VendorName, "invalid vendor name")) 5047 return true; 5048 5049 if (!trySkipToken(AsmToken::Comma)) 5050 return TokError("arch name required, comma expected"); 5051 5052 if (!parseString(ArchName, "invalid arch name")) 5053 return true; 5054 5055 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5056 VendorName, ArchName); 5057 return false; 5058 } 5059 5060 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5061 amd_kernel_code_t &Header) { 5062 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5063 // assembly for backwards compatibility. 5064 if (ID == "max_scratch_backing_memory_byte_size") { 5065 Parser.eatToEndOfStatement(); 5066 return false; 5067 } 5068 5069 SmallString<40> ErrStr; 5070 raw_svector_ostream Err(ErrStr); 5071 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5072 return TokError(Err.str()); 5073 } 5074 Lex(); 5075 5076 if (ID == "enable_wavefront_size32") { 5077 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5078 if (!isGFX10Plus()) 5079 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5080 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5081 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5082 } else { 5083 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5084 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5085 } 5086 } 5087 5088 if (ID == "wavefront_size") { 5089 if (Header.wavefront_size == 5) { 5090 if (!isGFX10Plus()) 5091 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5092 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5093 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5094 } else if (Header.wavefront_size == 6) { 5095 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5096 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5097 } 5098 } 5099 5100 if (ID == "enable_wgp_mode") { 5101 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5102 !isGFX10Plus()) 5103 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5104 } 5105 5106 if (ID == "enable_mem_ordered") { 5107 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5108 !isGFX10Plus()) 5109 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5110 } 5111 5112 if (ID == "enable_fwd_progress") { 5113 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5114 !isGFX10Plus()) 5115 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5116 } 5117 5118 return false; 5119 } 5120 5121 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5122 amd_kernel_code_t Header; 5123 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5124 5125 while (true) { 
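    // The directive body is a sequence of "key = value" statements terminated
    // by .end_amd_kernel_code_t; each entry is handled by
    // ParseAMDKernelCodeTValue.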
5126 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5127 // will set the current token to EndOfStatement. 5128 while(trySkipToken(AsmToken::EndOfStatement)); 5129 5130 StringRef ID; 5131 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5132 return true; 5133 5134 if (ID == ".end_amd_kernel_code_t") 5135 break; 5136 5137 if (ParseAMDKernelCodeTValue(ID, Header)) 5138 return true; 5139 } 5140 5141 getTargetStreamer().EmitAMDKernelCodeT(Header); 5142 5143 return false; 5144 } 5145 5146 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5147 StringRef KernelName; 5148 if (!parseId(KernelName, "expected symbol name")) 5149 return true; 5150 5151 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5152 ELF::STT_AMDGPU_HSA_KERNEL); 5153 5154 KernelScope.initialize(getContext()); 5155 return false; 5156 } 5157 5158 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5159 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5160 return Error(getLoc(), 5161 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5162 "architectures"); 5163 } 5164 5165 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5166 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5167 return Error(getParser().getTok().getLoc(), "target id must match options"); 5168 5169 getTargetStreamer().EmitISAVersion(); 5170 Lex(); 5171 5172 return false; 5173 } 5174 5175 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5176 const char *AssemblerDirectiveBegin; 5177 const char *AssemblerDirectiveEnd; 5178 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5179 isHsaAbiVersion3AndAbove(&getSTI()) 5180 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5181 HSAMD::V3::AssemblerDirectiveEnd) 5182 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5183 HSAMD::AssemblerDirectiveEnd); 5184 5185 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5186 return Error(getLoc(), 5187 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5188 "not available on non-amdhsa OSes")).str()); 5189 } 5190 5191 std::string HSAMetadataString; 5192 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5193 HSAMetadataString)) 5194 return true; 5195 5196 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5197 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5198 return Error(getLoc(), "invalid HSA metadata"); 5199 } else { 5200 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5201 return Error(getLoc(), "invalid HSA metadata"); 5202 } 5203 5204 return false; 5205 } 5206 5207 /// Common code to parse out a block of text (typically YAML) between start and 5208 /// end directives. 
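/// The collected text is returned in \p CollectString (with the target's
/// statement separator between lines); the end directive itself is consumed.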
5209 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5210 const char *AssemblerDirectiveEnd, 5211 std::string &CollectString) { 5212 5213 raw_string_ostream CollectStream(CollectString); 5214 5215 getLexer().setSkipSpace(false); 5216 5217 bool FoundEnd = false; 5218 while (!isToken(AsmToken::Eof)) { 5219 while (isToken(AsmToken::Space)) { 5220 CollectStream << getTokenStr(); 5221 Lex(); 5222 } 5223 5224 if (trySkipId(AssemblerDirectiveEnd)) { 5225 FoundEnd = true; 5226 break; 5227 } 5228 5229 CollectStream << Parser.parseStringToEndOfStatement() 5230 << getContext().getAsmInfo()->getSeparatorString(); 5231 5232 Parser.eatToEndOfStatement(); 5233 } 5234 5235 getLexer().setSkipSpace(true); 5236 5237 if (isToken(AsmToken::Eof) && !FoundEnd) { 5238 return TokError(Twine("expected directive ") + 5239 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5240 } 5241 5242 CollectStream.flush(); 5243 return false; 5244 } 5245 5246 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5247 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5248 std::string String; 5249 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5250 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5251 return true; 5252 5253 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5254 if (!PALMetadata->setFromString(String)) 5255 return Error(getLoc(), "invalid PAL metadata"); 5256 return false; 5257 } 5258 5259 /// Parse the assembler directive for old linear-format PAL metadata. 5260 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5261 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5262 return Error(getLoc(), 5263 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5264 "not available on non-amdpal OSes")).str()); 5265 } 5266 5267 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5268 PALMetadata->setLegacy(); 5269 for (;;) { 5270 uint32_t Key, Value; 5271 if (ParseAsAbsoluteExpression(Key)) { 5272 return TokError(Twine("invalid value in ") + 5273 Twine(PALMD::AssemblerDirective)); 5274 } 5275 if (!trySkipToken(AsmToken::Comma)) { 5276 return TokError(Twine("expected an even number of values in ") + 5277 Twine(PALMD::AssemblerDirective)); 5278 } 5279 if (ParseAsAbsoluteExpression(Value)) { 5280 return TokError(Twine("invalid value in ") + 5281 Twine(PALMD::AssemblerDirective)); 5282 } 5283 PALMetadata->setRegister(Key, Value); 5284 if (!trySkipToken(AsmToken::Comma)) 5285 break; 5286 } 5287 return false; 5288 } 5289 5290 /// ParseDirectiveAMDGPULDS 5291 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5292 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5293 if (getParser().checkForValidSection()) 5294 return true; 5295 5296 StringRef Name; 5297 SMLoc NameLoc = getLoc(); 5298 if (getParser().parseIdentifier(Name)) 5299 return TokError("expected identifier in directive"); 5300 5301 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5302 if (parseToken(AsmToken::Comma, "expected ','")) 5303 return true; 5304 5305 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5306 5307 int64_t Size; 5308 SMLoc SizeLoc = getLoc(); 5309 if (getParser().parseAbsoluteExpression(Size)) 5310 return true; 5311 if (Size < 0) 5312 return Error(SizeLoc, "size must be non-negative"); 5313 if (Size > LocalMemorySize) 5314 return Error(SizeLoc, "size is too large"); 5315 5316 int64_t Alignment = 4; 5317 if (trySkipToken(AsmToken::Comma)) { 5318 SMLoc AlignLoc = getLoc(); 5319 if 
(getParser().parseAbsoluteExpression(Alignment)) 5320 return true; 5321 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5322 return Error(AlignLoc, "alignment must be a power of two"); 5323 5324 // Alignment larger than the size of LDS is possible in theory, as long 5325 // as the linker manages to place to symbol at address 0, but we do want 5326 // to make sure the alignment fits nicely into a 32-bit integer. 5327 if (Alignment >= 1u << 31) 5328 return Error(AlignLoc, "alignment is too large"); 5329 } 5330 5331 if (parseToken(AsmToken::EndOfStatement, 5332 "unexpected token in '.amdgpu_lds' directive")) 5333 return true; 5334 5335 Symbol->redefineIfPossible(); 5336 if (!Symbol->isUndefined()) 5337 return Error(NameLoc, "invalid symbol redefinition"); 5338 5339 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5340 return false; 5341 } 5342 5343 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5344 StringRef IDVal = DirectiveID.getString(); 5345 5346 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5347 if (IDVal == ".amdhsa_kernel") 5348 return ParseDirectiveAMDHSAKernel(); 5349 5350 // TODO: Restructure/combine with PAL metadata directive. 5351 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5352 return ParseDirectiveHSAMetadata(); 5353 } else { 5354 if (IDVal == ".hsa_code_object_version") 5355 return ParseDirectiveHSACodeObjectVersion(); 5356 5357 if (IDVal == ".hsa_code_object_isa") 5358 return ParseDirectiveHSACodeObjectISA(); 5359 5360 if (IDVal == ".amd_kernel_code_t") 5361 return ParseDirectiveAMDKernelCodeT(); 5362 5363 if (IDVal == ".amdgpu_hsa_kernel") 5364 return ParseDirectiveAMDGPUHsaKernel(); 5365 5366 if (IDVal == ".amd_amdgpu_isa") 5367 return ParseDirectiveISAVersion(); 5368 5369 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5370 return ParseDirectiveHSAMetadata(); 5371 } 5372 5373 if (IDVal == ".amdgcn_target") 5374 return ParseDirectiveAMDGCNTarget(); 5375 5376 if (IDVal == ".amdgpu_lds") 5377 return ParseDirectiveAMDGPULDS(); 5378 5379 if (IDVal == PALMD::AssemblerDirectiveBegin) 5380 return ParseDirectivePALMetadataBegin(); 5381 5382 if (IDVal == PALMD::AssemblerDirective) 5383 return ParseDirectivePALMetadata(); 5384 5385 return true; 5386 } 5387 5388 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5389 unsigned RegNo) { 5390 5391 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5392 return isGFX9Plus(); 5393 5394 // GFX10 has 2 more SGPRs 104 and 105. 5395 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5396 return hasSGPR104_SGPR105(); 5397 5398 switch (RegNo) { 5399 case AMDGPU::SRC_SHARED_BASE: 5400 case AMDGPU::SRC_SHARED_LIMIT: 5401 case AMDGPU::SRC_PRIVATE_BASE: 5402 case AMDGPU::SRC_PRIVATE_LIMIT: 5403 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5404 return isGFX9Plus(); 5405 case AMDGPU::TBA: 5406 case AMDGPU::TBA_LO: 5407 case AMDGPU::TBA_HI: 5408 case AMDGPU::TMA: 5409 case AMDGPU::TMA_LO: 5410 case AMDGPU::TMA_HI: 5411 return !isGFX9Plus(); 5412 case AMDGPU::XNACK_MASK: 5413 case AMDGPU::XNACK_MASK_LO: 5414 case AMDGPU::XNACK_MASK_HI: 5415 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5416 case AMDGPU::SGPR_NULL: 5417 return isGFX10Plus(); 5418 default: 5419 break; 5420 } 5421 5422 if (isCI()) 5423 return true; 5424 5425 if (isSI() || isGFX10Plus()) { 5426 // No flat_scr on SI. 5427 // On GFX10 flat scratch is not a valid register operand and can only be 5428 // accessed with s_setreg/s_getreg. 
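    // (Illustration: on such targets the value would be read with something
    // like "s_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO)" rather than naming
    // flat_scratch as an operand; the exact hwreg name depends on the target
    // definitions.)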
5429 switch (RegNo) { 5430 case AMDGPU::FLAT_SCR: 5431 case AMDGPU::FLAT_SCR_LO: 5432 case AMDGPU::FLAT_SCR_HI: 5433 return false; 5434 default: 5435 return true; 5436 } 5437 } 5438 5439 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5440 // SI/CI have. 5441 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5442 return hasSGPR102_SGPR103(); 5443 5444 return true; 5445 } 5446 5447 OperandMatchResultTy 5448 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5449 OperandMode Mode) { 5450 // Try to parse with a custom parser 5451 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5452 5453 // If we successfully parsed the operand or if there was an error parsing, 5454 // we are done. 5455 // 5456 // If we are parsing after we reach EndOfStatement then this means we 5457 // are appending default values to the Operands list. This is only done 5458 // by a custom parser, so we shouldn't continue on to the generic parsing. 5459 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5460 isToken(AsmToken::EndOfStatement)) 5461 return ResTy; 5462 5463 SMLoc RBraceLoc; 5464 SMLoc LBraceLoc = getLoc(); 5465 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5466 unsigned Prefix = Operands.size(); 5467 5468 for (;;) { 5469 auto Loc = getLoc(); 5470 ResTy = parseReg(Operands); 5471 if (ResTy == MatchOperand_NoMatch) 5472 Error(Loc, "expected a register"); 5473 if (ResTy != MatchOperand_Success) 5474 return MatchOperand_ParseFail; 5475 5476 RBraceLoc = getLoc(); 5477 if (trySkipToken(AsmToken::RBrac)) 5478 break; 5479 5480 if (!skipToken(AsmToken::Comma, 5481 "expected a comma or a closing square bracket")) { 5482 return MatchOperand_ParseFail; 5483 } 5484 } 5485 5486 if (Operands.size() - Prefix > 1) { 5487 Operands.insert(Operands.begin() + Prefix, 5488 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5489 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5490 } 5491 5492 return MatchOperand_Success; 5493 } 5494 5495 return parseRegOrImm(Operands); 5496 } 5497 5498 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5499 // Clear any forced encodings from the previous instruction.
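  // A recognized suffix selects a specific encoding; e.g. for an input of
  // "v_add_f32_e64" the 64-bit (VOP3) encoding is forced and the stripped
  // name "v_add_f32" is returned for matching (mnemonic is illustrative).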
5500 setForcedEncodingSize(0); 5501 setForcedDPP(false); 5502 setForcedSDWA(false); 5503 5504 if (Name.endswith("_e64")) { 5505 setForcedEncodingSize(64); 5506 return Name.substr(0, Name.size() - 4); 5507 } else if (Name.endswith("_e32")) { 5508 setForcedEncodingSize(32); 5509 return Name.substr(0, Name.size() - 4); 5510 } else if (Name.endswith("_dpp")) { 5511 setForcedDPP(true); 5512 return Name.substr(0, Name.size() - 4); 5513 } else if (Name.endswith("_sdwa")) { 5514 setForcedSDWA(true); 5515 return Name.substr(0, Name.size() - 5); 5516 } 5517 return Name; 5518 } 5519 5520 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5521 StringRef Name, 5522 SMLoc NameLoc, OperandVector &Operands) { 5523 // Add the instruction mnemonic 5524 Name = parseMnemonicSuffix(Name); 5525 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5526 5527 bool IsMIMG = Name.startswith("image_"); 5528 5529 while (!trySkipToken(AsmToken::EndOfStatement)) { 5530 OperandMode Mode = OperandMode_Default; 5531 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5532 Mode = OperandMode_NSA; 5533 CPolSeen = 0; 5534 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5535 5536 if (Res != MatchOperand_Success) { 5537 checkUnsupportedInstruction(Name, NameLoc); 5538 if (!Parser.hasPendingError()) { 5539 // FIXME: use real operand location rather than the current location. 5540 StringRef Msg = 5541 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5542 "not a valid operand."; 5543 Error(getLoc(), Msg); 5544 } 5545 while (!trySkipToken(AsmToken::EndOfStatement)) { 5546 lex(); 5547 } 5548 return true; 5549 } 5550 5551 // Eat the comma or space if there is one. 5552 trySkipToken(AsmToken::Comma); 5553 } 5554 5555 return false; 5556 } 5557 5558 //===----------------------------------------------------------------------===// 5559 // Utility functions 5560 //===----------------------------------------------------------------------===// 5561 5562 OperandMatchResultTy 5563 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5564 5565 if (!trySkipId(Prefix, AsmToken::Colon)) 5566 return MatchOperand_NoMatch; 5567 5568 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5569 } 5570 5571 OperandMatchResultTy 5572 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5573 AMDGPUOperand::ImmTy ImmTy, 5574 bool (*ConvertResult)(int64_t&)) { 5575 SMLoc S = getLoc(); 5576 int64_t Value = 0; 5577 5578 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5579 if (Res != MatchOperand_Success) 5580 return Res; 5581 5582 if (ConvertResult && !ConvertResult(Value)) { 5583 Error(S, "invalid " + StringRef(Prefix) + " value."); 5584 } 5585 5586 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5587 return MatchOperand_Success; 5588 } 5589 5590 OperandMatchResultTy 5591 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5592 OperandVector &Operands, 5593 AMDGPUOperand::ImmTy ImmTy, 5594 bool (*ConvertResult)(int64_t&)) { 5595 SMLoc S = getLoc(); 5596 if (!trySkipId(Prefix, AsmToken::Colon)) 5597 return MatchOperand_NoMatch; 5598 5599 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5600 return MatchOperand_ParseFail; 5601 5602 unsigned Val = 0; 5603 const unsigned MaxSize = 4; 5604 5605 // FIXME: How to verify the number of elements matches the number of src 5606 // operands? 
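  // Each element must be 0 or 1 and sets the corresponding bit of Val, so an
  // operand written as "op_sel:[0,1,1,0]" would produce Val = 0b0110
  // (illustrative; the prefix actually accepted depends on the caller).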
5607 for (int I = 0; ; ++I) { 5608 int64_t Op; 5609 SMLoc Loc = getLoc(); 5610 if (!parseExpr(Op)) 5611 return MatchOperand_ParseFail; 5612 5613 if (Op != 0 && Op != 1) { 5614 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5615 return MatchOperand_ParseFail; 5616 } 5617 5618 Val |= (Op << I); 5619 5620 if (trySkipToken(AsmToken::RBrac)) 5621 break; 5622 5623 if (I + 1 == MaxSize) { 5624 Error(getLoc(), "expected a closing square bracket"); 5625 return MatchOperand_ParseFail; 5626 } 5627 5628 if (!skipToken(AsmToken::Comma, "expected a comma")) 5629 return MatchOperand_ParseFail; 5630 } 5631 5632 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5633 return MatchOperand_Success; 5634 } 5635 5636 OperandMatchResultTy 5637 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5638 AMDGPUOperand::ImmTy ImmTy) { 5639 int64_t Bit; 5640 SMLoc S = getLoc(); 5641 5642 if (trySkipId(Name)) { 5643 Bit = 1; 5644 } else if (trySkipId("no", Name)) { 5645 Bit = 0; 5646 } else { 5647 return MatchOperand_NoMatch; 5648 } 5649 5650 if (Name == "r128" && !hasMIMG_R128()) { 5651 Error(S, "r128 modifier is not supported on this GPU"); 5652 return MatchOperand_ParseFail; 5653 } 5654 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5655 Error(S, "a16 modifier is not supported on this GPU"); 5656 return MatchOperand_ParseFail; 5657 } 5658 5659 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5660 ImmTy = AMDGPUOperand::ImmTyR128A16; 5661 5662 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5663 return MatchOperand_Success; 5664 } 5665 5666 OperandMatchResultTy 5667 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5668 unsigned CPolOn = 0; 5669 unsigned CPolOff = 0; 5670 SMLoc S = getLoc(); 5671 5672 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5673 if (isGFX940() && !Mnemo.startswith("s_")) { 5674 if (trySkipId("sc0")) 5675 CPolOn = AMDGPU::CPol::SC0; 5676 else if (trySkipId("nosc0")) 5677 CPolOff = AMDGPU::CPol::SC0; 5678 else if (trySkipId("nt")) 5679 CPolOn = AMDGPU::CPol::NT; 5680 else if (trySkipId("nont")) 5681 CPolOff = AMDGPU::CPol::NT; 5682 else if (trySkipId("sc1")) 5683 CPolOn = AMDGPU::CPol::SC1; 5684 else if (trySkipId("nosc1")) 5685 CPolOff = AMDGPU::CPol::SC1; 5686 else 5687 return MatchOperand_NoMatch; 5688 } 5689 else if (trySkipId("glc")) 5690 CPolOn = AMDGPU::CPol::GLC; 5691 else if (trySkipId("noglc")) 5692 CPolOff = AMDGPU::CPol::GLC; 5693 else if (trySkipId("slc")) 5694 CPolOn = AMDGPU::CPol::SLC; 5695 else if (trySkipId("noslc")) 5696 CPolOff = AMDGPU::CPol::SLC; 5697 else if (trySkipId("dlc")) 5698 CPolOn = AMDGPU::CPol::DLC; 5699 else if (trySkipId("nodlc")) 5700 CPolOff = AMDGPU::CPol::DLC; 5701 else if (trySkipId("scc")) 5702 CPolOn = AMDGPU::CPol::SCC; 5703 else if (trySkipId("noscc")) 5704 CPolOff = AMDGPU::CPol::SCC; 5705 else 5706 return MatchOperand_NoMatch; 5707 5708 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5709 Error(S, "dlc modifier is not supported on this GPU"); 5710 return MatchOperand_ParseFail; 5711 } 5712 5713 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5714 Error(S, "scc modifier is not supported on this GPU"); 5715 return MatchOperand_ParseFail; 5716 } 5717 5718 if (CPolSeen & (CPolOn | CPolOff)) { 5719 Error(S, "duplicate cache policy modifier"); 5720 return MatchOperand_ParseFail; 5721 } 5722 5723 CPolSeen |= (CPolOn | CPolOff); 5724 5725 for (unsigned I = 1; I != Operands.size(); ++I) { 5726 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5727 if (Op.isCPol()) { 5728 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5729 return MatchOperand_Success; 5730 } 5731 } 5732 5733 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5734 AMDGPUOperand::ImmTyCPol)); 5735 5736 return MatchOperand_Success; 5737 } 5738 5739 static void addOptionalImmOperand( 5740 MCInst& Inst, const OperandVector& Operands, 5741 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5742 AMDGPUOperand::ImmTy ImmT, 5743 int64_t Default = 0) { 5744 auto i = OptionalIdx.find(ImmT); 5745 if (i != OptionalIdx.end()) { 5746 unsigned Idx = i->second; 5747 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5748 } else { 5749 Inst.addOperand(MCOperand::createImm(Default)); 5750 } 5751 } 5752 5753 OperandMatchResultTy 5754 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5755 StringRef &Value, 5756 SMLoc &StringLoc) { 5757 if (!trySkipId(Prefix, AsmToken::Colon)) 5758 return MatchOperand_NoMatch; 5759 5760 StringLoc = getLoc(); 5761 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5762 : MatchOperand_ParseFail; 5763 } 5764 5765 //===----------------------------------------------------------------------===// 5766 // MTBUF format 5767 //===----------------------------------------------------------------------===// 5768 5769 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5770 int64_t MaxVal, 5771 int64_t &Fmt) { 5772 int64_t Val; 5773 SMLoc Loc = getLoc(); 5774 5775 auto Res = parseIntWithPrefix(Pref, Val); 5776 if (Res == MatchOperand_ParseFail) 5777 return false; 5778 if (Res == MatchOperand_NoMatch) 5779 return true; 5780 5781 if (Val < 0 || Val > MaxVal) { 5782 Error(Loc, Twine("out of range ", StringRef(Pref))); 5783 return false; 5784 } 5785 5786 Fmt = Val; 5787 return true; 5788 } 5789 5790 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5791 // values to live in a joint format operand in the MCInst encoding. 5792 OperandMatchResultTy 5793 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5794 using namespace llvm::AMDGPU::MTBUFFormat; 5795 5796 int64_t Dfmt = DFMT_UNDEF; 5797 int64_t Nfmt = NFMT_UNDEF; 5798 5799 // dfmt and nfmt can appear in either order, and each is optional. 5800 for (int I = 0; I < 2; ++I) { 5801 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5802 return MatchOperand_ParseFail; 5803 5804 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5805 return MatchOperand_ParseFail; 5806 } 5807 // Skip optional comma between dfmt/nfmt 5808 // but guard against 2 commas following each other. 5809 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5810 !peekToken().is(AsmToken::Comma)) { 5811 trySkipToken(AsmToken::Comma); 5812 } 5813 } 5814 5815 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5816 return MatchOperand_NoMatch; 5817 5818 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5819 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5820 5821 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5822 return MatchOperand_Success; 5823 } 5824 5825 OperandMatchResultTy 5826 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5827 using namespace llvm::AMDGPU::MTBUFFormat; 5828 5829 int64_t Fmt = UFMT_UNDEF; 5830 5831 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5832 return MatchOperand_ParseFail; 5833 5834 if (Fmt == UFMT_UNDEF) 5835 return MatchOperand_NoMatch; 5836 5837 Format = Fmt; 5838 return MatchOperand_Success; 5839 } 5840 5841 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5842 int64_t &Nfmt, 5843 StringRef FormatStr, 5844 SMLoc Loc) { 5845 using namespace llvm::AMDGPU::MTBUFFormat; 5846 int64_t Format; 5847 5848 Format = getDfmt(FormatStr); 5849 if (Format != DFMT_UNDEF) { 5850 Dfmt = Format; 5851 return true; 5852 } 5853 5854 Format = getNfmt(FormatStr, getSTI()); 5855 if (Format != NFMT_UNDEF) { 5856 Nfmt = Format; 5857 return true; 5858 } 5859 5860 Error(Loc, "unsupported format"); 5861 return false; 5862 } 5863 5864 OperandMatchResultTy 5865 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5866 SMLoc FormatLoc, 5867 int64_t &Format) { 5868 using namespace llvm::AMDGPU::MTBUFFormat; 5869 5870 int64_t Dfmt = DFMT_UNDEF; 5871 int64_t Nfmt = NFMT_UNDEF; 5872 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5873 return MatchOperand_ParseFail; 5874 5875 if (trySkipToken(AsmToken::Comma)) { 5876 StringRef Str; 5877 SMLoc Loc = getLoc(); 5878 if (!parseId(Str, "expected a format string") || 5879 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5880 return MatchOperand_ParseFail; 5881 } 5882 if (Dfmt == DFMT_UNDEF) { 5883 Error(Loc, "duplicate numeric format"); 5884 return MatchOperand_ParseFail; 5885 } else if (Nfmt == NFMT_UNDEF) { 5886 Error(Loc, "duplicate data format"); 5887 return MatchOperand_ParseFail; 5888 } 5889 } 5890 5891 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5892 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5893 5894 if (isGFX10Plus()) { 5895 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5896 if (Ufmt == UFMT_UNDEF) { 5897 Error(FormatLoc, "unsupported format"); 5898 return MatchOperand_ParseFail; 5899 } 5900 Format = Ufmt; 5901 } else { 5902 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5903 } 5904 5905 return MatchOperand_Success; 5906 } 5907 5908 OperandMatchResultTy 5909 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5910 SMLoc Loc, 5911 int64_t &Format) { 5912 using namespace llvm::AMDGPU::MTBUFFormat; 5913 5914 auto Id = getUnifiedFormat(FormatStr); 5915 if (Id == UFMT_UNDEF) 5916 return MatchOperand_NoMatch; 5917 5918 if (!isGFX10Plus()) { 5919 Error(Loc, "unified format is not supported on this GPU"); 5920 return MatchOperand_ParseFail; 5921 } 5922 5923 Format = Id; 5924 return MatchOperand_Success; 5925 } 5926 5927 OperandMatchResultTy 5928 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5929 using namespace llvm::AMDGPU::MTBUFFormat; 5930 SMLoc Loc = getLoc(); 5931 5932 if (!parseExpr(Format)) 5933 return MatchOperand_ParseFail; 5934 if (!isValidFormatEncoding(Format, getSTI())) { 5935 Error(Loc, "out of range format"); 5936 return MatchOperand_ParseFail; 5937 } 5938 5939 return MatchOperand_Success; 5940 } 5941 5942 OperandMatchResultTy 5943 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5944 using namespace llvm::AMDGPU::MTBUFFormat; 5945 5946 if (!trySkipId("format", AsmToken::Colon)) 5947 return MatchOperand_NoMatch; 5948 5949 if (trySkipToken(AsmToken::LBrac)) { 5950 StringRef FormatStr; 5951 SMLoc Loc = getLoc(); 5952 if (!parseId(FormatStr, "expected a format string")) 5953 return MatchOperand_ParseFail; 5954 5955 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5956 if (Res == MatchOperand_NoMatch) 5957 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5958 if (Res != MatchOperand_Success) 5959 return Res; 5960 5961 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5962 return MatchOperand_ParseFail; 5963 5964 return MatchOperand_Success; 5965 } 5966 5967 return parseNumericFormat(Format); 5968 } 5969 5970 OperandMatchResultTy 5971 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5972 using namespace llvm::AMDGPU::MTBUFFormat; 5973 5974 int64_t Format = getDefaultFormatEncoding(getSTI()); 5975 OperandMatchResultTy Res; 5976 SMLoc Loc = getLoc(); 5977 5978 // Parse legacy format syntax. 5979 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5980 if (Res == MatchOperand_ParseFail) 5981 return Res; 5982 5983 bool FormatFound = (Res == MatchOperand_Success); 5984 5985 Operands.push_back( 5986 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5987 5988 if (FormatFound) 5989 trySkipToken(AsmToken::Comma); 5990 5991 if (isToken(AsmToken::EndOfStatement)) { 5992 // We are expecting an soffset operand, 5993 // but let matcher handle the error. 5994 return MatchOperand_Success; 5995 } 5996 5997 // Parse soffset. 
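  // The soffset is parsed next; if no format was seen in the legacy position,
  // a symbolic or numeric format may still follow it, e.g.
  // "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]" or "format:22"
  // (examples are illustrative).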
5998 Res = parseRegOrImm(Operands); 5999 if (Res != MatchOperand_Success) 6000 return Res; 6001 6002 trySkipToken(AsmToken::Comma); 6003 6004 if (!FormatFound) { 6005 Res = parseSymbolicOrNumericFormat(Format); 6006 if (Res == MatchOperand_ParseFail) 6007 return Res; 6008 if (Res == MatchOperand_Success) { 6009 auto Size = Operands.size(); 6010 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6011 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6012 Op.setImm(Format); 6013 } 6014 return MatchOperand_Success; 6015 } 6016 6017 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6018 Error(getLoc(), "duplicate format"); 6019 return MatchOperand_ParseFail; 6020 } 6021 return MatchOperand_Success; 6022 } 6023 6024 //===----------------------------------------------------------------------===// 6025 // ds 6026 //===----------------------------------------------------------------------===// 6027 6028 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6029 const OperandVector &Operands) { 6030 OptionalImmIndexMap OptionalIdx; 6031 6032 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6033 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6034 6035 // Add the register arguments 6036 if (Op.isReg()) { 6037 Op.addRegOperands(Inst, 1); 6038 continue; 6039 } 6040 6041 // Handle optional arguments 6042 OptionalIdx[Op.getImmTy()] = i; 6043 } 6044 6045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6048 6049 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6050 } 6051 6052 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6053 bool IsGdsHardcoded) { 6054 OptionalImmIndexMap OptionalIdx; 6055 6056 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6057 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6058 6059 // Add the register arguments 6060 if (Op.isReg()) { 6061 Op.addRegOperands(Inst, 1); 6062 continue; 6063 } 6064 6065 if (Op.isToken() && Op.getToken() == "gds") { 6066 IsGdsHardcoded = true; 6067 continue; 6068 } 6069 6070 // Handle optional arguments 6071 OptionalIdx[Op.getImmTy()] = i; 6072 } 6073 6074 AMDGPUOperand::ImmTy OffsetType = 6075 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6076 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6077 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6078 AMDGPUOperand::ImmTyOffset; 6079 6080 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6081 6082 if (!IsGdsHardcoded) { 6083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6084 } 6085 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6086 } 6087 6088 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6089 OptionalImmIndexMap OptionalIdx; 6090 6091 unsigned OperandIdx[4]; 6092 unsigned EnMask = 0; 6093 int SrcIdx = 0; 6094 6095 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6096 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6097 6098 // Add the register arguments 6099 if (Op.isReg()) { 6100 assert(SrcIdx < 4); 6101 OperandIdx[SrcIdx] = Inst.size(); 6102 Op.addRegOperands(Inst, 1); 6103 ++SrcIdx; 6104 continue; 6105 } 6106 6107 if (Op.isOff()) { 6108 assert(SrcIdx < 4); 6109 OperandIdx[SrcIdx] = Inst.size(); 6110 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6111 ++SrcIdx; 6112 continue; 6113 } 6114 6115 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6116 Op.addImmOperands(Inst, 1); 6117 continue; 6118 } 6119 6120 if (Op.isToken() && Op.getToken() == "done") 6121 continue; 6122 6123 // Handle optional arguments 6124 OptionalIdx[Op.getImmTy()] = i; 6125 } 6126 6127 assert(SrcIdx == 4); 6128 6129 bool Compr = false; 6130 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6131 Compr = true; 6132 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6133 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6134 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6135 } 6136 6137 for (auto i = 0; i < SrcIdx; ++i) { 6138 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6139 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6140 } 6141 } 6142 6143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6144 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6145 6146 Inst.addOperand(MCOperand::createImm(EnMask)); 6147 } 6148 6149 //===----------------------------------------------------------------------===// 6150 // s_waitcnt 6151 //===----------------------------------------------------------------------===// 6152 6153 static bool 6154 encodeCnt( 6155 const AMDGPU::IsaVersion ISA, 6156 int64_t &IntVal, 6157 int64_t CntVal, 6158 bool Saturate, 6159 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6160 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6161 { 6162 bool Failed = false; 6163 6164 IntVal = encode(ISA, IntVal, CntVal); 6165 if (CntVal != decode(ISA, IntVal)) { 6166 if (Saturate) { 6167 IntVal = encode(ISA, IntVal, -1); 6168 } else { 6169 Failed = true; 6170 } 6171 } 6172 return Failed; 6173 } 6174 6175 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6176 6177 SMLoc CntLoc = getLoc(); 6178 StringRef CntName = getTokenStr(); 6179 6180 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6181 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6182 return false; 6183 6184 int64_t CntVal; 6185 SMLoc ValLoc = getLoc(); 6186 if (!parseExpr(CntVal)) 6187 return false; 6188 6189 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6190 6191 bool Failed = true; 6192 bool Sat = CntName.endswith("_sat"); 6193 6194 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6195 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6196 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6197 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6198 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6199 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6200 } else { 6201 Error(CntLoc, "invalid counter name " + CntName); 6202 return false; 6203 } 6204 6205 if (Failed) { 6206 Error(ValLoc, "too large value for " + CntName); 6207 return false; 6208 } 6209 6210 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6211 return false; 6212 6213 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6214 if (isToken(AsmToken::EndOfStatement)) { 6215 Error(getLoc(), "expected a counter name"); 6216 return false; 6217 } 6218 } 6219 6220 return true; 6221 } 6222 6223 OperandMatchResultTy 6224 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6225 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6226 int64_t Waitcnt = getWaitcntBitMask(ISA); 6227 SMLoc S = getLoc(); 6228 6229 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6230 while (!isToken(AsmToken::EndOfStatement)) { 6231 if (!parseCnt(Waitcnt)) 6232 return MatchOperand_ParseFail; 6233 } 6234 } else { 6235 if (!parseExpr(Waitcnt)) 6236 return MatchOperand_ParseFail; 6237 } 6238 6239 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6240 return MatchOperand_Success; 6241 } 6242 6243 bool 6244 AMDGPUOperand::isSWaitCnt() const { 6245 return isImm(); 6246 } 6247 6248 //===----------------------------------------------------------------------===// 6249 // hwreg 6250 //===----------------------------------------------------------------------===// 6251 6252 bool 6253 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6254 OperandInfoTy &Offset, 6255 
OperandInfoTy &Width) { 6256 using namespace llvm::AMDGPU::Hwreg; 6257 6258 // The register may be specified by name or using a numeric code 6259 HwReg.Loc = getLoc(); 6260 if (isToken(AsmToken::Identifier) && 6261 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) { 6262 HwReg.IsSymbolic = true; 6263 HwReg.Name = getTokenStr(); 6264 lex(); // skip register name 6265 } else if (!parseExpr(HwReg.Id, "a register name")) { 6266 return false; 6267 } 6268 6269 if (trySkipToken(AsmToken::RParen)) 6270 return true; 6271 6272 // parse optional params 6273 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6274 return false; 6275 6276 Offset.Loc = getLoc(); 6277 if (!parseExpr(Offset.Id)) 6278 return false; 6279 6280 if (!skipToken(AsmToken::Comma, "expected a comma")) 6281 return false; 6282 6283 Width.Loc = getLoc(); 6284 return parseExpr(Width.Id) && 6285 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6286 } 6287 6288 bool 6289 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6290 const OperandInfoTy &Offset, 6291 const OperandInfoTy &Width) { 6292 6293 using namespace llvm::AMDGPU::Hwreg; 6294 6295 if (HwReg.IsSymbolic && 6296 !isValidHwreg(HwReg.Id, getSTI(), HwReg.Name)) { 6297 Error(HwReg.Loc, 6298 "specified hardware register is not supported on this GPU"); 6299 return false; 6300 } 6301 if (!isValidHwreg(HwReg.Id)) { 6302 Error(HwReg.Loc, 6303 "invalid code of hardware register: only 6-bit values are legal"); 6304 return false; 6305 } 6306 if (!isValidHwregOffset(Offset.Id)) { 6307 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6308 return false; 6309 } 6310 if (!isValidHwregWidth(Width.Id)) { 6311 Error(Width.Loc, 6312 "invalid bitfield width: only values from 1 to 32 are legal"); 6313 return false; 6314 } 6315 return true; 6316 } 6317 6318 OperandMatchResultTy 6319 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6320 using namespace llvm::AMDGPU::Hwreg; 6321 6322 int64_t ImmVal = 0; 6323 SMLoc Loc = getLoc(); 6324 6325 if (trySkipId("hwreg", AsmToken::LParen)) { 6326 OperandInfoTy HwReg(ID_UNKNOWN_); 6327 OperandInfoTy Offset(OFFSET_DEFAULT_); 6328 OperandInfoTy Width(WIDTH_DEFAULT_); 6329 if (parseHwregBody(HwReg, Offset, Width) && 6330 validateHwreg(HwReg, Offset, Width)) { 6331 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6332 } else { 6333 return MatchOperand_ParseFail; 6334 } 6335 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6336 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6337 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6338 return MatchOperand_ParseFail; 6339 } 6340 } else { 6341 return MatchOperand_ParseFail; 6342 } 6343 6344 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6345 return MatchOperand_Success; 6346 } 6347 6348 bool AMDGPUOperand::isHwreg() const { 6349 return isImmTy(ImmTyHwreg); 6350 } 6351 6352 //===----------------------------------------------------------------------===// 6353 // sendmsg 6354 //===----------------------------------------------------------------------===// 6355 6356 bool 6357 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6358 OperandInfoTy &Op, 6359 OperandInfoTy &Stream) { 6360 using namespace llvm::AMDGPU::SendMsg; 6361 6362 Msg.Loc = getLoc(); 6363 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6364 Msg.IsSymbolic = true; 6365 lex(); // skip message name 6366 } else if (!parseExpr(Msg.Id, "a message name")) { 6367 return false; 6368 } 6369 6370 if 
(trySkipToken(AsmToken::Comma)) { 6371 Op.IsDefined = true; 6372 Op.Loc = getLoc(); 6373 if (isToken(AsmToken::Identifier) && 6374 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6375 lex(); // skip operation name 6376 } else if (!parseExpr(Op.Id, "an operation name")) { 6377 return false; 6378 } 6379 6380 if (trySkipToken(AsmToken::Comma)) { 6381 Stream.IsDefined = true; 6382 Stream.Loc = getLoc(); 6383 if (!parseExpr(Stream.Id)) 6384 return false; 6385 } 6386 } 6387 6388 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6389 } 6390 6391 bool 6392 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6393 const OperandInfoTy &Op, 6394 const OperandInfoTy &Stream) { 6395 using namespace llvm::AMDGPU::SendMsg; 6396 6397 // Validation strictness depends on whether message is specified 6398 // in a symbolic or in a numeric form. In the latter case 6399 // only encoding possibility is checked. 6400 bool Strict = Msg.IsSymbolic; 6401 6402 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6403 Error(Msg.Loc, "invalid message id"); 6404 return false; 6405 } 6406 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6407 if (Op.IsDefined) { 6408 Error(Op.Loc, "message does not support operations"); 6409 } else { 6410 Error(Msg.Loc, "missing message operation"); 6411 } 6412 return false; 6413 } 6414 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6415 Error(Op.Loc, "invalid operation id"); 6416 return false; 6417 } 6418 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6419 Error(Stream.Loc, "message operation does not support streams"); 6420 return false; 6421 } 6422 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6423 Error(Stream.Loc, "invalid message stream id"); 6424 return false; 6425 } 6426 return true; 6427 } 6428 6429 OperandMatchResultTy 6430 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6431 using namespace llvm::AMDGPU::SendMsg; 6432 6433 int64_t ImmVal = 0; 6434 SMLoc Loc = getLoc(); 6435 6436 if (trySkipId("sendmsg", AsmToken::LParen)) { 6437 OperandInfoTy Msg(ID_UNKNOWN_); 6438 OperandInfoTy Op(OP_NONE_); 6439 OperandInfoTy Stream(STREAM_ID_NONE_); 6440 if (parseSendMsgBody(Msg, Op, Stream) && 6441 validateSendMsg(Msg, Op, Stream)) { 6442 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6443 } else { 6444 return MatchOperand_ParseFail; 6445 } 6446 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6447 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6448 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6449 return MatchOperand_ParseFail; 6450 } 6451 } else { 6452 return MatchOperand_ParseFail; 6453 } 6454 6455 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6456 return MatchOperand_Success; 6457 } 6458 6459 bool AMDGPUOperand::isSendMsg() const { 6460 return isImmTy(ImmTySendMsg); 6461 } 6462 6463 //===----------------------------------------------------------------------===// 6464 // v_interp 6465 //===----------------------------------------------------------------------===// 6466 6467 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6468 StringRef Str; 6469 SMLoc S = getLoc(); 6470 6471 if (!parseId(Str)) 6472 return MatchOperand_NoMatch; 6473 6474 int Slot = StringSwitch<int>(Str) 6475 .Case("p10", 0) 6476 .Case("p20", 1) 6477 .Case("p0", 2) 6478 .Default(-1); 6479 6480 if (Slot == -1) { 6481 Error(S, "invalid interpolation slot"); 6482 return MatchOperand_ParseFail; 6483 } 6484 6485 
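  // The slot is emitted as an immediate, e.g. "v_interp_mov_f32 v0, p10, attr0.x"
  // yields slot 0 here (instruction shown for illustration).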
Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6486 AMDGPUOperand::ImmTyInterpSlot)); 6487 return MatchOperand_Success; 6488 } 6489 6490 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6491 StringRef Str; 6492 SMLoc S = getLoc(); 6493 6494 if (!parseId(Str)) 6495 return MatchOperand_NoMatch; 6496 6497 if (!Str.startswith("attr")) { 6498 Error(S, "invalid interpolation attribute"); 6499 return MatchOperand_ParseFail; 6500 } 6501 6502 StringRef Chan = Str.take_back(2); 6503 int AttrChan = StringSwitch<int>(Chan) 6504 .Case(".x", 0) 6505 .Case(".y", 1) 6506 .Case(".z", 2) 6507 .Case(".w", 3) 6508 .Default(-1); 6509 if (AttrChan == -1) { 6510 Error(S, "invalid or missing interpolation attribute channel"); 6511 return MatchOperand_ParseFail; 6512 } 6513 6514 Str = Str.drop_back(2).drop_front(4); 6515 6516 uint8_t Attr; 6517 if (Str.getAsInteger(10, Attr)) { 6518 Error(S, "invalid or missing interpolation attribute number"); 6519 return MatchOperand_ParseFail; 6520 } 6521 6522 if (Attr > 63) { 6523 Error(S, "out of bounds interpolation attribute number"); 6524 return MatchOperand_ParseFail; 6525 } 6526 6527 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6528 6529 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6530 AMDGPUOperand::ImmTyInterpAttr)); 6531 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6532 AMDGPUOperand::ImmTyAttrChan)); 6533 return MatchOperand_Success; 6534 } 6535 6536 //===----------------------------------------------------------------------===// 6537 // exp 6538 //===----------------------------------------------------------------------===// 6539 6540 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6541 using namespace llvm::AMDGPU::Exp; 6542 6543 StringRef Str; 6544 SMLoc S = getLoc(); 6545 6546 if (!parseId(Str)) 6547 return MatchOperand_NoMatch; 6548 6549 unsigned Id = getTgtId(Str); 6550 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6551 Error(S, (Id == ET_INVALID) ? 
6552 "invalid exp target" : 6553 "exp target is not supported on this GPU"); 6554 return MatchOperand_ParseFail; 6555 } 6556 6557 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6558 AMDGPUOperand::ImmTyExpTgt)); 6559 return MatchOperand_Success; 6560 } 6561 6562 //===----------------------------------------------------------------------===// 6563 // parser helpers 6564 //===----------------------------------------------------------------------===// 6565 6566 bool 6567 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6568 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6569 } 6570 6571 bool 6572 AMDGPUAsmParser::isId(const StringRef Id) const { 6573 return isId(getToken(), Id); 6574 } 6575 6576 bool 6577 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6578 return getTokenKind() == Kind; 6579 } 6580 6581 bool 6582 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6583 if (isId(Id)) { 6584 lex(); 6585 return true; 6586 } 6587 return false; 6588 } 6589 6590 bool 6591 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6592 if (isToken(AsmToken::Identifier)) { 6593 StringRef Tok = getTokenStr(); 6594 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6595 lex(); 6596 return true; 6597 } 6598 } 6599 return false; 6600 } 6601 6602 bool 6603 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6604 if (isId(Id) && peekToken().is(Kind)) { 6605 lex(); 6606 lex(); 6607 return true; 6608 } 6609 return false; 6610 } 6611 6612 bool 6613 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6614 if (isToken(Kind)) { 6615 lex(); 6616 return true; 6617 } 6618 return false; 6619 } 6620 6621 bool 6622 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6623 const StringRef ErrMsg) { 6624 if (!trySkipToken(Kind)) { 6625 Error(getLoc(), ErrMsg); 6626 return false; 6627 } 6628 return true; 6629 } 6630 6631 bool 6632 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6633 SMLoc S = getLoc(); 6634 6635 const MCExpr *Expr; 6636 if (Parser.parseExpression(Expr)) 6637 return false; 6638 6639 if (Expr->evaluateAsAbsolute(Imm)) 6640 return true; 6641 6642 if (Expected.empty()) { 6643 Error(S, "expected absolute expression"); 6644 } else { 6645 Error(S, Twine("expected ", Expected) + 6646 Twine(" or an absolute expression")); 6647 } 6648 return false; 6649 } 6650 6651 bool 6652 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6653 SMLoc S = getLoc(); 6654 6655 const MCExpr *Expr; 6656 if (Parser.parseExpression(Expr)) 6657 return false; 6658 6659 int64_t IntVal; 6660 if (Expr->evaluateAsAbsolute(IntVal)) { 6661 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6662 } else { 6663 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6664 } 6665 return true; 6666 } 6667 6668 bool 6669 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6670 if (isToken(AsmToken::String)) { 6671 Val = getToken().getStringContents(); 6672 lex(); 6673 return true; 6674 } else { 6675 Error(getLoc(), ErrMsg); 6676 return false; 6677 } 6678 } 6679 6680 bool 6681 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6682 if (isToken(AsmToken::Identifier)) { 6683 Val = getTokenStr(); 6684 lex(); 6685 return true; 6686 } else { 6687 if (!ErrMsg.empty()) 6688 Error(getLoc(), ErrMsg); 6689 return false; 6690 } 6691 } 6692 6693 AsmToken 6694 AMDGPUAsmParser::getToken() const { 6695 return Parser.getTok(); 6696 } 6697 6698 AsmToken 6699 
AMDGPUAsmParser::peekToken() { 6700 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6701 } 6702 6703 void 6704 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6705 auto TokCount = getLexer().peekTokens(Tokens); 6706 6707 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6708 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6709 } 6710 6711 AsmToken::TokenKind 6712 AMDGPUAsmParser::getTokenKind() const { 6713 return getLexer().getKind(); 6714 } 6715 6716 SMLoc 6717 AMDGPUAsmParser::getLoc() const { 6718 return getToken().getLoc(); 6719 } 6720 6721 StringRef 6722 AMDGPUAsmParser::getTokenStr() const { 6723 return getToken().getString(); 6724 } 6725 6726 void 6727 AMDGPUAsmParser::lex() { 6728 Parser.Lex(); 6729 } 6730 6731 SMLoc 6732 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6733 const OperandVector &Operands) const { 6734 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6735 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6736 if (Test(Op)) 6737 return Op.getStartLoc(); 6738 } 6739 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6740 } 6741 6742 SMLoc 6743 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6744 const OperandVector &Operands) const { 6745 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6746 return getOperandLoc(Test, Operands); 6747 } 6748 6749 SMLoc 6750 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6751 const OperandVector &Operands) const { 6752 auto Test = [=](const AMDGPUOperand& Op) { 6753 return Op.isRegKind() && Op.getReg() == Reg; 6754 }; 6755 return getOperandLoc(Test, Operands); 6756 } 6757 6758 SMLoc 6759 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6760 auto Test = [](const AMDGPUOperand& Op) { 6761 return Op.IsImmKindLiteral() || Op.isExpr(); 6762 }; 6763 return getOperandLoc(Test, Operands); 6764 } 6765 6766 SMLoc 6767 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6768 auto Test = [](const AMDGPUOperand& Op) { 6769 return Op.isImmKindConst(); 6770 }; 6771 return getOperandLoc(Test, Operands); 6772 } 6773 6774 //===----------------------------------------------------------------------===// 6775 // swizzle 6776 //===----------------------------------------------------------------------===// 6777 6778 LLVM_READNONE 6779 static unsigned 6780 encodeBitmaskPerm(const unsigned AndMask, 6781 const unsigned OrMask, 6782 const unsigned XorMask) { 6783 using namespace llvm::AMDGPU::Swizzle; 6784 6785 return BITMASK_PERM_ENC | 6786 (AndMask << BITMASK_AND_SHIFT) | 6787 (OrMask << BITMASK_OR_SHIFT) | 6788 (XorMask << BITMASK_XOR_SHIFT); 6789 } 6790 6791 bool 6792 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6793 const unsigned MinVal, 6794 const unsigned MaxVal, 6795 const StringRef ErrMsg, 6796 SMLoc &Loc) { 6797 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6798 return false; 6799 } 6800 Loc = getLoc(); 6801 if (!parseExpr(Op)) { 6802 return false; 6803 } 6804 if (Op < MinVal || Op > MaxVal) { 6805 Error(Loc, ErrMsg); 6806 return false; 6807 } 6808 6809 return true; 6810 } 6811 6812 bool 6813 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6814 const unsigned MinVal, 6815 const unsigned MaxVal, 6816 const StringRef ErrMsg) { 6817 SMLoc Loc; 6818 for (unsigned i = 0; i < OpNum; ++i) { 6819 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6820 return false; 6821 } 6822 6823 return true; 6824 } 6825 6826 bool 6827 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6828 using namespace llvm::AMDGPU::Swizzle; 6829 6830 int64_t Lane[LANE_NUM]; 6831 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6832 "expected a 2-bit lane id")) { 6833 Imm = QUAD_PERM_ENC; 6834 for (unsigned I = 0; I < LANE_NUM; ++I) { 6835 Imm |= Lane[I] << (LANE_SHIFT * I); 6836 } 6837 return true; 6838 } 6839 return false; 6840 } 6841 6842 bool 6843 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6844 using namespace llvm::AMDGPU::Swizzle; 6845 6846 SMLoc Loc; 6847 int64_t GroupSize; 6848 int64_t LaneIdx; 6849 6850 if (!parseSwizzleOperand(GroupSize, 6851 2, 32, 6852 "group size must be in the interval [2,32]", 6853 Loc)) { 6854 return false; 6855 } 6856 if (!isPowerOf2_64(GroupSize)) { 6857 Error(Loc, "group size must be a power of two"); 6858 return false; 6859 } 6860 if (parseSwizzleOperand(LaneIdx, 6861 0, GroupSize - 1, 6862 "lane id must be in the interval [0,group size - 1]", 6863 Loc)) { 6864 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6865 return true; 6866 } 6867 return false; 6868 } 6869 6870 bool 6871 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6872 using namespace llvm::AMDGPU::Swizzle; 6873 6874 SMLoc Loc; 6875 int64_t GroupSize; 6876 6877 if (!parseSwizzleOperand(GroupSize, 6878 2, 32, 6879 "group size must be in the interval [2,32]", 6880 Loc)) { 6881 return false; 6882 } 6883 if (!isPowerOf2_64(GroupSize)) { 6884 Error(Loc, "group size must be a power of two"); 6885 return false; 6886 } 6887 6888 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6889 return true; 6890 } 6891 6892 bool 6893 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6894 using namespace llvm::AMDGPU::Swizzle; 6895 6896 SMLoc Loc; 6897 int64_t GroupSize; 6898 6899 if (!parseSwizzleOperand(GroupSize, 6900 1, 16, 6901 "group size must be in the interval [1,16]", 6902 Loc)) { 6903 return false; 6904 } 6905 if (!isPowerOf2_64(GroupSize)) { 6906 Error(Loc, "group size must be a power of two"); 6907 return false; 6908 } 6909 6910 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6911 return true; 6912 } 6913 6914 bool 6915 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6916 using namespace llvm::AMDGPU::Swizzle; 6917 6918 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6919 return false; 6920 } 6921 6922 StringRef Ctl; 6923 SMLoc StrLoc = getLoc(); 6924 if (!parseString(Ctl)) { 6925 return false; 6926 } 6927 if (Ctl.size() != BITMASK_WIDTH) { 6928 Error(StrLoc, "expected a 5-character mask"); 6929 return false; 6930 } 6931 6932 unsigned AndMask = 0; 6933 unsigned OrMask = 0; 6934 unsigned XorMask = 0; 6935 6936 for (size_t i = 0; i < Ctl.size(); ++i) { 6937 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6938 switch(Ctl[i]) { 6939 default: 6940 Error(StrLoc, "invalid mask"); 6941 return false; 6942 case '0': 6943 break; 6944 case '1': 6945 OrMask |= Mask; 6946 break; 6947 case 'p': 6948 AndMask |= Mask; 6949 break; 6950 case 'i': 6951 AndMask |= Mask; 6952 XorMask |= Mask; 6953 break; 6954 } 6955 } 6956 6957 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6958 return true; 6959 } 6960 6961 bool 6962 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6963 6964 SMLoc OffsetLoc = getLoc(); 6965 6966 if (!parseExpr(Imm, "a swizzle macro")) { 6967 return false; 6968 } 6969 if (!isUInt<16>(Imm)) { 6970 Error(OffsetLoc, "expected a 16-bit offset"); 6971 return false; 6972 } 6973 return true; 6974 } 6975 6976 bool 6977 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6978 using namespace llvm::AMDGPU::Swizzle; 6979 6980 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6981 6982 SMLoc ModeLoc = getLoc(); 6983 bool Ok = false; 6984 6985 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6986 Ok = parseSwizzleQuadPerm(Imm); 6987 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6988 Ok = parseSwizzleBitmaskPerm(Imm); 6989 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6990 Ok = parseSwizzleBroadcast(Imm); 6991 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6992 Ok = parseSwizzleSwap(Imm); 6993 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6994 Ok = parseSwizzleReverse(Imm); 6995 } else { 6996 Error(ModeLoc, "expected a swizzle mode"); 6997 } 6998 6999 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7000 } 7001 7002 return false; 7003 } 7004 7005 OperandMatchResultTy 7006 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7007 SMLoc S = getLoc(); 7008 int64_t Imm = 0; 7009 7010 if (trySkipId("offset")) { 7011 7012 bool Ok = false; 7013 if (skipToken(AsmToken::Colon, "expected a colon")) { 7014 if (trySkipId("swizzle")) { 7015 Ok = parseSwizzleMacro(Imm); 7016 } else { 7017 Ok = parseSwizzleOffset(Imm); 7018 } 7019 } 7020 7021 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7022 7023 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7024 } else { 7025 // Swizzle "offset" operand is optional. 7026 // If it is omitted, try parsing other optional operands. 7027 return parseOptionalOpr(Operands); 7028 } 7029 } 7030 7031 bool 7032 AMDGPUOperand::isSwizzle() const { 7033 return isImmTy(ImmTySwizzle); 7034 } 7035 7036 //===----------------------------------------------------------------------===// 7037 // VGPR Index Mode 7038 //===----------------------------------------------------------------------===// 7039 7040 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7041 7042 using namespace llvm::AMDGPU::VGPRIndexMode; 7043 7044 if (trySkipToken(AsmToken::RParen)) { 7045 return OFF; 7046 } 7047 7048 int64_t Imm = 0; 7049 7050 while (true) { 7051 unsigned Mode = 0; 7052 SMLoc S = getLoc(); 7053 7054 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7055 if (trySkipId(IdSymbolic[ModeId])) { 7056 Mode = 1 << ModeId; 7057 break; 7058 } 7059 } 7060 7061 if (Mode == 0) { 7062 Error(S, (Imm == 0)?
7063 "expected a VGPR index mode or a closing parenthesis" : 7064 "expected a VGPR index mode"); 7065 return UNDEF; 7066 } 7067 7068 if (Imm & Mode) { 7069 Error(S, "duplicate VGPR index mode"); 7070 return UNDEF; 7071 } 7072 Imm |= Mode; 7073 7074 if (trySkipToken(AsmToken::RParen)) 7075 break; 7076 if (!skipToken(AsmToken::Comma, 7077 "expected a comma or a closing parenthesis")) 7078 return UNDEF; 7079 } 7080 7081 return Imm; 7082 } 7083 7084 OperandMatchResultTy 7085 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7086 7087 using namespace llvm::AMDGPU::VGPRIndexMode; 7088 7089 int64_t Imm = 0; 7090 SMLoc S = getLoc(); 7091 7092 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7093 Imm = parseGPRIdxMacro(); 7094 if (Imm == UNDEF) 7095 return MatchOperand_ParseFail; 7096 } else { 7097 if (getParser().parseAbsoluteExpression(Imm)) 7098 return MatchOperand_ParseFail; 7099 if (Imm < 0 || !isUInt<4>(Imm)) { 7100 Error(S, "invalid immediate: only 4-bit values are legal"); 7101 return MatchOperand_ParseFail; 7102 } 7103 } 7104 7105 Operands.push_back( 7106 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7107 return MatchOperand_Success; 7108 } 7109 7110 bool AMDGPUOperand::isGPRIdxMode() const { 7111 return isImmTy(ImmTyGprIdxMode); 7112 } 7113 7114 //===----------------------------------------------------------------------===// 7115 // sopp branch targets 7116 //===----------------------------------------------------------------------===// 7117 7118 OperandMatchResultTy 7119 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7120 7121 // Make sure we are not parsing something 7122 // that looks like a label or an expression but is not. 7123 // This will improve error messages. 7124 if (isRegister() || isModifier()) 7125 return MatchOperand_NoMatch; 7126 7127 if (!parseExpr(Operands)) 7128 return MatchOperand_ParseFail; 7129 7130 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7131 assert(Opr.isImm() || Opr.isExpr()); 7132 SMLoc Loc = Opr.getStartLoc(); 7133 7134 // Currently we do not support arbitrary expressions as branch targets. 7135 // Only labels and absolute expressions are accepted. 
7136 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7137 Error(Loc, "expected an absolute expression or a label"); 7138 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7139 Error(Loc, "expected a 16-bit signed jump offset"); 7140 } 7141 7142 return MatchOperand_Success; 7143 } 7144 7145 //===----------------------------------------------------------------------===// 7146 // Boolean holding registers 7147 //===----------------------------------------------------------------------===// 7148 7149 OperandMatchResultTy 7150 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7151 return parseReg(Operands); 7152 } 7153 7154 //===----------------------------------------------------------------------===// 7155 // mubuf 7156 //===----------------------------------------------------------------------===// 7157 7158 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7159 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7160 } 7161 7162 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7163 const OperandVector &Operands, 7164 bool IsAtomic, 7165 bool IsLds) { 7166 bool IsLdsOpcode = IsLds; 7167 bool HasLdsModifier = false; 7168 OptionalImmIndexMap OptionalIdx; 7169 unsigned FirstOperandIdx = 1; 7170 bool IsAtomicReturn = false; 7171 7172 if (IsAtomic) { 7173 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7174 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7175 if (!Op.isCPol()) 7176 continue; 7177 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7178 break; 7179 } 7180 7181 if (!IsAtomicReturn) { 7182 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7183 if (NewOpc != -1) 7184 Inst.setOpcode(NewOpc); 7185 } 7186 7187 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7188 SIInstrFlags::IsAtomicRet; 7189 } 7190 7191 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7192 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7193 7194 // Add the register arguments 7195 if (Op.isReg()) { 7196 Op.addRegOperands(Inst, 1); 7197 // Insert a tied src for atomic return dst. 7198 // This cannot be postponed as subsequent calls to 7199 // addImmOperands rely on correct number of MC operands. 7200 if (IsAtomicReturn && i == FirstOperandIdx) 7201 Op.addRegOperands(Inst, 1); 7202 continue; 7203 } 7204 7205 // Handle the case where soffset is an immediate 7206 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7207 Op.addImmOperands(Inst, 1); 7208 continue; 7209 } 7210 7211 HasLdsModifier |= Op.isLDS(); 7212 7213 // Handle tokens like 'offen' which are sometimes hard-coded into the 7214 // asm string. There are no MCInst operands for these. 7215 if (Op.isToken()) { 7216 continue; 7217 } 7218 assert(Op.isImm()); 7219 7220 // Handle optional arguments 7221 OptionalIdx[Op.getImmTy()] = i; 7222 } 7223 7224 // This is a workaround for an llvm quirk which may result in an 7225 // incorrect instruction selection. Lds and non-lds versions of 7226 // MUBUF instructions are identical except that lds versions 7227 // have mandatory 'lds' modifier. However this modifier follows 7228 // optional modifiers and llvm asm matcher regards this 'lds' 7229 // modifier as an optional one. As a result, an lds version 7230 // of opcode may be selected even if it has no 'lds' modifier. 7231 if (IsLdsOpcode && !HasLdsModifier) { 7232 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7233 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7234 Inst.setOpcode(NoLdsOpcode); 7235 IsLdsOpcode = false; 7236 } 7237 } 7238 7239 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7240 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7241 7242 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7244 } 7245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7246 } 7247 7248 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7249 OptionalImmIndexMap OptionalIdx; 7250 7251 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7252 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7253 7254 // Add the register arguments 7255 if (Op.isReg()) { 7256 Op.addRegOperands(Inst, 1); 7257 continue; 7258 } 7259 7260 // Handle the case where soffset is an immediate 7261 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7262 Op.addImmOperands(Inst, 1); 7263 continue; 7264 } 7265 7266 // Handle tokens like 'offen' which are sometimes hard-coded into the 7267 // asm string. There are no MCInst operands for these. 7268 if (Op.isToken()) { 7269 continue; 7270 } 7271 assert(Op.isImm()); 7272 7273 // Handle optional arguments 7274 OptionalIdx[Op.getImmTy()] = i; 7275 } 7276 7277 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7278 AMDGPUOperand::ImmTyOffset); 7279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7281 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7283 } 7284 7285 //===----------------------------------------------------------------------===// 7286 // mimg 7287 //===----------------------------------------------------------------------===// 7288 7289 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7290 bool IsAtomic) { 7291 unsigned I = 1; 7292 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7293 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7294 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7295 } 7296 7297 if (IsAtomic) { 7298 // Add src, same as dst 7299 assert(Desc.getNumDefs() == 1); 7300 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7301 } 7302 7303 OptionalImmIndexMap OptionalIdx; 7304 7305 for (unsigned E = Operands.size(); I != E; ++I) { 7306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7307 7308 // Add the register arguments 7309 if (Op.isReg()) { 7310 Op.addRegOperands(Inst, 1); 7311 } else if (Op.isImmModifier()) { 7312 OptionalIdx[Op.getImmTy()] = I; 7313 } else if (!Op.isToken()) { 7314 llvm_unreachable("unexpected operand type"); 7315 } 7316 } 7317 7318 bool IsGFX10Plus = isGFX10Plus(); 7319 7320 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7321 if (IsGFX10Plus) 7322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7325 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7326 if (IsGFX10Plus) 7327 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7328 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7331 if (!IsGFX10Plus) 7332 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7333 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7334 } 7335 7336 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7337 cvtMIMG(Inst, Operands, true); 7338 } 7339 7340 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7341 OptionalImmIndexMap OptionalIdx; 7342 bool IsAtomicReturn = false; 7343 7344 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7345 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7346 if (!Op.isCPol()) 7347 continue; 7348 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7349 break; 7350 } 7351 7352 if (!IsAtomicReturn) { 7353 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7354 if (NewOpc != -1) 7355 Inst.setOpcode(NewOpc); 7356 } 7357 7358 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7359 SIInstrFlags::IsAtomicRet; 7360 7361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7363 7364 // Add the register arguments 7365 if (Op.isReg()) { 7366 Op.addRegOperands(Inst, 1); 7367 if (IsAtomicReturn && i == 1) 7368 Op.addRegOperands(Inst, 1); 7369 continue; 7370 } 7371 7372 // Handle the case where soffset is an immediate 7373 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7374 Op.addImmOperands(Inst, 1); 7375 continue; 7376 } 7377 7378 // Handle tokens like 'offen' which are sometimes hard-coded into the 7379 // asm string. There are no MCInst operands for these. 7380 if (Op.isToken()) { 7381 continue; 7382 } 7383 assert(Op.isImm()); 7384 7385 // Handle optional arguments 7386 OptionalIdx[Op.getImmTy()] = i; 7387 } 7388 7389 if ((int)Inst.getNumOperands() <= 7390 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7391 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7392 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7393 } 7394 7395 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7396 const OperandVector &Operands) { 7397 for (unsigned I = 1; I < Operands.size(); ++I) { 7398 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7399 if (Operand.isReg()) 7400 Operand.addRegOperands(Inst, 1); 7401 } 7402 7403 Inst.addOperand(MCOperand::createImm(1)); // a16 7404 } 7405 7406 //===----------------------------------------------------------------------===// 7407 // smrd 7408 //===----------------------------------------------------------------------===// 7409 7410 bool AMDGPUOperand::isSMRDOffset8() const { 7411 return isImm() && isUInt<8>(getImm()); 7412 } 7413 7414 bool AMDGPUOperand::isSMEMOffset() const { 7415 return isImm(); // Offset range is checked later by validator. 7416 } 7417 7418 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7419 // 32-bit literals are only supported on CI and we only want to use them 7420 // when the offset is > 8-bits. 
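  // Put differently: offsets that fit in 8 bits use the isSMRDOffset8 form
  // above, while larger values (up to 32 bits) are treated as a literal offset here.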
7421 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7422 } 7423 7424 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7425 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7426 } 7427 7428 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7429 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7430 } 7431 7432 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7433 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7434 } 7435 7436 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7437 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7438 } 7439 7440 //===----------------------------------------------------------------------===// 7441 // vop3 7442 //===----------------------------------------------------------------------===// 7443 7444 static bool ConvertOmodMul(int64_t &Mul) { 7445 if (Mul != 1 && Mul != 2 && Mul != 4) 7446 return false; 7447 7448 Mul >>= 1; 7449 return true; 7450 } 7451 7452 static bool ConvertOmodDiv(int64_t &Div) { 7453 if (Div == 1) { 7454 Div = 0; 7455 return true; 7456 } 7457 7458 if (Div == 2) { 7459 Div = 3; 7460 return true; 7461 } 7462 7463 return false; 7464 } 7465 7466 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7467 // This is intentional and ensures compatibility with sp3. 7468 // See bug 35397 for details. 7469 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7470 if (BoundCtrl == 0 || BoundCtrl == 1) { 7471 BoundCtrl = 1; 7472 return true; 7473 } 7474 return false; 7475 } 7476 7477 // Note: the order in this table matches the order of operands in AsmString. 7478 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7479 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7480 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7481 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7482 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7483 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7484 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7485 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7486 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7487 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7488 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7489 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7490 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7491 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7492 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7493 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7494 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7495 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7496 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7497 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7498 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7499 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7500 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7501 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7502 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7503 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7504 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7505 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7506 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7507 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7508 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7509 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7510 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7511 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7512 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7513 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7514 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7515 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7516 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7517 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7518 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7519 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7520 }; 7521 7522 void AMDGPUAsmParser::onBeginOfFile() { 7523 if (!getParser().getStreamer().getTargetStreamer() || 7524 getSTI().getTargetTriple().getArch() == Triple::r600) 7525 return; 7526 7527 if (!getTargetStreamer().getTargetID()) 7528 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7529 7530 if (isHsaAbiVersion3AndAbove(&getSTI())) 7531 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7532 } 7533 7534 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7535 7536 OperandMatchResultTy res = parseOptionalOpr(Operands); 7537 7538 // This is a hack to enable hardcoded mandatory operands which follow 7539 // optional operands. 7540 // 7541 // The current design assumes that all operands after the first optional operand 7542 // are also optional. However, the implementation of some instructions violates 7543 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands). 7544 // 7545 // To alleviate this problem, we have to (implicitly) parse extra operands 7546 // to make sure the autogenerated parser of custom operands never hits hardcoded 7547 // mandatory operands.
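  //
  // The loop below therefore keeps re-invoking parseOptionalOpr (skipping an
  // optional comma between operands) until it stops matching, the end of the
  // statement is reached, or MAX_OPR_LOOKAHEAD operands have been consumed.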
7548 7549 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7550 if (res != MatchOperand_Success || 7551 isToken(AsmToken::EndOfStatement)) 7552 break; 7553 7554 trySkipToken(AsmToken::Comma); 7555 res = parseOptionalOpr(Operands); 7556 } 7557 7558 return res; 7559 } 7560 7561 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7562 OperandMatchResultTy res; 7563 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7564 // try to parse any optional operand here 7565 if (Op.IsBit) { 7566 res = parseNamedBit(Op.Name, Operands, Op.Type); 7567 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7568 res = parseOModOperand(Operands); 7569 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7570 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7571 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7572 res = parseSDWASel(Operands, Op.Name, Op.Type); 7573 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7574 res = parseSDWADstUnused(Operands); 7575 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7576 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7577 Op.Type == AMDGPUOperand::ImmTyNegLo || 7578 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7579 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7580 Op.ConvertResult); 7581 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7582 res = parseDim(Operands); 7583 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7584 res = parseCPol(Operands); 7585 } else { 7586 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7587 } 7588 if (res != MatchOperand_NoMatch) { 7589 return res; 7590 } 7591 } 7592 return MatchOperand_NoMatch; 7593 } 7594 7595 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7596 StringRef Name = getTokenStr(); 7597 if (Name == "mul") { 7598 return parseIntWithPrefix("mul", Operands, 7599 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7600 } 7601 7602 if (Name == "div") { 7603 return parseIntWithPrefix("div", Operands, 7604 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7605 } 7606 7607 return MatchOperand_NoMatch; 7608 } 7609 7610 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7611 cvtVOP3P(Inst, Operands); 7612 7613 int Opc = Inst.getOpcode(); 7614 7615 int SrcNum; 7616 const int Ops[] = { AMDGPU::OpName::src0, 7617 AMDGPU::OpName::src1, 7618 AMDGPU::OpName::src2 }; 7619 for (SrcNum = 0; 7620 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7621 ++SrcNum); 7622 assert(SrcNum > 0); 7623 7624 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7625 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7626 7627 if ((OpSel & (1 << SrcNum)) != 0) { 7628 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7629 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7630 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7631 } 7632 } 7633 7634 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7635 // 1. This operand is input modifiers 7636 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7637 // 2. This is not last operand 7638 && Desc.NumOperands > (OpNum + 1) 7639 // 3. Next operand is register class 7640 && Desc.OpInfo[OpNum + 1].RegClass != -1 7641 // 4. 
Next register is not tied to any other operand 7642 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7643 } 7644 7645 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7646 { 7647 OptionalImmIndexMap OptionalIdx; 7648 unsigned Opc = Inst.getOpcode(); 7649 7650 unsigned I = 1; 7651 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7652 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7653 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7654 } 7655 7656 for (unsigned E = Operands.size(); I != E; ++I) { 7657 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7658 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7659 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7660 } else if (Op.isInterpSlot() || 7661 Op.isInterpAttr() || 7662 Op.isAttrChan()) { 7663 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7664 } else if (Op.isImmModifier()) { 7665 OptionalIdx[Op.getImmTy()] = I; 7666 } else { 7667 llvm_unreachable("unhandled operand type"); 7668 } 7669 } 7670 7671 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7673 } 7674 7675 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7677 } 7678 7679 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7681 } 7682 } 7683 7684 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7685 OptionalImmIndexMap &OptionalIdx) { 7686 unsigned Opc = Inst.getOpcode(); 7687 7688 unsigned I = 1; 7689 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7690 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7691 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7692 } 7693 7694 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7695 // This instruction has src modifiers 7696 for (unsigned E = Operands.size(); I != E; ++I) { 7697 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7698 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7699 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7700 } else if (Op.isImmModifier()) { 7701 OptionalIdx[Op.getImmTy()] = I; 7702 } else if (Op.isRegOrImm()) { 7703 Op.addRegOrImmOperands(Inst, 1); 7704 } else { 7705 llvm_unreachable("unhandled operand type"); 7706 } 7707 } 7708 } else { 7709 // No src modifiers 7710 for (unsigned E = Operands.size(); I != E; ++I) { 7711 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7712 if (Op.isMod()) { 7713 OptionalIdx[Op.getImmTy()] = I; 7714 } else { 7715 Op.addRegOrImmOperands(Inst, 1); 7716 } 7717 } 7718 } 7719 7720 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7721 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7722 } 7723 7724 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7726 } 7727 7728 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7729 // it has src2 register operand that is tied to dst operand 7730 // we don't allow modifiers for this operand in assembler so src2_modifiers 7731 // should be 0. 
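  //
  // Sketch of the resulting operand layout after the fixup below:
  //   src2_modifiers <- 0            (inserted; no modifiers allowed)
  //   src2           <- operand 0    (a copy of dst, satisfying the tie)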
7732 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7733 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7734 Opc == AMDGPU::V_MAC_F32_e64_vi || 7735 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7736 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7737 Opc == AMDGPU::V_MAC_F16_e64_vi || 7738 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7739 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7740 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7741 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7742 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7743 auto it = Inst.begin(); 7744 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7745 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7746 ++it; 7747 // Copy the operand to ensure it's not invalidated when Inst grows. 7748 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7749 } 7750 } 7751 7752 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7753 OptionalImmIndexMap OptionalIdx; 7754 cvtVOP3(Inst, Operands, OptionalIdx); 7755 } 7756 7757 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7758 OptionalImmIndexMap &OptIdx) { 7759 const int Opc = Inst.getOpcode(); 7760 const MCInstrDesc &Desc = MII.get(Opc); 7761 7762 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7763 7764 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7765 assert(!IsPacked); 7766 Inst.addOperand(Inst.getOperand(0)); 7767 } 7768 7769 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7770 // instruction, and then figure out where to actually put the modifiers 7771 7772 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7773 if (OpSelIdx != -1) { 7774 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7775 } 7776 7777 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7778 if (OpSelHiIdx != -1) { 7779 int DefaultVal = IsPacked ? 
-1 : 0; 7780 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7781 DefaultVal); 7782 } 7783 7784 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7785 if (NegLoIdx != -1) { 7786 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7787 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7788 } 7789 7790 const int Ops[] = { AMDGPU::OpName::src0, 7791 AMDGPU::OpName::src1, 7792 AMDGPU::OpName::src2 }; 7793 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7794 AMDGPU::OpName::src1_modifiers, 7795 AMDGPU::OpName::src2_modifiers }; 7796 7797 unsigned OpSel = 0; 7798 unsigned OpSelHi = 0; 7799 unsigned NegLo = 0; 7800 unsigned NegHi = 0; 7801 7802 if (OpSelIdx != -1) 7803 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7804 7805 if (OpSelHiIdx != -1) 7806 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7807 7808 if (NegLoIdx != -1) { 7809 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7810 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7811 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7812 } 7813 7814 for (int J = 0; J < 3; ++J) { 7815 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7816 if (OpIdx == -1) 7817 break; 7818 7819 uint32_t ModVal = 0; 7820 7821 if ((OpSel & (1 << J)) != 0) 7822 ModVal |= SISrcMods::OP_SEL_0; 7823 7824 if ((OpSelHi & (1 << J)) != 0) 7825 ModVal |= SISrcMods::OP_SEL_1; 7826 7827 if ((NegLo & (1 << J)) != 0) 7828 ModVal |= SISrcMods::NEG; 7829 7830 if ((NegHi & (1 << J)) != 0) 7831 ModVal |= SISrcMods::NEG_HI; 7832 7833 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7834 7835 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7836 } 7837 } 7838 7839 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7840 OptionalImmIndexMap OptIdx; 7841 cvtVOP3(Inst, Operands, OptIdx); 7842 cvtVOP3P(Inst, Operands, OptIdx); 7843 } 7844 7845 //===----------------------------------------------------------------------===// 7846 // dpp 7847 //===----------------------------------------------------------------------===// 7848 7849 bool AMDGPUOperand::isDPP8() const { 7850 return isImmTy(ImmTyDPP8); 7851 } 7852 7853 bool AMDGPUOperand::isDPPCtrl() const { 7854 using namespace AMDGPU::DPP; 7855 7856 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7857 if (result) { 7858 int64_t Imm = getImm(); 7859 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7860 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7861 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7862 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7863 (Imm == DppCtrl::WAVE_SHL1) || 7864 (Imm == DppCtrl::WAVE_ROL1) || 7865 (Imm == DppCtrl::WAVE_SHR1) || 7866 (Imm == DppCtrl::WAVE_ROR1) || 7867 (Imm == DppCtrl::ROW_MIRROR) || 7868 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7869 (Imm == DppCtrl::BCAST15) || 7870 (Imm == DppCtrl::BCAST31) || 7871 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7872 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7873 } 7874 return false; 7875 } 7876 7877 //===----------------------------------------------------------------------===// 7878 // mAI 7879 //===----------------------------------------------------------------------===// 7880 7881 bool AMDGPUOperand::isBLGP() const { 7882 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7883 } 7884 7885 bool 
AMDGPUOperand::isCBSZ() const { 7886 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7887 } 7888 7889 bool AMDGPUOperand::isABID() const { 7890 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7891 } 7892 7893 bool AMDGPUOperand::isS16Imm() const { 7894 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7895 } 7896 7897 bool AMDGPUOperand::isU16Imm() const { 7898 return isImm() && isUInt<16>(getImm()); 7899 } 7900 7901 //===----------------------------------------------------------------------===// 7902 // dim 7903 //===----------------------------------------------------------------------===// 7904 7905 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7906 // We want to allow "dim:1D" etc., 7907 // but the initial 1 is tokenized as an integer. 7908 std::string Token; 7909 if (isToken(AsmToken::Integer)) { 7910 SMLoc Loc = getToken().getEndLoc(); 7911 Token = std::string(getTokenStr()); 7912 lex(); 7913 if (getLoc() != Loc) 7914 return false; 7915 } 7916 7917 StringRef Suffix; 7918 if (!parseId(Suffix)) 7919 return false; 7920 Token += Suffix; 7921 7922 StringRef DimId = Token; 7923 if (DimId.startswith("SQ_RSRC_IMG_")) 7924 DimId = DimId.drop_front(12); 7925 7926 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7927 if (!DimInfo) 7928 return false; 7929 7930 Encoding = DimInfo->Encoding; 7931 return true; 7932 } 7933 7934 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7935 if (!isGFX10Plus()) 7936 return MatchOperand_NoMatch; 7937 7938 SMLoc S = getLoc(); 7939 7940 if (!trySkipId("dim", AsmToken::Colon)) 7941 return MatchOperand_NoMatch; 7942 7943 unsigned Encoding; 7944 SMLoc Loc = getLoc(); 7945 if (!parseDimId(Encoding)) { 7946 Error(Loc, "invalid dim value"); 7947 return MatchOperand_ParseFail; 7948 } 7949 7950 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7951 AMDGPUOperand::ImmTyDim)); 7952 return MatchOperand_Success; 7953 } 7954 7955 //===----------------------------------------------------------------------===// 7956 // dpp 7957 //===----------------------------------------------------------------------===// 7958 7959 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7960 SMLoc S = getLoc(); 7961 7962 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7963 return MatchOperand_NoMatch; 7964 7965 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7966 7967 int64_t Sels[8]; 7968 7969 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7970 return MatchOperand_ParseFail; 7971 7972 for (size_t i = 0; i < 8; ++i) { 7973 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7974 return MatchOperand_ParseFail; 7975 7976 SMLoc Loc = getLoc(); 7977 if (getParser().parseAbsoluteExpression(Sels[i])) 7978 return MatchOperand_ParseFail; 7979 if (0 > Sels[i] || 7 < Sels[i]) { 7980 Error(Loc, "expected a 3-bit value"); 7981 return MatchOperand_ParseFail; 7982 } 7983 } 7984 7985 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7986 return MatchOperand_ParseFail; 7987 7988 unsigned DPP8 = 0; 7989 for (size_t i = 0; i < 8; ++i) 7990 DPP8 |= (Sels[i] << (i * 3)); 7991 7992 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7993 return MatchOperand_Success; 7994 } 7995 7996 bool 7997 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7998 const OperandVector &Operands) { 7999 if (Ctrl == "row_newbcast") 8000 return isGFX90A(); 8001 8002 if (Ctrl == "row_share" || 8003 Ctrl 
== "row_xmask") 8004 return isGFX10Plus(); 8005 8006 if (Ctrl == "wave_shl" || 8007 Ctrl == "wave_shr" || 8008 Ctrl == "wave_rol" || 8009 Ctrl == "wave_ror" || 8010 Ctrl == "row_bcast") 8011 return isVI() || isGFX9(); 8012 8013 return Ctrl == "row_mirror" || 8014 Ctrl == "row_half_mirror" || 8015 Ctrl == "quad_perm" || 8016 Ctrl == "row_shl" || 8017 Ctrl == "row_shr" || 8018 Ctrl == "row_ror"; 8019 } 8020 8021 int64_t 8022 AMDGPUAsmParser::parseDPPCtrlPerm() { 8023 // quad_perm:[%d,%d,%d,%d] 8024 8025 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8026 return -1; 8027 8028 int64_t Val = 0; 8029 for (int i = 0; i < 4; ++i) { 8030 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8031 return -1; 8032 8033 int64_t Temp; 8034 SMLoc Loc = getLoc(); 8035 if (getParser().parseAbsoluteExpression(Temp)) 8036 return -1; 8037 if (Temp < 0 || Temp > 3) { 8038 Error(Loc, "expected a 2-bit value"); 8039 return -1; 8040 } 8041 8042 Val += (Temp << i * 2); 8043 } 8044 8045 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8046 return -1; 8047 8048 return Val; 8049 } 8050 8051 int64_t 8052 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8053 using namespace AMDGPU::DPP; 8054 8055 // sel:%d 8056 8057 int64_t Val; 8058 SMLoc Loc = getLoc(); 8059 8060 if (getParser().parseAbsoluteExpression(Val)) 8061 return -1; 8062 8063 struct DppCtrlCheck { 8064 int64_t Ctrl; 8065 int Lo; 8066 int Hi; 8067 }; 8068 8069 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8070 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8071 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8072 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8073 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8074 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8075 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8076 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8077 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8078 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8079 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8080 .Default({-1, 0, 0}); 8081 8082 bool Valid; 8083 if (Check.Ctrl == -1) { 8084 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8085 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8086 } else { 8087 Valid = Check.Lo <= Val && Val <= Check.Hi; 8088 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8089 } 8090 8091 if (!Valid) { 8092 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8093 return -1; 8094 } 8095 8096 return Val; 8097 } 8098 8099 OperandMatchResultTy 8100 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8101 using namespace AMDGPU::DPP; 8102 8103 if (!isToken(AsmToken::Identifier) || 8104 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8105 return MatchOperand_NoMatch; 8106 8107 SMLoc S = getLoc(); 8108 int64_t Val = -1; 8109 StringRef Ctrl; 8110 8111 parseId(Ctrl); 8112 8113 if (Ctrl == "row_mirror") { 8114 Val = DppCtrl::ROW_MIRROR; 8115 } else if (Ctrl == "row_half_mirror") { 8116 Val = DppCtrl::ROW_HALF_MIRROR; 8117 } else { 8118 if (skipToken(AsmToken::Colon, "expected a colon")) { 8119 if (Ctrl == "quad_perm") { 8120 Val = parseDPPCtrlPerm(); 8121 } else { 8122 Val = parseDPPCtrlSel(Ctrl); 8123 } 8124 } 8125 } 8126 8127 if (Val == -1) 8128 return MatchOperand_ParseFail; 8129 8130 Operands.push_back( 8131 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8132 return MatchOperand_Success; 8133 } 8134 8135 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8136 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8137 } 8138 8139 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8140 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8141 } 8142 8143 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8144 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8145 } 8146 8147 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8148 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8149 } 8150 8151 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8152 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8153 } 8154 8155 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8156 OptionalImmIndexMap OptionalIdx; 8157 8158 unsigned Opc = Inst.getOpcode(); 8159 bool HasModifiers = 8160 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8161 unsigned I = 1; 8162 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8163 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8164 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8165 } 8166 8167 int Fi = 0; 8168 for (unsigned E = Operands.size(); I != E; ++I) { 8169 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8170 MCOI::TIED_TO); 8171 if (TiedTo != -1) { 8172 assert((unsigned)TiedTo < Inst.getNumOperands()); 8173 // handle tied old or src2 for MAC instructions 8174 Inst.addOperand(Inst.getOperand(TiedTo)); 8175 } 8176 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8177 // Add the register arguments 8178 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8179 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8180 // Skip it. 
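      // (Only registers accepted by validateVccOperand() are skipped here;
      // all other register operands are handled below.)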
8181 continue; 8182 } 8183 8184 if (IsDPP8) { 8185 if (Op.isDPP8()) { 8186 Op.addImmOperands(Inst, 1); 8187 } else if (HasModifiers && 8188 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8189 Op.addRegWithFPInputModsOperands(Inst, 2); 8190 } else if (Op.isFI()) { 8191 Fi = Op.getImm(); 8192 } else if (Op.isReg()) { 8193 Op.addRegOperands(Inst, 1); 8194 } else { 8195 llvm_unreachable("Invalid operand type"); 8196 } 8197 } else { 8198 if (HasModifiers && 8199 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8200 Op.addRegWithFPInputModsOperands(Inst, 2); 8201 } else if (Op.isReg()) { 8202 Op.addRegOperands(Inst, 1); 8203 } else if (Op.isDPPCtrl()) { 8204 Op.addImmOperands(Inst, 1); 8205 } else if (Op.isImm()) { 8206 // Handle optional arguments 8207 OptionalIdx[Op.getImmTy()] = I; 8208 } else { 8209 llvm_unreachable("Invalid operand type"); 8210 } 8211 } 8212 } 8213 8214 if (IsDPP8) { 8215 using namespace llvm::AMDGPU::DPP; 8216 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8217 } else { 8218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8220 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8221 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8222 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8223 } 8224 } 8225 } 8226 8227 //===----------------------------------------------------------------------===// 8228 // sdwa 8229 //===----------------------------------------------------------------------===// 8230 8231 OperandMatchResultTy 8232 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8233 AMDGPUOperand::ImmTy Type) { 8234 using namespace llvm::AMDGPU::SDWA; 8235 8236 SMLoc S = getLoc(); 8237 StringRef Value; 8238 OperandMatchResultTy res; 8239 8240 SMLoc StringLoc; 8241 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8242 if (res != MatchOperand_Success) { 8243 return res; 8244 } 8245 8246 int64_t Int; 8247 Int = StringSwitch<int64_t>(Value) 8248 .Case("BYTE_0", SdwaSel::BYTE_0) 8249 .Case("BYTE_1", SdwaSel::BYTE_1) 8250 .Case("BYTE_2", SdwaSel::BYTE_2) 8251 .Case("BYTE_3", SdwaSel::BYTE_3) 8252 .Case("WORD_0", SdwaSel::WORD_0) 8253 .Case("WORD_1", SdwaSel::WORD_1) 8254 .Case("DWORD", SdwaSel::DWORD) 8255 .Default(0xffffffff); 8256 8257 if (Int == 0xffffffff) { 8258 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8259 return MatchOperand_ParseFail; 8260 } 8261 8262 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8263 return MatchOperand_Success; 8264 } 8265 8266 OperandMatchResultTy 8267 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8268 using namespace llvm::AMDGPU::SDWA; 8269 8270 SMLoc S = getLoc(); 8271 StringRef Value; 8272 OperandMatchResultTy res; 8273 8274 SMLoc StringLoc; 8275 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8276 if (res != MatchOperand_Success) { 8277 return res; 8278 } 8279 8280 int64_t Int; 8281 Int = StringSwitch<int64_t>(Value) 8282 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8283 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8284 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8285 .Default(0xffffffff); 8286 8287 if (Int == 0xffffffff) { 8288 Error(StringLoc, "invalid dst_unused value"); 8289 return MatchOperand_ParseFail; 8290 } 8291 8292 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8293 return MatchOperand_Success; 8294 } 8295 8296 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8297 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8298 } 8299 8300 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8301 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8302 } 8303 8304 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8305 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8306 } 8307 8308 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8309 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8310 } 8311 8312 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8313 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8314 } 8315 8316 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8317 uint64_t BasicInstType, 8318 bool SkipDstVcc, 8319 bool SkipSrcVcc) { 8320 using namespace llvm::AMDGPU::SDWA; 8321 8322 OptionalImmIndexMap OptionalIdx; 8323 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8324 bool SkippedVcc = false; 8325 8326 unsigned I = 1; 8327 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8328 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8329 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8330 } 8331 8332 for (unsigned E = Operands.size(); I != E; ++I) { 8333 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8334 if (SkipVcc && !SkippedVcc && Op.isReg() && 8335 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8336 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8337 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8338 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8339 // Skip VCC only if we didn't skip it on previous iteration. 8340 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
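    // Hence the operand counts tested below: a dst vcc is skipped while only
    // the vdst has been added (NumOperands == 1), and a src vcc is skipped
    // once src0 and src1, each with its modifiers operand, are in place
    // (NumOperands == 5).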
8341 if (BasicInstType == SIInstrFlags::VOP2 && 8342 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8343 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8344 SkippedVcc = true; 8345 continue; 8346 } else if (BasicInstType == SIInstrFlags::VOPC && 8347 Inst.getNumOperands() == 0) { 8348 SkippedVcc = true; 8349 continue; 8350 } 8351 } 8352 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8353 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8354 } else if (Op.isImm()) { 8355 // Handle optional arguments 8356 OptionalIdx[Op.getImmTy()] = I; 8357 } else { 8358 llvm_unreachable("Invalid operand type"); 8359 } 8360 SkippedVcc = false; 8361 } 8362 8363 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8364 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8365 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8366 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments 8367 switch (BasicInstType) { 8368 case SIInstrFlags::VOP1: 8369 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8370 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8371 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8372 } 8373 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8376 break; 8377 8378 case SIInstrFlags::VOP2: 8379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8380 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8381 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8382 } 8383 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8384 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8385 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8386 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8387 break; 8388 8389 case SIInstrFlags::VOPC: 8390 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8391 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8392 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8393 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8394 break; 8395 8396 default: 8397 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 8398 } 8399 } 8400 8401 // special case v_mac_{f16, f32}: 8402 // it has src2 register operand that is tied to dst operand 8403 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8404 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8405 auto it = Inst.begin(); 8406 std::advance( 8407 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8408 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8409 } 8410 } 8411 8412 //===----------------------------------------------------------------------===// 8413 // mAI 8414 //===----------------------------------------------------------------------===// 8415 8416 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8417 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8418 } 8419 8420 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8421 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8422 } 8423 8424 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8425 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8426 } 8427 8428 /// Force static initialization. 8429 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8430 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8431 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8432 } 8433 8434 #define GET_REGISTER_MATCHER 8435 #define GET_MATCHER_IMPLEMENTATION 8436 #define GET_MNEMONIC_SPELL_CHECKER 8437 #define GET_MNEMONIC_CHECKER 8438 #include "AMDGPUGenAsmMatcher.inc" 8439 8440 // This function should be defined after auto-generated include so that we have 8441 // MatchClassKind enum defined 8442 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8443 unsigned Kind) { 8444 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8445 // But MatchInstructionImpl() expects to meet token and fails to validate 8446 // operand. This method checks if we are given immediate operand but expect to 8447 // get corresponding token. 8448 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8449 switch (Kind) { 8450 case MCK_addr64: 8451 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8452 case MCK_gds: 8453 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8454 case MCK_lds: 8455 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8456 case MCK_idxen: 8457 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8458 case MCK_offen: 8459 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8460 case MCK_SSrcB32: 8461 // When operands have expression values, they will return true for isToken, 8462 // because it is not possible to distinguish between a token and an 8463 // expression at parse time. MatchInstructionImpl() will always try to 8464 // match an operand as a token, when isToken returns true, and when the 8465 // name of the expression is not a valid token, the match will fail, 8466 // so we need to handle it here. 8467 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8468 case MCK_SSrcF32: 8469 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8470 case MCK_SoppBrTarget: 8471 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8472 case MCK_VReg32OrOff: 8473 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8474 case MCK_InterpSlot: 8475 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8476 case MCK_Attr: 8477 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8478 case MCK_AttrChan: 8479 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8480 case MCK_ImmSMEMOffset: 8481 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8482 case MCK_SReg_64: 8483 case MCK_SReg_64_XEXEC: 8484 // Null is defined as a 32-bit register but 8485 // it should also be enabled with 64-bit operands. 8486 // The following code enables it for SReg_64 operands 8487 // used as source and destination. Remaining source 8488 // operands are handled in isInlinableImm. 8489 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8490 default: 8491 return Match_InvalidOperand; 8492 } 8493 } 8494 8495 //===----------------------------------------------------------------------===// 8496 // endpgm 8497 //===----------------------------------------------------------------------===// 8498 8499 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8500 SMLoc S = getLoc(); 8501 int64_t Imm = 0; 8502 8503 if (!parseExpr(Imm)) { 8504 // The operand is optional, if not present default to 0 8505 Imm = 0; 8506 } 8507 8508 if (!isUInt<16>(Imm)) { 8509 Error(S, "expected a 16-bit value"); 8510 return MatchOperand_ParseFail; 8511 } 8512 8513 Operands.push_back( 8514 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8515 return MatchOperand_Success; 8516 } 8517 8518 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8519