//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Broad classification of a parsed register operand.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A parsed AMDGPU assembly operand: a token, an immediate, a register, or an
/// MC expression, discriminated by \c Kind. The many is*() predicates below
/// are queried by the tablegen'd asm matcher to classify operands against
/// instruction operand classes.
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source-operand modifiers parsed from assembly: abs/neg are FP modifiers,
  /// sext is the integer modifier. FP and int modifiers are mutually
  /// exclusive (asserted in getModifiersOperand()).
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers as a SISrcMods bitmask immediate.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifier as a SISrcMods bitmask immediate.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // What kind of named immediate operand this is (dpp ctrl, offsets, sdwa
  // selects, etc.). ImmTyNone marks a plain numeric immediate.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  // How an immediate will be encoded: as a trailing literal, as an inline
  // constant, or not yet decided.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    // mutable: the setImmKind*() helpers below are const and are used to
    // record the encoding decision on otherwise-const operands.
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Payload; the active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalized "Is" is inconsistent with isImmKindConst()
  // below; kept as-is since renaming would break callers elsewhere.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for any VGPR register class, regardless of width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  // SCSrc*: SGPR or inline constant of the given type.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: SCSrc or a literal (or expression for 32-bit).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: VGPR/SGPR or inline constant of the given type.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: VCSrc or a literal (or expression for 32-bit).
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }
  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // VISrc*: VGPR-only register class or an inline constant of the given type
  // (no literals). The _64/_128/_256/_512/_1024 suffix is the register width
  // in bits.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }
  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  // AISrc*: AGPR register class or an inline constant of the given type.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  // Note: these two VISrc_128 helpers sit among the AGPR (AISrc) predicates.
  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  // KImm: literal immediate carried in the instruction stream.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // For an expression operand treated as a token (see isToken()), the token
  // text is the referenced symbol's name.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers only exist on registers and plain (ImmTyNone) immediates.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifiers bitmask first, then the register/immediate itself.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug name for an ImmTy value; used by print() below.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  // Debug dump of this operand.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // Note: HasExplicitEncodingSize is currently unused in this factory.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Debug formatting of operand modifiers (used by AMDGPUOperand::print).
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
1125 class KernelScopeInfo { 1126 int SgprIndexUnusedMin = -1; 1127 int VgprIndexUnusedMin = -1; 1128 int AgprIndexUnusedMin = -1; 1129 MCContext *Ctx = nullptr; 1130 MCSubtargetInfo const *MSTI = nullptr; 1131 1132 void usesSgprAt(int i) { 1133 if (i >= SgprIndexUnusedMin) { 1134 SgprIndexUnusedMin = ++i; 1135 if (Ctx) { 1136 MCSymbol* const Sym = 1137 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1138 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1139 } 1140 } 1141 } 1142 1143 void usesVgprAt(int i) { 1144 if (i >= VgprIndexUnusedMin) { 1145 VgprIndexUnusedMin = ++i; 1146 if (Ctx) { 1147 MCSymbol* const Sym = 1148 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1149 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1150 VgprIndexUnusedMin); 1151 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1152 } 1153 } 1154 } 1155 1156 void usesAgprAt(int i) { 1157 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1158 if (!hasMAIInsts(*MSTI)) 1159 return; 1160 1161 if (i >= AgprIndexUnusedMin) { 1162 AgprIndexUnusedMin = ++i; 1163 if (Ctx) { 1164 MCSymbol* const Sym = 1165 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1166 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1167 1168 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1169 MCSymbol* const vSym = 1170 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1171 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1172 VgprIndexUnusedMin); 1173 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1174 } 1175 } 1176 } 1177 1178 public: 1179 KernelScopeInfo() = default; 1180 1181 void initialize(MCContext &Context) { 1182 Ctx = &Context; 1183 MSTI = Ctx->getSubtargetInfo(); 1184 1185 usesSgprAt(SgprIndexUnusedMin = -1); 1186 usesVgprAt(VgprIndexUnusedMin = -1); 1187 if (hasMAIInsts(*MSTI)) { 1188 usesAgprAt(AgprIndexUnusedMin = -1); 1189 } 1190 } 
  // Record a register use spanning RegWidth dwords starting at
  // DwordRegIndex; only the highest dword index matters for the counts.
  // Unknown/special register kinds are ignored.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  // Encoding forced by a mnemonic suffix (e.g. _e32/_e64); 0 = no force.
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  // NOTE(review): no in-class initializer, unlike the fields above —
  // presumably assigned before first read during parsing; confirm.
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
1240 /// \param SGPRBlocks [out] Result SGPR block count. 1241 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1242 bool FlatScrUsed, bool XNACKUsed, 1243 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1244 SMRange VGPRRange, unsigned NextFreeSGPR, 1245 SMRange SGPRRange, unsigned &VGPRBlocks, 1246 unsigned &SGPRBlocks); 1247 bool ParseDirectiveAMDGCNTarget(); 1248 bool ParseDirectiveAMDHSAKernel(); 1249 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1250 bool ParseDirectiveHSACodeObjectVersion(); 1251 bool ParseDirectiveHSACodeObjectISA(); 1252 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1253 bool ParseDirectiveAMDKernelCodeT(); 1254 // TODO: Possibly make subtargetHasRegister const. 1255 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1256 bool ParseDirectiveAMDGPUHsaKernel(); 1257 1258 bool ParseDirectiveISAVersion(); 1259 bool ParseDirectiveHSAMetadata(); 1260 bool ParseDirectivePALMetadataBegin(); 1261 bool ParseDirectivePALMetadata(); 1262 bool ParseDirectiveAMDGPULDS(); 1263 1264 /// Common code to parse out a block of text (typically YAML) between start and 1265 /// end directives. 
1266 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1267 const char *AssemblerDirectiveEnd, 1268 std::string &CollectString); 1269 1270 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1271 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1272 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1273 unsigned &RegNum, unsigned &RegWidth, 1274 bool RestoreOnFailure = false); 1275 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1276 unsigned &RegNum, unsigned &RegWidth, 1277 SmallVectorImpl<AsmToken> &Tokens); 1278 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1279 unsigned &RegWidth, 1280 SmallVectorImpl<AsmToken> &Tokens); 1281 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1282 unsigned &RegWidth, 1283 SmallVectorImpl<AsmToken> &Tokens); 1284 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1285 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1286 bool ParseRegRange(unsigned& Num, unsigned& Width); 1287 unsigned getRegularReg(RegisterKind RegKind, 1288 unsigned RegNum, 1289 unsigned RegWidth, 1290 SMLoc Loc); 1291 1292 bool isRegister(); 1293 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1294 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1295 void initializeGprCountSymbol(RegisterKind RegKind); 1296 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1297 unsigned RegWidth); 1298 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1299 bool IsAtomic, bool IsLds = false); 1300 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1301 bool IsGdsHardcoded); 1302 1303 public: 1304 enum AMDGPUMatchResultTy { 1305 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1306 }; 1307 enum OperandMode { 1308 OperandMode_Default, 1309 OperandMode_NSA, 1310 }; 1311 1312 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1313 1314 AMDGPUAsmParser(const MCSubtargetInfo 
&STI, MCAsmParser &_Parser, 1315 const MCInstrInfo &MII, 1316 const MCTargetOptions &Options) 1317 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1318 MCAsmParserExtension::Initialize(Parser); 1319 1320 if (getFeatureBits().none()) { 1321 // Set default features. 1322 copySTI().ToggleFeature("southern-islands"); 1323 } 1324 1325 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1326 1327 { 1328 // TODO: make those pre-defined variables read-only. 1329 // Currently there is none suitable machinery in the core llvm-mc for this. 1330 // MCSymbol::isRedefinable is intended for another purpose, and 1331 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1332 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1333 MCContext &Ctx = getContext(); 1334 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1335 MCSymbol *Sym = 1336 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1337 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1338 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1339 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1340 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1341 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1342 } else { 1343 MCSymbol *Sym = 1344 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1345 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1346 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1347 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1348 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1349 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1350 } 1351 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1352 initializeGprCountSymbol(IS_VGPR); 1353 initializeGprCountSymbol(IS_SGPR); 1354 } else 1355 KernelScope.initialize(getContext()); 
1356 } 1357 } 1358 1359 bool hasMIMG_R128() const { 1360 return AMDGPU::hasMIMG_R128(getSTI()); 1361 } 1362 1363 bool hasPackedD16() const { 1364 return AMDGPU::hasPackedD16(getSTI()); 1365 } 1366 1367 bool hasGFX10A16() const { 1368 return AMDGPU::hasGFX10A16(getSTI()); 1369 } 1370 1371 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1372 1373 bool isSI() const { 1374 return AMDGPU::isSI(getSTI()); 1375 } 1376 1377 bool isCI() const { 1378 return AMDGPU::isCI(getSTI()); 1379 } 1380 1381 bool isVI() const { 1382 return AMDGPU::isVI(getSTI()); 1383 } 1384 1385 bool isGFX9() const { 1386 return AMDGPU::isGFX9(getSTI()); 1387 } 1388 1389 // TODO: isGFX90A is also true for GFX940. We need to clean it. 1390 bool isGFX90A() const { 1391 return AMDGPU::isGFX90A(getSTI()); 1392 } 1393 1394 bool isGFX940() const { 1395 return AMDGPU::isGFX940(getSTI()); 1396 } 1397 1398 bool isGFX9Plus() const { 1399 return AMDGPU::isGFX9Plus(getSTI()); 1400 } 1401 1402 bool isGFX10() const { 1403 return AMDGPU::isGFX10(getSTI()); 1404 } 1405 1406 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1407 1408 bool isGFX10_BEncoding() const { 1409 return AMDGPU::isGFX10_BEncoding(getSTI()); 1410 } 1411 1412 bool hasInv2PiInlineImm() const { 1413 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1414 } 1415 1416 bool hasFlatOffsets() const { 1417 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1418 } 1419 1420 bool hasArchitectedFlatScratch() const { 1421 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1422 } 1423 1424 bool hasSGPR102_SGPR103() const { 1425 return !isVI() && !isGFX9(); 1426 } 1427 1428 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1429 1430 bool hasIntClamp() const { 1431 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1432 } 1433 1434 AMDGPUTargetStreamer &getTargetStreamer() { 1435 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1436 return static_cast<AMDGPUTargetStreamer &>(TS); 
1437 } 1438 1439 const MCRegisterInfo *getMRI() const { 1440 // We need this const_cast because for some reason getContext() is not const 1441 // in MCAsmParser. 1442 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1443 } 1444 1445 const MCInstrInfo *getMII() const { 1446 return &MII; 1447 } 1448 1449 const FeatureBitset &getFeatureBits() const { 1450 return getSTI().getFeatureBits(); 1451 } 1452 1453 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1454 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1455 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1456 1457 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1458 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1459 bool isForcedDPP() const { return ForcedDPP; } 1460 bool isForcedSDWA() const { return ForcedSDWA; } 1461 ArrayRef<unsigned> getMatchedVariants() const; 1462 StringRef getMatchedVariantName() const; 1463 1464 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1465 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1466 bool RestoreOnFailure); 1467 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1468 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1469 SMLoc &EndLoc) override; 1470 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1471 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1472 unsigned Kind) override; 1473 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1474 OperandVector &Operands, MCStreamer &Out, 1475 uint64_t &ErrorInfo, 1476 bool MatchingInlineAsm) override; 1477 bool ParseDirective(AsmToken DirectiveID) override; 1478 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1479 OperandMode Mode = OperandMode_Default); 1480 StringRef parseMnemonicSuffix(StringRef Name); 1481 bool ParseInstruction(ParseInstructionInfo 
&Info, StringRef Name, 1482 SMLoc NameLoc, OperandVector &Operands) override; 1483 //bool ProcessInstruction(MCInst &Inst); 1484 1485 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1486 1487 OperandMatchResultTy 1488 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1489 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1490 bool (*ConvertResult)(int64_t &) = nullptr); 1491 1492 OperandMatchResultTy 1493 parseOperandArrayWithPrefix(const char *Prefix, 1494 OperandVector &Operands, 1495 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1496 bool (*ConvertResult)(int64_t&) = nullptr); 1497 1498 OperandMatchResultTy 1499 parseNamedBit(StringRef Name, OperandVector &Operands, 1500 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1501 OperandMatchResultTy parseCPol(OperandVector &Operands); 1502 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1503 StringRef &Value, 1504 SMLoc &StringLoc); 1505 1506 bool isModifier(); 1507 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1508 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1509 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1510 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1511 bool parseSP3NegModifier(); 1512 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1513 OperandMatchResultTy parseReg(OperandVector &Operands); 1514 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1515 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1516 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1517 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1518 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1519 
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1520 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1521 OperandMatchResultTy parseUfmt(int64_t &Format); 1522 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1523 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1524 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1525 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1526 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1527 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1528 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1529 1530 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1531 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1532 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1533 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1534 1535 bool parseCnt(int64_t &IntVal); 1536 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1537 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1538 1539 private: 1540 struct OperandInfoTy { 1541 SMLoc Loc; 1542 int64_t Id; 1543 bool IsSymbolic = false; 1544 bool IsDefined = false; 1545 1546 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1547 }; 1548 1549 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1550 bool validateSendMsg(const OperandInfoTy &Msg, 1551 const OperandInfoTy &Op, 1552 const OperandInfoTy &Stream); 1553 1554 bool parseHwregBody(OperandInfoTy &HwReg, 1555 OperandInfoTy &Offset, 1556 OperandInfoTy &Width); 1557 bool validateHwreg(const OperandInfoTy &HwReg, 1558 const OperandInfoTy &Offset, 1559 const OperandInfoTy &Width); 1560 1561 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1562 SMLoc getSMEMOffsetLoc(const 
OperandVector &Operands) const; 1563 1564 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1565 const OperandVector &Operands) const; 1566 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1567 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1568 SMLoc getLitLoc(const OperandVector &Operands) const; 1569 SMLoc getConstLoc(const OperandVector &Operands) const; 1570 1571 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1572 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1573 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1574 bool validateSOPLiteral(const MCInst &Inst) const; 1575 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1576 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1577 bool validateIntClampSupported(const MCInst &Inst); 1578 bool validateMIMGAtomicDMask(const MCInst &Inst); 1579 bool validateMIMGGatherDMask(const MCInst &Inst); 1580 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1581 bool validateMIMGDataSize(const MCInst &Inst); 1582 bool validateMIMGAddrSize(const MCInst &Inst); 1583 bool validateMIMGD16(const MCInst &Inst); 1584 bool validateMIMGDim(const MCInst &Inst); 1585 bool validateMIMGMSAA(const MCInst &Inst); 1586 bool validateOpSel(const MCInst &Inst); 1587 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1588 bool validateVccOperand(unsigned Reg) const; 1589 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1590 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1591 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1592 bool validateAGPRLdSt(const MCInst &Inst) const; 1593 bool validateVGPRAlign(const MCInst &Inst) const; 1594 bool validateGWS(const MCInst &Inst, const 
OperandVector &Operands); 1595 bool validateDivScale(const MCInst &Inst); 1596 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1597 const SMLoc &IDLoc); 1598 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1599 unsigned getConstantBusLimit(unsigned Opcode) const; 1600 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1601 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1602 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1603 1604 bool isSupportedMnemo(StringRef Mnemo, 1605 const FeatureBitset &FBS); 1606 bool isSupportedMnemo(StringRef Mnemo, 1607 const FeatureBitset &FBS, 1608 ArrayRef<unsigned> Variants); 1609 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1610 1611 bool isId(const StringRef Id) const; 1612 bool isId(const AsmToken &Token, const StringRef Id) const; 1613 bool isToken(const AsmToken::TokenKind Kind) const; 1614 bool trySkipId(const StringRef Id); 1615 bool trySkipId(const StringRef Pref, const StringRef Id); 1616 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1617 bool trySkipToken(const AsmToken::TokenKind Kind); 1618 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1619 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1620 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1621 1622 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1623 AsmToken::TokenKind getTokenKind() const; 1624 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1625 bool parseExpr(OperandVector &Operands); 1626 StringRef getTokenStr() const; 1627 AsmToken peekToken(); 1628 AsmToken getToken() const; 1629 SMLoc getLoc() const; 1630 void lex(); 1631 1632 public: 1633 void onBeginOfFile() override; 1634 1635 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1636 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1637 1638 OperandMatchResultTy 
parseExpTgt(OperandVector &Operands); 1639 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1640 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1641 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1642 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1643 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1644 1645 bool parseSwizzleOperand(int64_t &Op, 1646 const unsigned MinVal, 1647 const unsigned MaxVal, 1648 const StringRef ErrMsg, 1649 SMLoc &Loc); 1650 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1651 const unsigned MinVal, 1652 const unsigned MaxVal, 1653 const StringRef ErrMsg); 1654 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1655 bool parseSwizzleOffset(int64_t &Imm); 1656 bool parseSwizzleMacro(int64_t &Imm); 1657 bool parseSwizzleQuadPerm(int64_t &Imm); 1658 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1659 bool parseSwizzleBroadcast(int64_t &Imm); 1660 bool parseSwizzleSwap(int64_t &Imm); 1661 bool parseSwizzleReverse(int64_t &Imm); 1662 1663 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1664 int64_t parseGPRIdxMacro(); 1665 1666 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1667 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1668 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1669 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1670 1671 AMDGPUOperand::Ptr defaultCPol() const; 1672 1673 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1674 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1675 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1676 AMDGPUOperand::Ptr defaultFlatOffset() const; 1677 1678 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1679 1680 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1681 
OptionalImmIndexMap &OptionalIdx); 1682 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1683 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1684 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1685 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1686 OptionalImmIndexMap &OptionalIdx); 1687 1688 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1689 1690 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1691 bool IsAtomic = false); 1692 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1693 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1694 1695 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1696 1697 bool parseDimId(unsigned &Encoding); 1698 OperandMatchResultTy parseDim(OperandVector &Operands); 1699 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1700 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1701 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1702 int64_t parseDPPCtrlSel(StringRef Ctrl); 1703 int64_t parseDPPCtrlPerm(); 1704 AMDGPUOperand::Ptr defaultRowMask() const; 1705 AMDGPUOperand::Ptr defaultBankMask() const; 1706 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1707 AMDGPUOperand::Ptr defaultFI() const; 1708 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1709 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1710 1711 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1712 AMDGPUOperand::ImmTy Type); 1713 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1714 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1715 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1716 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1717 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1718 void cvtSdwaVOPC(MCInst &Inst, const 
OperandVector &Operands);
  // Common SDWA conversion; BasicInstType selects VOP1/VOP2/VOPC handling,
  // the Skip*Vcc flags suppress implicit VCC operands.
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

// Table entry describing one optional instruction operand: its assembly
// name, immediate type, whether it is a bare bit, and an optional value
// conversion callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// Map an operand byte size (2/4/8) to the matching IEEE float semantics.
// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Map an AMDGPU operand-type enum to the float semantics a literal for that
// operand should be converted with (32-, 64- or 16-bit).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case
AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

// Return true if FPLiteral can be converted (in place) to VT's float
// semantics without overflow or underflow. Plain precision loss (rounding)
// is accepted; only out-of-range conversions are rejected.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// True if Val fits in Size bits interpreted as either unsigned or signed,
// i.e. truncating to Size bits loses no information under one of the two
// interpretations.
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

// Check 16-bit inline-constant eligibility. For i16/v2i16 only integer
// inline literals are accepted; fp16 handling differs (see comments below).
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

// Decide whether this parsed immediate can be encoded as an inline constant
// for an operand of the given machine value type (no literal dword needed).
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the fp literal must convert to the operand's
    // semantics without overflow/underflow before the inline check.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // Narrower operand: the integer must survive truncation to the operand
  // width (as signed or unsigned) before the inline check.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

// Decide whether this immediate may be encoded as a trailing literal dword
// for an operand of the given type (as opposed to an inline constant).
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero.
We also 1938 // require that the literal may be losslessly converted to f16. 1939 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1940 (type == MVT::v2i16)? MVT::i16 : 1941 (type == MVT::v2f32)? MVT::f32 : type; 1942 1943 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1944 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1945 } 1946 1947 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1948 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1949 } 1950 1951 bool AMDGPUOperand::isVRegWithInputMods() const { 1952 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1953 // GFX90A allows DPP on 64-bit operands. 1954 (isRegClass(AMDGPU::VReg_64RegClassID) && 1955 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1956 } 1957 1958 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1959 if (AsmParser->isVI()) 1960 return isVReg32(); 1961 else if (AsmParser->isGFX9Plus()) 1962 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1963 else 1964 return false; 1965 } 1966 1967 bool AMDGPUOperand::isSDWAFP16Operand() const { 1968 return isSDWAOperand(MVT::f16); 1969 } 1970 1971 bool AMDGPUOperand::isSDWAFP32Operand() const { 1972 return isSDWAOperand(MVT::f32); 1973 } 1974 1975 bool AMDGPUOperand::isSDWAInt16Operand() const { 1976 return isSDWAOperand(MVT::i16); 1977 } 1978 1979 bool AMDGPUOperand::isSDWAInt32Operand() const { 1980 return isSDWAOperand(MVT::i32); 1981 } 1982 1983 bool AMDGPUOperand::isBoolReg() const { 1984 auto FB = AsmParser->getFeatureBits(); 1985 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1986 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1987 } 1988 1989 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1990 { 1991 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1992 assert(Size == 2 || Size == 4 || Size == 8); 1993 1994 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1995 1996 if 
(Imm.Mods.Abs) {
    Val &= ~FpSignMask;    // abs: clear the sign bit
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;     // neg: flip the sign bit
  }

  return Val;
}

// Add this immediate to \p Inst, dispatching to the literal-encoding path
// when the target operand accepts an SI src (inline constant or literal).
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

// Encode \p Val for the next operand of \p Inst: use an inline constant
// when possible, otherwise emit the (possibly converted/truncated) literal.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Packed 16-bit inline-only operands: callers must have validated
    // inlinability beforehand.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  case AMDGPU::OPERAND_KIMM32:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
    setImmKindNone();
    return;
  case AMDGPU::OPERAND_KIMM16:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
    setImmKindNone();
    return;
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Add a KImm (literal-in-instruction) FP operand of the given bit width,
// converting an fp literal token to the target's FP format when needed.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  // Precision loss is tolerated here; range was validated earlier.
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

// Add this operand's register to \p Inst, mapped to the subtarget's
// MC register number.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

// True for special registers that are encoded as inline values.
static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Map a register kind and width (in 32-bit registers) to a register class
// ID, or -1 if no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 6: return AMDGPU::VReg_192RegClassID;
      case 7: return AMDGPU::VReg_224RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 3: return AMDGPU::SGPR_96RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 5: return AMDGPU::SGPR_160RegClassID;
      case 6: return AMDGPU::SGPR_192RegClassID;
      case 7: return AMDGPU::SGPR_224RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 3: return AMDGPU::AReg_96RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 5: return AMDGPU::AReg_160RegClassID;
      case 6: return AMDGPU::AReg_192RegClassID;
      case 7: return AMDGPU::AReg_224RegClassID;
      case 8: return AMDGPU::AReg_256RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}

// Look up a named special register; returns AMDGPU::NoRegister if the name
// is not a recognized special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
                                                       SMLoc &StartLoc,
                                                       SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}

// Grow a register list "[reg, reg, ...]" by one register: either pair up
// special lo/hi halves, or require the next consecutive index.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Prefix-name to register-kind mapping for regular (numbered) registers.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};

static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

static
bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

// Find the RegInfo whose prefix begins \p Str, or nullptr.
// Note: entries are matched in declaration order, so "acc" is tried
// before the shorter "a" prefix.
static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

// Parse a decimal register index; returns false on malformed input.
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}

bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(RegName.size());
    if (!RegSuffix.empty()) {
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(RegSuffix, Num))
        return true;
    } else {
      // A range of registers: r[XX:YY].
      if (NextToken.is(AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

// Translate (kind, first index, width) into a concrete register,
// diagnosing misalignment, unsupported widths and out-of-range indices.
unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth,
                               SMLoc Loc) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}

// Parse "[lo]" or "[lo:hi]" after a register prefix; outputs first index
// and width in 32-bit registers.
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}

// Parse a special register by name; consumes the token only on a match.
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 1;
    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());
    lex(); // skip register name
  }
  return Reg;
}

// Parse a regular register reference: a single register "vXX" or a
// range "v[XX:YY]".
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}

// Parse a bracketed list of consecutive 32-bit registers, e.g. [s0,s1,s2,s3],
// and combine them into a single wide register.
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}

// Top-level register parse: dispatch to special / regular / list parsing,
// then verify availability on the current subtarget.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    // A diagnostic has already been emitted by the failing sub-parser.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}

// Overload that optionally un-lexes all consumed tokens on failure so the
// caller can retry other parses (used by tryParseRegister).
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;

  SmallVector<AsmToken, 1> Tokens;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        getLexer().UnLex(Tokens.pop_back_val());
      }
    }
    return true;
  }
  return false;
}

// Name of the assembler symbol tracking the next free GPR of this kind,
// or None for kinds that are not tracked.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

// Raise the .amdgcn.next_free_{v,s}gpr symbol to cover a newly used
// register range. Returns false only on a diagnosed error.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

// Parse a register reference into an AMDGPUOperand, updating GPR usage
// bookkeeping appropriate for the HSA ABI in effect.
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}

// Parse an immediate: an optionally negated fp literal, or an integer
// (possibly symbolic) expression.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  assert(!isRegister());
  assert(!isModifier());

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc S = getLoc();

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  if (res != MatchOperand_NoMatch) {
    return res;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}

// "abs(", "neg(" or "sext(" introduce a named operand modifier.
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
    const auto &str = Token.getString();
    return str == "abs" || str == "neg" || str == "sext";
  }
  return false;
}

// "name:" introduces an opcode modifier with a value.
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const
AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

// Parse a register or immediate with optional floating-point input
// modifiers (abs/neg in both named and SP3 '|...|' / '-' forms).
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    // Mixing "-..." with "neg(...)" is rejected.
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    // Mixing "abs(...)" with "|...|" is rejected.
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once a modifier was consumed, failure to parse the operand is fatal.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse a register or immediate with an optional integer input modifier
// ("sext(...)").
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Sext?
MatchOperand_ParseFail : Res; 3069 } 3070 3071 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3072 return MatchOperand_ParseFail; 3073 3074 AMDGPUOperand::Modifiers Mods; 3075 Mods.Sext = Sext; 3076 3077 if (Mods.hasIntModifiers()) { 3078 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3079 if (Op.isExpr()) { 3080 Error(Op.getStartLoc(), "expected an absolute expression"); 3081 return MatchOperand_ParseFail; 3082 } 3083 Op.setModifiers(Mods); 3084 } 3085 3086 return MatchOperand_Success; 3087 } 3088 3089 OperandMatchResultTy 3090 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3091 return parseRegOrImmWithFPInputMods(Operands, false); 3092 } 3093 3094 OperandMatchResultTy 3095 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3096 return parseRegOrImmWithIntInputMods(Operands, false); 3097 } 3098 3099 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3100 auto Loc = getLoc(); 3101 if (trySkipId("off")) { 3102 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3103 AMDGPUOperand::ImmTyOff, false)); 3104 return MatchOperand_Success; 3105 } 3106 3107 if (!isRegister()) 3108 return MatchOperand_NoMatch; 3109 3110 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3111 if (Reg) { 3112 Operands.push_back(std::move(Reg)); 3113 return MatchOperand_Success; 3114 } 3115 3116 return MatchOperand_ParseFail; 3117 3118 } 3119 3120 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3121 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3122 3123 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3124 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3125 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3126 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3127 return Match_InvalidOperand; 3128 3129 if ((TSFlags & SIInstrFlags::VOP3) && 3130 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) 
&& 3131 getForcedEncodingSize() != 64) 3132 return Match_PreferE32; 3133 3134 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3135 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3136 // v_mac_f32/16 allow only dst_sel == DWORD; 3137 auto OpNum = 3138 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3139 const auto &Op = Inst.getOperand(OpNum); 3140 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3141 return Match_InvalidOperand; 3142 } 3143 } 3144 3145 return Match_Success; 3146 } 3147 3148 static ArrayRef<unsigned> getAllVariants() { 3149 static const unsigned Variants[] = { 3150 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3151 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3152 }; 3153 3154 return makeArrayRef(Variants); 3155 } 3156 3157 // What asm variants we should check 3158 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3159 if (getForcedEncodingSize() == 32) { 3160 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3161 return makeArrayRef(Variants); 3162 } 3163 3164 if (isForcedVOP3()) { 3165 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3166 return makeArrayRef(Variants); 3167 } 3168 3169 if (isForcedSDWA()) { 3170 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3171 AMDGPUAsmVariants::SDWA9}; 3172 return makeArrayRef(Variants); 3173 } 3174 3175 if (isForcedDPP()) { 3176 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3177 return makeArrayRef(Variants); 3178 } 3179 3180 return getAllVariants(); 3181 } 3182 3183 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3184 if (getForcedEncodingSize() == 32) 3185 return "e32"; 3186 3187 if (isForcedVOP3()) 3188 return "e64"; 3189 3190 if (isForcedSDWA()) 3191 return "sdwa"; 3192 3193 if (isForcedDPP()) 3194 return "dpp"; 3195 3196 return ""; 3197 } 3198 3199 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3200 const 
MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3201 const unsigned Num = Desc.getNumImplicitUses(); 3202 for (unsigned i = 0; i < Num; ++i) { 3203 unsigned Reg = Desc.ImplicitUses[i]; 3204 switch (Reg) { 3205 case AMDGPU::FLAT_SCR: 3206 case AMDGPU::VCC: 3207 case AMDGPU::VCC_LO: 3208 case AMDGPU::VCC_HI: 3209 case AMDGPU::M0: 3210 return Reg; 3211 default: 3212 break; 3213 } 3214 } 3215 return AMDGPU::NoRegister; 3216 } 3217 3218 // NB: This code is correct only when used to check constant 3219 // bus limitations because GFX7 support no f16 inline constants. 3220 // Note that there are no cases when a GFX7 opcode violates 3221 // constant bus limitations due to the use of an f16 constant. 3222 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3223 unsigned OpIdx) const { 3224 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3225 3226 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3227 return false; 3228 } 3229 3230 const MCOperand &MO = Inst.getOperand(OpIdx); 3231 3232 int64_t Val = MO.getImm(); 3233 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3234 3235 switch (OpSize) { // expected operand size 3236 case 8: 3237 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3238 case 4: 3239 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3240 case 2: { 3241 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3242 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3243 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3244 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3245 return AMDGPU::isInlinableIntLiteral(Val); 3246 3247 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3248 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3249 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3250 return AMDGPU::isInlinableIntLiteralV216(Val); 3251 3252 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3253 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3254 OperandType == 
AMDGPU::OPERAND_REG_IMM_V2FP16) 3255 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3256 3257 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3258 } 3259 default: 3260 llvm_unreachable("invalid operand size"); 3261 } 3262 } 3263 3264 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3265 if (!isGFX10Plus()) 3266 return 1; 3267 3268 switch (Opcode) { 3269 // 64-bit shift instructions can use only one scalar value input 3270 case AMDGPU::V_LSHLREV_B64_e64: 3271 case AMDGPU::V_LSHLREV_B64_gfx10: 3272 case AMDGPU::V_LSHRREV_B64_e64: 3273 case AMDGPU::V_LSHRREV_B64_gfx10: 3274 case AMDGPU::V_ASHRREV_I64_e64: 3275 case AMDGPU::V_ASHRREV_I64_gfx10: 3276 case AMDGPU::V_LSHL_B64_e64: 3277 case AMDGPU::V_LSHR_B64_e64: 3278 case AMDGPU::V_ASHR_I64_e64: 3279 return 1; 3280 default: 3281 return 2; 3282 } 3283 } 3284 3285 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3286 const MCOperand &MO = Inst.getOperand(OpIdx); 3287 if (MO.isImm()) { 3288 return !isInlineConstant(Inst, OpIdx); 3289 } else if (MO.isReg()) { 3290 auto Reg = MO.getReg(); 3291 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3292 auto PReg = mc2PseudoReg(Reg); 3293 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3294 } else { 3295 return true; 3296 } 3297 } 3298 3299 bool 3300 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3301 const OperandVector &Operands) { 3302 const unsigned Opcode = Inst.getOpcode(); 3303 const MCInstrDesc &Desc = MII.get(Opcode); 3304 unsigned LastSGPR = AMDGPU::NoRegister; 3305 unsigned ConstantBusUseCount = 0; 3306 unsigned NumLiterals = 0; 3307 unsigned LiteralSize; 3308 3309 if (Desc.TSFlags & 3310 (SIInstrFlags::VOPC | 3311 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3312 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3313 SIInstrFlags::SDWA)) { 3314 // Check special imm operands (used by madmk, etc) 3315 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 
3316 ++NumLiterals; 3317 LiteralSize = 4; 3318 } 3319 3320 SmallDenseSet<unsigned> SGPRsUsed; 3321 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3322 if (SGPRUsed != AMDGPU::NoRegister) { 3323 SGPRsUsed.insert(SGPRUsed); 3324 ++ConstantBusUseCount; 3325 } 3326 3327 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3328 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3329 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3330 3331 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3332 3333 for (int OpIdx : OpIndices) { 3334 if (OpIdx == -1) break; 3335 3336 const MCOperand &MO = Inst.getOperand(OpIdx); 3337 if (usesConstantBus(Inst, OpIdx)) { 3338 if (MO.isReg()) { 3339 LastSGPR = mc2PseudoReg(MO.getReg()); 3340 // Pairs of registers with a partial intersections like these 3341 // s0, s[0:1] 3342 // flat_scratch_lo, flat_scratch 3343 // flat_scratch_lo, flat_scratch_hi 3344 // are theoretically valid but they are disabled anyway. 3345 // Note that this code mimics SIInstrInfo::verifyInstruction 3346 if (!SGPRsUsed.count(LastSGPR)) { 3347 SGPRsUsed.insert(LastSGPR); 3348 ++ConstantBusUseCount; 3349 } 3350 } else { // Expression or a literal 3351 3352 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3353 continue; // special operand like VINTERP attr_chan 3354 3355 // An instruction may use only one literal. 3356 // This has been validated on the previous step. 3357 // See validateVOPLiteral. 3358 // This literal may be used as more than one operand. 3359 // If all these operands are of the same size, 3360 // this literal counts as one scalar value. 3361 // Otherwise it counts as 2 scalar values. 3362 // See "GFX10 Shader Programming", section 3.6.2.3. 
3363 3364 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3365 if (Size < 4) Size = 4; 3366 3367 if (NumLiterals == 0) { 3368 NumLiterals = 1; 3369 LiteralSize = Size; 3370 } else if (LiteralSize != Size) { 3371 NumLiterals = 2; 3372 } 3373 } 3374 } 3375 } 3376 } 3377 ConstantBusUseCount += NumLiterals; 3378 3379 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3380 return true; 3381 3382 SMLoc LitLoc = getLitLoc(Operands); 3383 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3384 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3385 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3386 return false; 3387 } 3388 3389 bool 3390 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3391 const OperandVector &Operands) { 3392 const unsigned Opcode = Inst.getOpcode(); 3393 const MCInstrDesc &Desc = MII.get(Opcode); 3394 3395 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3396 if (DstIdx == -1 || 3397 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3398 return true; 3399 } 3400 3401 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3402 3403 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3404 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3405 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3406 3407 assert(DstIdx != -1); 3408 const MCOperand &Dst = Inst.getOperand(DstIdx); 3409 assert(Dst.isReg()); 3410 3411 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3412 3413 for (int SrcIdx : SrcIndices) { 3414 if (SrcIdx == -1) break; 3415 const MCOperand &Src = Inst.getOperand(SrcIdx); 3416 if (Src.isReg()) { 3417 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3418 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3419 Error(getRegLoc(SrcReg, Operands), 3420 "destination must be different than all sources"); 3421 return false; 3422 } 3423 } 3424 } 
3425 3426 return true; 3427 } 3428 3429 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3430 3431 const unsigned Opc = Inst.getOpcode(); 3432 const MCInstrDesc &Desc = MII.get(Opc); 3433 3434 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3435 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3436 assert(ClampIdx != -1); 3437 return Inst.getOperand(ClampIdx).getImm() == 0; 3438 } 3439 3440 return true; 3441 } 3442 3443 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3444 3445 const unsigned Opc = Inst.getOpcode(); 3446 const MCInstrDesc &Desc = MII.get(Opc); 3447 3448 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3449 return true; 3450 3451 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3452 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3453 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3454 3455 assert(VDataIdx != -1); 3456 3457 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3458 return true; 3459 3460 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3461 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3462 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3463 if (DMask == 0) 3464 DMask = 1; 3465 3466 unsigned DataSize = 3467 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3468 if (hasPackedD16()) { 3469 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3470 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3471 DataSize = (DataSize + 1) / 2; 3472 } 3473 3474 return (VDataSize / 4) == DataSize + TFESize; 3475 } 3476 3477 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3478 const unsigned Opc = Inst.getOpcode(); 3479 const MCInstrDesc &Desc = MII.get(Opc); 3480 3481 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3482 return true; 3483 3484 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3485 3486 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3487 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3488 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3489 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3490 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3491 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3492 3493 assert(VAddr0Idx != -1); 3494 assert(SrsrcIdx != -1); 3495 assert(SrsrcIdx > VAddr0Idx); 3496 3497 if (DimIdx == -1) 3498 return true; // intersect_ray 3499 3500 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3501 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3502 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3503 unsigned ActualAddrSize = 3504 IsNSA ? SrsrcIdx - VAddr0Idx 3505 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3506 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3507 3508 unsigned ExpectedAddrSize = 3509 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3510 3511 if (!IsNSA) { 3512 if (ExpectedAddrSize > 8) 3513 ExpectedAddrSize = 16; 3514 3515 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3516 // This provides backward compatibility for assembly created 3517 // before 160b/192b/224b types were directly supported. 
3518 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3519 return true; 3520 } 3521 3522 return ActualAddrSize == ExpectedAddrSize; 3523 } 3524 3525 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3526 3527 const unsigned Opc = Inst.getOpcode(); 3528 const MCInstrDesc &Desc = MII.get(Opc); 3529 3530 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3531 return true; 3532 if (!Desc.mayLoad() || !Desc.mayStore()) 3533 return true; // Not atomic 3534 3535 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3536 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3537 3538 // This is an incomplete check because image_atomic_cmpswap 3539 // may only use 0x3 and 0xf while other atomic operations 3540 // may use 0x1 and 0x3. However these limitations are 3541 // verified when we check that dmask matches dst size. 3542 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3543 } 3544 3545 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3546 3547 const unsigned Opc = Inst.getOpcode(); 3548 const MCInstrDesc &Desc = MII.get(Opc); 3549 3550 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3551 return true; 3552 3553 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3554 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3555 3556 // GATHER4 instructions use dmask in a different fashion compared to 3557 // other MIMG instructions. The only useful DMASK values are 3558 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3559 // (red,red,red,red) etc.) The ISA document doesn't mention 3560 // this. 
3561 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3562 } 3563 3564 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3565 const unsigned Opc = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opc); 3567 3568 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3569 return true; 3570 3571 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3572 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3573 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3574 3575 if (!BaseOpcode->MSAA) 3576 return true; 3577 3578 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3579 assert(DimIdx != -1); 3580 3581 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3582 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3583 3584 return DimInfo->MSAA; 3585 } 3586 3587 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3588 { 3589 switch (Opcode) { 3590 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3591 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3592 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3593 return true; 3594 default: 3595 return false; 3596 } 3597 } 3598 3599 // movrels* opcodes should only allow VGPRS as src0. 3600 // This is specified in .td description for vop1/vop3, 3601 // but sdwa is handled differently. See isSDWAOperand. 
3602 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3603 const OperandVector &Operands) { 3604 3605 const unsigned Opc = Inst.getOpcode(); 3606 const MCInstrDesc &Desc = MII.get(Opc); 3607 3608 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3609 return true; 3610 3611 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3612 assert(Src0Idx != -1); 3613 3614 SMLoc ErrLoc; 3615 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3616 if (Src0.isReg()) { 3617 auto Reg = mc2PseudoReg(Src0.getReg()); 3618 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3619 if (!isSGPR(Reg, TRI)) 3620 return true; 3621 ErrLoc = getRegLoc(Reg, Operands); 3622 } else { 3623 ErrLoc = getConstLoc(Operands); 3624 } 3625 3626 Error(ErrLoc, "source operand must be a VGPR"); 3627 return false; 3628 } 3629 3630 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3631 const OperandVector &Operands) { 3632 3633 const unsigned Opc = Inst.getOpcode(); 3634 3635 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3636 return true; 3637 3638 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3639 assert(Src0Idx != -1); 3640 3641 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3642 if (!Src0.isReg()) 3643 return true; 3644 3645 auto Reg = mc2PseudoReg(Src0.getReg()); 3646 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3647 if (isSGPR(Reg, TRI)) { 3648 Error(getRegLoc(Reg, Operands), 3649 "source operand must be either a VGPR or an inline constant"); 3650 return false; 3651 } 3652 3653 return true; 3654 } 3655 3656 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3657 const OperandVector &Operands) { 3658 const unsigned Opc = Inst.getOpcode(); 3659 const MCInstrDesc &Desc = MII.get(Opc); 3660 3661 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3662 return true; 3663 3664 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3665 if (Src2Idx == -1) 3666 return true; 
3667 3668 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3669 if (!Src2.isReg()) 3670 return true; 3671 3672 MCRegister Src2Reg = Src2.getReg(); 3673 MCRegister DstReg = Inst.getOperand(0).getReg(); 3674 if (Src2Reg == DstReg) 3675 return true; 3676 3677 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3678 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3679 return true; 3680 3681 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3682 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3683 "source 2 operand must not partially overlap with dst"); 3684 return false; 3685 } 3686 3687 return true; 3688 } 3689 3690 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3691 switch (Inst.getOpcode()) { 3692 default: 3693 return true; 3694 case V_DIV_SCALE_F32_gfx6_gfx7: 3695 case V_DIV_SCALE_F32_vi: 3696 case V_DIV_SCALE_F32_gfx10: 3697 case V_DIV_SCALE_F64_gfx6_gfx7: 3698 case V_DIV_SCALE_F64_vi: 3699 case V_DIV_SCALE_F64_gfx10: 3700 break; 3701 } 3702 3703 // TODO: Check that src0 = src1 or src2. 
3704 3705 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3706 AMDGPU::OpName::src2_modifiers, 3707 AMDGPU::OpName::src2_modifiers}) { 3708 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3709 .getImm() & 3710 SISrcMods::ABS) { 3711 return false; 3712 } 3713 } 3714 3715 return true; 3716 } 3717 3718 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3719 3720 const unsigned Opc = Inst.getOpcode(); 3721 const MCInstrDesc &Desc = MII.get(Opc); 3722 3723 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3724 return true; 3725 3726 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3727 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3728 if (isCI() || isSI()) 3729 return false; 3730 } 3731 3732 return true; 3733 } 3734 3735 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3736 const unsigned Opc = Inst.getOpcode(); 3737 const MCInstrDesc &Desc = MII.get(Opc); 3738 3739 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3740 return true; 3741 3742 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3743 if (DimIdx < 0) 3744 return true; 3745 3746 long Imm = Inst.getOperand(DimIdx).getImm(); 3747 if (Imm < 0 || Imm >= 8) 3748 return false; 3749 3750 return true; 3751 } 3752 3753 static bool IsRevOpcode(const unsigned Opcode) 3754 { 3755 switch (Opcode) { 3756 case AMDGPU::V_SUBREV_F32_e32: 3757 case AMDGPU::V_SUBREV_F32_e64: 3758 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3759 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3760 case AMDGPU::V_SUBREV_F32_e32_vi: 3761 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3762 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3763 case AMDGPU::V_SUBREV_F32_e64_vi: 3764 3765 case AMDGPU::V_SUBREV_CO_U32_e32: 3766 case AMDGPU::V_SUBREV_CO_U32_e64: 3767 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3768 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3769 3770 case AMDGPU::V_SUBBREV_U32_e32: 3771 case AMDGPU::V_SUBBREV_U32_e64: 3772 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3773 case 
AMDGPU::V_SUBBREV_U32_e32_vi: 3774 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3775 case AMDGPU::V_SUBBREV_U32_e64_vi: 3776 3777 case AMDGPU::V_SUBREV_U32_e32: 3778 case AMDGPU::V_SUBREV_U32_e64: 3779 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3780 case AMDGPU::V_SUBREV_U32_e32_vi: 3781 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3782 case AMDGPU::V_SUBREV_U32_e64_vi: 3783 3784 case AMDGPU::V_SUBREV_F16_e32: 3785 case AMDGPU::V_SUBREV_F16_e64: 3786 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3787 case AMDGPU::V_SUBREV_F16_e32_vi: 3788 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3789 case AMDGPU::V_SUBREV_F16_e64_vi: 3790 3791 case AMDGPU::V_SUBREV_U16_e32: 3792 case AMDGPU::V_SUBREV_U16_e64: 3793 case AMDGPU::V_SUBREV_U16_e32_vi: 3794 case AMDGPU::V_SUBREV_U16_e64_vi: 3795 3796 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3797 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3798 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3799 3800 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3801 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3802 3803 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3804 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3805 3806 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3807 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3808 3809 case AMDGPU::V_LSHRREV_B32_e32: 3810 case AMDGPU::V_LSHRREV_B32_e64: 3811 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3812 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3813 case AMDGPU::V_LSHRREV_B32_e32_vi: 3814 case AMDGPU::V_LSHRREV_B32_e64_vi: 3815 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3816 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3817 3818 case AMDGPU::V_ASHRREV_I32_e32: 3819 case AMDGPU::V_ASHRREV_I32_e64: 3820 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3821 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3822 case AMDGPU::V_ASHRREV_I32_e32_vi: 3823 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3824 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3825 case AMDGPU::V_ASHRREV_I32_e64_vi: 3826 3827 case AMDGPU::V_LSHLREV_B32_e32: 3828 case AMDGPU::V_LSHLREV_B32_e64: 3829 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3830 case 
AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3831 case AMDGPU::V_LSHLREV_B32_e32_vi: 3832 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3833 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3834 case AMDGPU::V_LSHLREV_B32_e64_vi: 3835 3836 case AMDGPU::V_LSHLREV_B16_e32: 3837 case AMDGPU::V_LSHLREV_B16_e64: 3838 case AMDGPU::V_LSHLREV_B16_e32_vi: 3839 case AMDGPU::V_LSHLREV_B16_e64_vi: 3840 case AMDGPU::V_LSHLREV_B16_gfx10: 3841 3842 case AMDGPU::V_LSHRREV_B16_e32: 3843 case AMDGPU::V_LSHRREV_B16_e64: 3844 case AMDGPU::V_LSHRREV_B16_e32_vi: 3845 case AMDGPU::V_LSHRREV_B16_e64_vi: 3846 case AMDGPU::V_LSHRREV_B16_gfx10: 3847 3848 case AMDGPU::V_ASHRREV_I16_e32: 3849 case AMDGPU::V_ASHRREV_I16_e64: 3850 case AMDGPU::V_ASHRREV_I16_e32_vi: 3851 case AMDGPU::V_ASHRREV_I16_e64_vi: 3852 case AMDGPU::V_ASHRREV_I16_gfx10: 3853 3854 case AMDGPU::V_LSHLREV_B64_e64: 3855 case AMDGPU::V_LSHLREV_B64_gfx10: 3856 case AMDGPU::V_LSHLREV_B64_vi: 3857 3858 case AMDGPU::V_LSHRREV_B64_e64: 3859 case AMDGPU::V_LSHRREV_B64_gfx10: 3860 case AMDGPU::V_LSHRREV_B64_vi: 3861 3862 case AMDGPU::V_ASHRREV_I64_e64: 3863 case AMDGPU::V_ASHRREV_I64_gfx10: 3864 case AMDGPU::V_ASHRREV_I64_vi: 3865 3866 case AMDGPU::V_PK_LSHLREV_B16: 3867 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3868 case AMDGPU::V_PK_LSHLREV_B16_vi: 3869 3870 case AMDGPU::V_PK_LSHRREV_B16: 3871 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3872 case AMDGPU::V_PK_LSHRREV_B16_vi: 3873 case AMDGPU::V_PK_ASHRREV_I16: 3874 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3875 case AMDGPU::V_PK_ASHRREV_I16_vi: 3876 return true; 3877 default: 3878 return false; 3879 } 3880 } 3881 3882 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3883 3884 using namespace SIInstrFlags; 3885 const unsigned Opcode = Inst.getOpcode(); 3886 const MCInstrDesc &Desc = MII.get(Opcode); 3887 3888 // lds_direct register is defined so that it can be used 3889 // with 9-bit operands only. Ignore encodings which do not accept these. 
3890 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3891 if ((Desc.TSFlags & Enc) == 0) 3892 return None; 3893 3894 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3895 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3896 if (SrcIdx == -1) 3897 break; 3898 const auto &Src = Inst.getOperand(SrcIdx); 3899 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3900 3901 if (isGFX90A()) 3902 return StringRef("lds_direct is not supported on this GPU"); 3903 3904 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3905 return StringRef("lds_direct cannot be used with this instruction"); 3906 3907 if (SrcName != OpName::src0) 3908 return StringRef("lds_direct may be used as src0 only"); 3909 } 3910 } 3911 3912 return None; 3913 } 3914 3915 /* Returns the location of the flat-offset operand in Operands, or the current lexer location if the instruction has none. */ SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3916 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3917 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3918 if (Op.isFlatOffset()) 3919 return Op.getStartLoc(); 3920 } 3921 return getLoc(); 3922 } 3923 3924 /* Validates the offset operand of a FLAT-family instruction: a non-zero offset requires hasFlatOffsets(), and the value must fit the target's signed (global/scratch) or unsigned (plain flat) offset width. */ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3925 const OperandVector &Operands) { 3926 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3927 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3928 return true; 3929 3930 auto Opcode = Inst.getOpcode(); 3931 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3932 assert(OpNum != -1); 3933 3934 const auto &Op = Inst.getOperand(OpNum); 3935 if (!hasFlatOffsets() && Op.getImm() != 0) { 3936 Error(getFlatOffsetLoc(Operands), 3937 "flat offset modifier is not supported on this GPU"); 3938 return false; 3939 } 3940 3941 // For FLAT segment the offset must be positive; 3942 // MSB is ignored and forced to zero.
3943 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3944 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3945 if (!isIntN(OffsetSize, Op.getImm())) { 3946 Error(getFlatOffsetLoc(Operands), 3947 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3948 return false; 3949 } 3950 } else { 3951 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3952 if (!isUIntN(OffsetSize, Op.getImm())) { 3953 Error(getFlatOffsetLoc(Operands), 3954 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3955 return false; 3956 } 3957 } 3958 3959 return true; 3960 } 3961 3962 /* Returns the location of the SMEM offset operand, or the current lexer location if none is present. */ SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3963 // Start with second operand because SMEM Offset cannot be dst or src0. 3964 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3965 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3966 if (Op.isSMEMOffset()) 3967 return Op.getStartLoc(); 3968 } 3969 return getLoc(); 3970 } 3971 3972 /* Rejects SMEM immediate offsets that are not encodable on this target; SI/CI need no check here. */ bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3973 const OperandVector &Operands) { 3974 if (isCI() || isSI()) 3975 return true; 3976 3977 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3978 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3979 return true; 3980 3981 auto Opcode = Inst.getOpcode(); 3982 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3983 if (OpNum == -1) 3984 return true; 3985 3986 const auto &Op = Inst.getOperand(OpNum); 3987 if (!Op.isImm()) 3988 return true; 3989 3990 uint64_t Offset = Op.getImm(); 3991 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3992 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3993 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3994 return true; 3995 3996 Error(getSMEMOffsetLoc(Operands), 3997 (isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset" : 3998 "expected a 21-bit signed offset"); 3999 4000 return false; 4001 } 4002 4003 /* SOP2/SOPC may encode at most one unique 32-bit literal (or relocatable expression) across src0/src1. */ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4004 unsigned Opcode = Inst.getOpcode(); 4005 const MCInstrDesc &Desc = MII.get(Opcode); 4006 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4007 return true; 4008 4009 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4010 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4011 4012 const int OpIndices[] = { Src0Idx, Src1Idx }; 4013 4014 unsigned NumExprs = 0; 4015 unsigned NumLiterals = 0; 4016 uint32_t LiteralValue; /* only read once NumLiterals > 0, i.e. after it has been written */ 4017 4018 for (int OpIdx : OpIndices) { 4019 if (OpIdx == -1) break; 4020 4021 const MCOperand &MO = Inst.getOperand(OpIdx); 4022 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4023 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4024 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4025 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4026 if (NumLiterals == 0 || LiteralValue != Value) { 4027 LiteralValue = Value; 4028 ++NumLiterals; 4029 } 4030 } else if (MO.isExpr()) { 4031 ++NumExprs; 4032 } 4033 } 4034 } 4035 4036 return NumLiterals + NumExprs <= 1; 4037 } 4038 4039 /* For v_permlane16/v_permlanex16 (gfx10) only the two low op_sel bits may be set. */ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4040 const unsigned Opc = Inst.getOpcode(); 4041 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4042 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4043 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4044 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4045 4046 if (OpSel & ~3) 4047 return false; 4048 } 4049 return true; 4050 } 4051 4052 /* If src0 is a 64-bit register (has a sub1 subreg), the dpp control must pass isLegal64BitDPPControl (row_newbcast only). */ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4053 const OperandVector &Operands) { 4054 const unsigned Opc = Inst.getOpcode(); 4055 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4056 if (DppCtrlIdx < 0) 4057 return true; 4058 unsigned DppCtrl =
Inst.getOperand(DppCtrlIdx).getImm(); 4059 4060 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4061 // DPP64 is supported for row_newbcast only. 4062 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4063 if (Src0Idx >= 0 && 4064 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4065 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4066 Error(S, "64 bit dpp only supports row_newbcast"); 4067 return false; 4068 } 4069 } 4070 4071 return true; 4072 } 4073 4074 // Check if VCC register matches wavefront size 4075 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4076 auto FB = getFeatureBits(); 4077 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4078 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4079 } 4080 4081 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4082 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4083 const OperandVector &Operands) { 4084 unsigned Opcode = Inst.getOpcode(); 4085 const MCInstrDesc &Desc = MII.get(Opcode); 4086 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4087 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4088 ImmIdx == -1) 4089 return true; 4090 4091 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4092 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4093 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4094 4095 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4096 4097 unsigned NumExprs = 0; 4098 unsigned NumLiterals = 0; 4099 uint32_t LiteralValue; /* only read once NumLiterals > 0, i.e. after it has been written */ 4100 4101 for (int OpIdx : OpIndices) { 4102 if (OpIdx == -1) 4103 continue; 4104 4105 const MCOperand &MO = Inst.getOperand(OpIdx); 4106 if (!MO.isImm() && !MO.isExpr()) 4107 continue; 4108 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4109 continue; 4110 4111 if (OpIdx == Src2Idx && (Desc.TSFlags &
SIInstrFlags::IsMAI) && 4112 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4113 Error(getConstLoc(Operands), 4114 "inline constants are not allowed for this operand"); 4115 return false; 4116 } 4117 4118 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4119 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4120 if (NumLiterals == 0 || LiteralValue != Value) { 4121 LiteralValue = Value; 4122 ++NumLiterals; 4123 } 4124 } else if (MO.isExpr()) { 4125 ++NumExprs; 4126 } 4127 } 4128 NumLiterals += NumExprs; 4129 4130 if (!NumLiterals) 4131 return true; 4132 4133 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4134 Error(getLitLoc(Operands), "literal operands are not supported"); 4135 return false; 4136 } 4137 4138 if (NumLiterals > 1) { 4139 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4140 return false; 4141 } 4142 4143 return true; 4144 } 4145 4146 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4147 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4148 const MCRegisterInfo *MRI) { 4149 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4150 if (OpIdx < 0) 4151 return -1; 4152 4153 const MCOperand &Op = Inst.getOperand(OpIdx); 4154 if (!Op.isReg()) 4155 return -1; 4156 4157 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4158 auto Reg = Sub ? Sub : Op.getReg(); 4159 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4160 return AGPR32.contains(Reg) ? 1 : 0; 4161 } 4162 4163 /* For FLAT/MUBUF/MTBUF/MIMG/DS memory ops: with gfx90a features, dst and data must be in the same class (all VGPR or all AGPR); on other targets AGPR data/dst operands are rejected. */ bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4164 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4165 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4166 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4167 SIInstrFlags::DS)) == 0) 4168 return true; 4169 4170 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ?
AMDGPU::OpName::data0 4171 : AMDGPU::OpName::vdata; 4172 4173 const MCRegisterInfo *MRI = getMRI(); 4174 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4175 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4176 4177 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4178 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4179 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4180 return false; 4181 } 4182 4183 auto FB = getFeatureBits(); 4184 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4185 if (DataAreg < 0 || DstAreg < 0) 4186 return true; 4187 return DstAreg == DataAreg; 4188 } 4189 4190 return DstAreg < 1 && DataAreg < 1; 4191 } 4192 4193 /* gfx90a requires the first register of any VGPR/AGPR tuple to be even-numbered (64-bit aligned). */ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4194 auto FB = getFeatureBits(); 4195 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4196 return true; 4197 4198 const MCRegisterInfo *MRI = getMRI(); 4199 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4200 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4201 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4202 const MCOperand &Op = Inst.getOperand(I); 4203 if (!Op.isReg()) 4204 continue; 4205 4206 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4207 if (!Sub) 4208 continue; 4209 4210 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4211 return false; 4212 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4213 return false; 4214 } 4215 4216 return true; 4217 } 4218 4219 // gfx90a has an undocumented limitation: 4220 // DS_GWS opcodes must use even aligned registers.
4221 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4222 const OperandVector &Operands) { 4223 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4224 return true; 4225 4226 int Opc = Inst.getOpcode(); 4227 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4228 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4229 return true; 4230 4231 const MCRegisterInfo *MRI = getMRI(); 4232 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4233 int Data0Pos = 4234 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4235 assert(Data0Pos != -1); 4236 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4237 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4238 if (RegIdx & 1) { 4239 SMLoc RegLoc = getRegLoc(Reg, Operands); 4240 Error(RegLoc, "vgpr must be even aligned"); 4241 return false; 4242 } 4243 4244 return true; 4245 } 4246 4247 /* Validates cpol (cache-policy) bits: SMRD accepts only glc/dlc; scc is rejected on gfx90a (but not gfx940); returning atomics must set glc (sc0 on gfx940) while non-returning atomics must not. */ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4248 const OperandVector &Operands, 4249 const SMLoc &IDLoc) { 4250 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4251 AMDGPU::OpName::cpol); 4252 if (CPolPos == -1) 4253 return true; 4254 4255 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4256 4257 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4258 if ((TSFlags & (SIInstrFlags::SMRD)) && 4259 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4260 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4261 return false; 4262 } 4263 4264 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4265 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4266 StringRef CStr(S.getPointer()); 4267 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4268 Error(S, "scc is not supported on this GPU"); 4269 return false; 4270 } 4271 4272 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4273 return true; 4274 4275 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4276 if (!(TSFlags & SIInstrFlags::MIMG) &&
!(CPol & CPol::GLC)) { 4277 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4278 : "instruction must use glc"); 4279 return false; 4280 } 4281 } else { 4282 if (CPol & CPol::GLC) { 4283 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4284 StringRef CStr(S.getPointer()); 4285 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4286 Error(S, isGFX940() ? "instruction must not use sc0" 4287 : "instruction must not use glc"); 4288 return false; 4289 } 4290 } 4291 4292 return true; 4293 } 4294 4295 /* Top-level semantic validation dispatcher: runs each per-feature check in turn and reports a diagnostic at the most specific location available; returns false on the first failure. */ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4296 const SMLoc &IDLoc, 4297 const OperandVector &Operands) { 4298 if (auto ErrMsg = validateLdsDirect(Inst)) { 4299 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4300 return false; 4301 } 4302 if (!validateSOPLiteral(Inst)) { 4303 Error(getLitLoc(Operands), 4304 "only one literal operand is allowed"); 4305 return false; 4306 } 4307 if (!validateVOPLiteral(Inst, Operands)) { 4308 return false; 4309 } 4310 if (!validateConstantBusLimitations(Inst, Operands)) { 4311 return false; 4312 } 4313 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4314 return false; 4315 } 4316 if (!validateIntClampSupported(Inst)) { 4317 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4318 "integer clamping is not supported on this GPU"); 4319 return false; 4320 } 4321 if (!validateOpSel(Inst)) { 4322 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4323 "invalid op_sel operand"); 4324 return false; 4325 } 4326 if (!validateDPP(Inst, Operands)) { 4327 return false; 4328 } 4329 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4330 if (!validateMIMGD16(Inst)) { 4331 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4332 "d16 modifier is not supported on this GPU"); 4333 return false; 4334 } 4335 if (!validateMIMGDim(Inst)) { 4336 Error(IDLoc, "dim modifier is required on this GPU"); 4337 return false; 4338 } 4339 if (!validateMIMGMSAA(Inst)) { 4340 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4341 "invalid dim; must be MSAA type"); 4342 return false; 4343 } 4344 if (!validateMIMGDataSize(Inst)) { 4345 Error(IDLoc, 4346 "image data size does not match dmask and tfe"); 4347 return false; 4348 } 4349 if (!validateMIMGAddrSize(Inst)) { 4350 Error(IDLoc, 4351 "image address size does not match dim and a16"); 4352 return false; 4353 } 4354 if (!validateMIMGAtomicDMask(Inst)) { 4355 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4356 "invalid atomic image dmask"); 4357 return false; 4358 } 4359 if (!validateMIMGGatherDMask(Inst)) { 4360 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4361 "invalid image_gather dmask: only one bit must be set"); 4362 return false; 4363 } 4364 if (!validateMovrels(Inst, Operands)) { 4365 return false; 4366 } 4367 if (!validateFlatOffset(Inst, Operands)) { 4368 return false; 4369 } 4370 if (!validateSMEMOffset(Inst, Operands)) { 4371 return false; 4372 } 4373 if (!validateMAIAccWrite(Inst, Operands)) { 4374 return false; 4375 } 4376 if (!validateMFMA(Inst, Operands)) { 4377 return false; 4378 } 4379 /* NOTE(review): validateCoherencyBits is invoked a second time a few checks below before returning; one of the two calls appears redundant -- confirm and drop one. */ if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4380 return false; 4381 } 4382 4383 if (!validateAGPRLdSt(Inst)) { 4384 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4385 ?
"invalid register class: data and dst should be all VGPR or AGPR" 4386 : "invalid register class: agpr loads and stores not supported on this GPU" 4387 ); 4388 return false; 4389 } 4390 if (!validateVGPRAlign(Inst)) { 4391 Error(IDLoc, 4392 "invalid register class: vgpr tuples must be 64 bit aligned"); 4393 return false; 4394 } 4395 if (!validateGWS(Inst, Operands)) { 4396 return false; 4397 } 4398 4399 if (!validateDivScale(Inst)) { 4400 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4401 return false; 4402 } 4403 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4404 return false; 4405 } 4406 4407 return true; 4408 } 4409 4410 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4411 const FeatureBitset &FBS, 4412 unsigned VariantID = 0); 4413 4414 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4415 const FeatureBitset &AvailableFeatures, 4416 unsigned VariantID); 4417 4418 /* Convenience overload: checks the mnemonic against every assembler variant. */ bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4419 const FeatureBitset &FBS) { 4420 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4421 } 4422 4423 /* Returns true if the mnemonic is recognized under feature set FBS in any of the given variants. */ bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4424 const FeatureBitset &FBS, 4425 ArrayRef<unsigned> Variants) { 4426 for (auto Variant : Variants) { 4427 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4428 return true; 4429 } 4430 4431 return false; 4432 } 4433 4434 /* Produces the most helpful "unsupported instruction" diagnostic: wrong variant, unsupported on this GPU, or unknown mnemonic (with a spell-check suggestion). Returns false if the mnemonic is in fact supported. */ bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4435 const SMLoc &IDLoc) { 4436 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4437 4438 // Check if requested instruction variant is supported. 4439 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4440 return false; 4441 4442 // This instruction is not supported. 4443 // Clear any other pending errors because they are no longer relevant. 4444 getParser().clearPendingErrors(); 4445 4446 // Requested instruction variant is not supported. 4447 // Check if any other variants are supported.
4448 StringRef VariantName = getMatchedVariantName(); 4449 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4450 return Error(IDLoc, 4451 Twine(VariantName, 4452 " variant of this instruction is not supported")); 4453 } 4454 4455 // Finally check if this instruction is supported on any other GPU. 4456 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4457 return Error(IDLoc, "instruction not supported on this GPU"); 4458 } 4459 4460 // Instruction not supported on any GPU. Probably a typo. 4461 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4462 return Error(IDLoc, "invalid instruction" + Suggestion); 4463 } 4464 4465 /* Matches the parsed operands against each allowed variant, keeping the most specific failure status; on success the instruction is validated and emitted. */ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4466 OperandVector &Operands, 4467 MCStreamer &Out, 4468 uint64_t &ErrorInfo, 4469 bool MatchingInlineAsm) { 4470 MCInst Inst; 4471 unsigned Result = Match_Success; 4472 for (auto Variant : getMatchedVariants()) { 4473 uint64_t EI; 4474 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4475 Variant); 4476 // We order match statuses from least to most specific.
We use most specific 4477 // status as the resulting one: 4478 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4479 if ((R == Match_Success) || 4480 (R == Match_PreferE32) || 4481 (R == Match_MissingFeature && Result != Match_PreferE32) || 4482 (R == Match_InvalidOperand && Result != Match_MissingFeature 4483 && Result != Match_PreferE32) || 4484 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4485 && Result != Match_MissingFeature 4486 && Result != Match_PreferE32)) { 4487 Result = R; 4488 ErrorInfo = EI; 4489 } 4490 if (R == Match_Success) 4491 break; 4492 } 4493 4494 if (Result == Match_Success) { 4495 if (!validateInstruction(Inst, IDLoc, Operands)) { 4496 return true; 4497 } 4498 Inst.setLoc(IDLoc); 4499 Out.emitInstruction(Inst, getSTI()); 4500 return false; 4501 } 4502 4503 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4504 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4505 return true; 4506 } 4507 4508 switch (Result) { 4509 default: break; 4510 case Match_MissingFeature: 4511 // It has been verified that the specified instruction 4512 // mnemonic is valid. A match was found but it requires 4513 // features which are not supported on this GPU.
4514 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4515 4516 case Match_InvalidOperand: { 4517 SMLoc ErrorLoc = IDLoc; 4518 if (ErrorInfo != ~0ULL) { 4519 if (ErrorInfo >= Operands.size()) { 4520 return Error(IDLoc, "too few operands for instruction"); 4521 } 4522 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4523 if (ErrorLoc == SMLoc()) 4524 ErrorLoc = IDLoc; 4525 } 4526 return Error(ErrorLoc, "invalid operand for instruction"); 4527 } 4528 4529 case Match_PreferE32: 4530 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4531 "should be encoded as e32"); 4532 case Match_MnemonicFail: 4533 llvm_unreachable("Invalid instructions should have been handled already"); 4534 } 4535 llvm_unreachable("Implement any new match types added!"); 4536 } 4537 4538 /* Parses the next integer/identifier token as an absolute expression into Ret; returns true on failure. */ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4539 int64_t Tmp = -1; 4540 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4541 return true; 4542 } 4543 if (getParser().parseAbsoluteExpression(Tmp)) { 4544 return true; 4545 } 4546 Ret = static_cast<uint32_t>(Tmp); 4547 return false; 4548 } 4549 4550 /* Parses a "major, minor" version pair for HSA code-object directives. */ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4551 uint32_t &Minor) { 4552 if (ParseAsAbsoluteExpression(Major)) 4553 return TokError("invalid major version"); 4554 4555 if (!trySkipToken(AsmToken::Comma)) 4556 return TokError("minor version number required, comma expected"); 4557 4558 if (ParseAsAbsoluteExpression(Minor)) 4559 return TokError("invalid minor version"); 4560 4561 return false; 4562 } 4563 4564 /* Handles .amdgcn_target: the quoted target-id string must match the one computed from the subtarget. */ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4565 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4566 return TokError("directive only supported for amdgcn architecture"); 4567 4568 std::string TargetIDDirective; 4569 SMLoc TargetStart = getTok().getLoc(); 4570 if (getParser().parseEscapedString(TargetIDDirective)) 4571 return true; 4572 4573 SMRange TargetRange =
SMRange(TargetStart, getTok().getLoc()); 4574 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4575 return getParser().Error(TargetRange.Start, 4576 (Twine(".amdgcn_target directive's target id ") + 4577 Twine(TargetIDDirective) + 4578 Twine(" does not match the specified target id ") + 4579 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4580 4581 return false; 4582 } 4583 4584 /* Emits a "value out of range" diagnostic anchored at Range. */ bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4585 return Error(Range.Start, "value out of range", Range); 4586 } 4587 4588 /* Computes granulated VGPR/SGPR block counts for the kernel descriptor; out-of-range counts are reported against the directive locations in VGPRRange/SGPRRange. Returns true on error. */ bool AMDGPUAsmParser::calculateGPRBlocks( 4589 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4590 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4591 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4592 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4593 // TODO(scott.linder): These calculations are duplicated from 4594 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4595 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4596 4597 unsigned NumVGPRs = NextFreeVGPR; 4598 unsigned NumSGPRs = NextFreeSGPR; 4599 4600 if (Version.Major >= 10) 4601 NumSGPRs = 0; 4602 else { 4603 unsigned MaxAddressableNumSGPRs = 4604 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4605 4606 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4607 NumSGPRs > MaxAddressableNumSGPRs) 4608 return OutOfRangeError(SGPRRange); 4609 4610 NumSGPRs += 4611 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4612 4613 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4614 NumSGPRs > MaxAddressableNumSGPRs) 4615 return OutOfRangeError(SGPRRange); 4616 4617 if (Features.test(FeatureSGPRInitBug)) 4618 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4619 } 4620 4621 VGPRBlocks = 4622 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4623 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4624 4625 return false;
4626 } 4627 4628 /* Parses the .amdhsa_kernel directive block through .end_amdhsa_kernel, filling a kernel_descriptor_t and emitting it via the target streamer. */ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4629 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4630 return TokError("directive only supported for amdgcn architecture"); 4631 4632 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4633 return TokError("directive only supported for amdhsa OS"); 4634 4635 StringRef KernelName; 4636 if (getParser().parseIdentifier(KernelName)) 4637 return true; 4638 4639 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4640 4641 StringSet<> Seen; 4642 4643 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4644 4645 SMRange VGPRRange; 4646 uint64_t NextFreeVGPR = 0; 4647 uint64_t AccumOffset = 0; 4648 SMRange SGPRRange; 4649 uint64_t NextFreeSGPR = 0; 4650 4651 // Count the number of user SGPRs implied from the enabled feature bits. 4652 unsigned ImpliedUserSGPRCount = 0; 4653 4654 // Track if the asm explicitly contains the directive for the user SGPR 4655 // count. 4656 Optional<unsigned> ExplicitUserSGPRCount; 4657 bool ReserveVCC = true; 4658 bool ReserveFlatScr = true; 4659 Optional<bool> EnableWavefrontSize32; 4660 4661 while (true) { 4662 while (trySkipToken(AsmToken::EndOfStatement)); 4663 4664 StringRef ID; 4665 SMRange IDRange = getTok().getLocRange(); 4666 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4667 return true; 4668 4669 if (ID == ".end_amdhsa_kernel") 4670 break; 4671 4672 if (Seen.find(ID) != Seen.end()) 4673 return TokError(".amdhsa_ directives cannot be repeated"); 4674 Seen.insert(ID); 4675 4676 SMLoc ValStart = getLoc(); 4677 int64_t IVal; 4678 if (getParser().parseAbsoluteExpression(IVal)) 4679 return true; 4680 SMLoc ValEnd = getLoc(); 4681 SMRange ValRange = SMRange(ValStart, ValEnd); 4682 4683 if (IVal < 0) 4684 return OutOfRangeError(ValRange); 4685 4686 uint64_t Val = IVal; 4687 4688 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4689 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4690 return
OutOfRangeError(RANGE); \ 4691 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4692 4693 /* Dispatch on the directive name; each branch validates Val and updates the kernel descriptor or local parse state. */ if (ID == ".amdhsa_group_segment_fixed_size") { 4694 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4695 return OutOfRangeError(ValRange); 4696 KD.group_segment_fixed_size = Val; 4697 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4698 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4699 return OutOfRangeError(ValRange); 4700 KD.private_segment_fixed_size = Val; 4701 } else if (ID == ".amdhsa_kernarg_size") { 4702 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4703 return OutOfRangeError(ValRange); 4704 KD.kernarg_size = Val; 4705 } else if (ID == ".amdhsa_user_sgpr_count") { 4706 ExplicitUserSGPRCount = Val; 4707 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4708 if (hasArchitectedFlatScratch()) 4709 return Error(IDRange.Start, 4710 "directive is not supported with architected flat scratch", 4711 IDRange); 4712 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4713 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4714 Val, ValRange); 4715 if (Val) 4716 ImpliedUserSGPRCount += 4; 4717 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4718 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4719 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4720 ValRange); 4721 if (Val) 4722 ImpliedUserSGPRCount += 2; 4723 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4724 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4725 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4726 ValRange); 4727 if (Val) 4728 ImpliedUserSGPRCount += 2; 4729 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4730 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4731 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4732 Val, ValRange); 4733 if (Val) 4734 ImpliedUserSGPRCount += 2; 4735 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4736 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4737 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4738 ValRange); 4739 if (Val) 4740 ImpliedUserSGPRCount += 2; 4741 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4742 if (hasArchitectedFlatScratch()) 4743 return Error(IDRange.Start, 4744 "directive is not supported with architected flat scratch", 4745 IDRange); 4746 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4747 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4748 ValRange); 4749 if (Val) 4750 ImpliedUserSGPRCount += 2; 4751 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4752 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4753 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4754 Val, ValRange); 4755 if (Val) 4756 ImpliedUserSGPRCount += 1; 4757 } else if (ID == ".amdhsa_wavefront_size32") { 4758 if (IVersion.Major < 10) 4759 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4760 EnableWavefrontSize32 = Val; 4761 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4762 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4763 Val, ValRange); 4764 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4765 if (hasArchitectedFlatScratch()) 4766 return Error(IDRange.Start, 4767 "directive is not supported with architected flat scratch", 4768 IDRange); 4769 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4770 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4771 } else if (ID == ".amdhsa_enable_private_segment") { 4772 if (!hasArchitectedFlatScratch()) 4773 return Error( 4774 IDRange.Start, 4775 "directive is not supported without architected flat scratch", 4776 IDRange); 4777 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4778 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4779 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4780 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4781 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4782 ValRange); 4783 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4784 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4785 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
Val, 4786 ValRange); 4787 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4788 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4789 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4790 ValRange); 4791 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4792 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4793 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4794 ValRange); 4795 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4796 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4797 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4798 ValRange); 4799 } else if (ID == ".amdhsa_next_free_vgpr") { 4800 VGPRRange = ValRange; 4801 NextFreeVGPR = Val; 4802 } else if (ID == ".amdhsa_next_free_sgpr") { 4803 SGPRRange = ValRange; 4804 NextFreeSGPR = Val; 4805 } else if (ID == ".amdhsa_accum_offset") { 4806 if (!isGFX90A()) 4807 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4808 AccumOffset = Val; 4809 } else if (ID == ".amdhsa_reserve_vcc") { 4810 if (!isUInt<1>(Val)) 4811 return OutOfRangeError(ValRange); 4812 ReserveVCC = Val; 4813 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4814 if (IVersion.Major < 7) 4815 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4816 if (hasArchitectedFlatScratch()) 4817 return Error(IDRange.Start, 4818 "directive is not supported with architected flat scratch", 4819 IDRange); 4820 if (!isUInt<1>(Val)) 4821 return OutOfRangeError(ValRange); 4822 ReserveFlatScr = Val; 4823 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4824 if (IVersion.Major < 8) 4825 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4826 if (!isUInt<1>(Val)) 4827 return OutOfRangeError(ValRange); 4828 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4829 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4830 IDRange); 4831 } else if (ID == ".amdhsa_float_round_mode_32") { 4832 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4833 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
Val, ValRange); 4834 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4835 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4836 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4837 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4838 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4839 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4840 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4841 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4842 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4843 ValRange); 4844 } else if (ID == ".amdhsa_dx10_clamp") { 4845 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4846 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4847 } else if (ID == ".amdhsa_ieee_mode") { 4848 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4849 Val, ValRange); 4850 } else if (ID == ".amdhsa_fp16_overflow") { 4851 if (IVersion.Major < 9) 4852 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4853 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4854 ValRange); 4855 } else if (ID == ".amdhsa_tg_split") { 4856 if (!isGFX90A()) 4857 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4858 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4859 ValRange); 4860 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4861 if (IVersion.Major < 10) 4862 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4863 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4864 ValRange); 4865 } else if (ID == ".amdhsa_memory_ordered") { 4866 if (IVersion.Major < 10) 4867 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4868 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4869 ValRange); 4870 } else if (ID == ".amdhsa_forward_progress") { 4871 if (IVersion.Major < 10) 4872 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4873 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4874 ValRange); 4875 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4876 PARSE_BITS_ENTRY( 4877 KD.compute_pgm_rsrc2, 4878 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4879 ValRange); 4880 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4881 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4882 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4883 Val, ValRange); 4884 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4885 PARSE_BITS_ENTRY( 4886 KD.compute_pgm_rsrc2, 4887 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4888 ValRange); 4889 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4890 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4891 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4892 Val, ValRange); 4893 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4894 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4895 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4896 Val, ValRange); 4897 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4898 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4899 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4900 Val, ValRange); 4901 } else if (ID == ".amdhsa_exception_int_div_zero") { 4902 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4903 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4904 Val, ValRange); 4905 } else { 4906 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4907 } 4908 4909 #undef PARSE_BITS_ENTRY 4910 } 4911 4912 /* Both GPR-count directives are mandatory. */ if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4913 return TokError(".amdhsa_next_free_vgpr directive is required"); 4914 4915 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4916 return TokError(".amdhsa_next_free_sgpr directive is required"); 4917 4918 unsigned VGPRBlocks; 4919 unsigned SGPRBlocks; 4920 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4921 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4922 EnableWavefrontSize32, NextFreeVGPR, 4923 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4924 SGPRBlocks)) 4925 return true; 4926 4927 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4928 VGPRBlocks)) 4929 return OutOfRangeError(VGPRRange); 4930 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4931 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4932 4933 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4934 SGPRBlocks)) 4935 return OutOfRangeError(SGPRRange); 4936 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4937 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4938 SGPRBlocks); 4939 4940 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4941 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4942 "enabled user SGPRs"); 4943 4944 unsigned UserSGPRCount = 4945 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4946 4947 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4948 return TokError("too many user SGPRs enabled"); 4949 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4950 UserSGPRCount); 4951 4952 if (isGFX90A()) { 4953 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4954 return TokError(".amdhsa_accum_offset directive is required"); 4955 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4956 return TokError("accum_offset should be in range [4..256] in " 4957 "increments of 4"); 4958 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4959 return TokError("accum_offset exceeds total VGPR allocation"); 4960 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4961 (AccumOffset / 4 - 1)); 4962 } 4963 4964 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4965 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4966 ReserveFlatScr); 4967 return false; 4968 } 4969 4970 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4971 
uint32_t Major; 4972 uint32_t Minor; 4973 4974 if (ParseDirectiveMajorMinor(Major, Minor)) 4975 return true; 4976 4977 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4978 return false; 4979 } 4980 4981 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4982 uint32_t Major; 4983 uint32_t Minor; 4984 uint32_t Stepping; 4985 StringRef VendorName; 4986 StringRef ArchName; 4987 4988 // If this directive has no arguments, then use the ISA version for the 4989 // targeted GPU. 4990 if (isToken(AsmToken::EndOfStatement)) { 4991 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4992 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4993 ISA.Stepping, 4994 "AMD", "AMDGPU"); 4995 return false; 4996 } 4997 4998 if (ParseDirectiveMajorMinor(Major, Minor)) 4999 return true; 5000 5001 if (!trySkipToken(AsmToken::Comma)) 5002 return TokError("stepping version number required, comma expected"); 5003 5004 if (ParseAsAbsoluteExpression(Stepping)) 5005 return TokError("invalid stepping version"); 5006 5007 if (!trySkipToken(AsmToken::Comma)) 5008 return TokError("vendor name required, comma expected"); 5009 5010 if (!parseString(VendorName, "invalid vendor name")) 5011 return true; 5012 5013 if (!trySkipToken(AsmToken::Comma)) 5014 return TokError("arch name required, comma expected"); 5015 5016 if (!parseString(ArchName, "invalid arch name")) 5017 return true; 5018 5019 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5020 VendorName, ArchName); 5021 return false; 5022 } 5023 5024 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5025 amd_kernel_code_t &Header) { 5026 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5027 // assembly for backwards compatibility. 
5028 if (ID == "max_scratch_backing_memory_byte_size") { 5029 Parser.eatToEndOfStatement(); 5030 return false; 5031 } 5032 5033 SmallString<40> ErrStr; 5034 raw_svector_ostream Err(ErrStr); 5035 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5036 return TokError(Err.str()); 5037 } 5038 Lex(); 5039 5040 if (ID == "enable_wavefront_size32") { 5041 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5042 if (!isGFX10Plus()) 5043 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5044 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5045 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5046 } else { 5047 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5048 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5049 } 5050 } 5051 5052 if (ID == "wavefront_size") { 5053 if (Header.wavefront_size == 5) { 5054 if (!isGFX10Plus()) 5055 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5056 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5057 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5058 } else if (Header.wavefront_size == 6) { 5059 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5060 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5061 } 5062 } 5063 5064 if (ID == "enable_wgp_mode") { 5065 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5066 !isGFX10Plus()) 5067 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5068 } 5069 5070 if (ID == "enable_mem_ordered") { 5071 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5072 !isGFX10Plus()) 5073 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5074 } 5075 5076 if (ID == "enable_fwd_progress") { 5077 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5078 !isGFX10Plus()) 5079 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5080 } 5081 5082 return false; 5083 
} 5084 5085 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5086 amd_kernel_code_t Header; 5087 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5088 5089 while (true) { 5090 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5091 // will set the current token to EndOfStatement. 5092 while(trySkipToken(AsmToken::EndOfStatement)); 5093 5094 StringRef ID; 5095 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5096 return true; 5097 5098 if (ID == ".end_amd_kernel_code_t") 5099 break; 5100 5101 if (ParseAMDKernelCodeTValue(ID, Header)) 5102 return true; 5103 } 5104 5105 getTargetStreamer().EmitAMDKernelCodeT(Header); 5106 5107 return false; 5108 } 5109 5110 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5111 StringRef KernelName; 5112 if (!parseId(KernelName, "expected symbol name")) 5113 return true; 5114 5115 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5116 ELF::STT_AMDGPU_HSA_KERNEL); 5117 5118 KernelScope.initialize(getContext()); 5119 return false; 5120 } 5121 5122 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5123 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5124 return Error(getLoc(), 5125 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5126 "architectures"); 5127 } 5128 5129 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5130 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5131 return Error(getParser().getTok().getLoc(), "target id must match options"); 5132 5133 getTargetStreamer().EmitISAVersion(); 5134 Lex(); 5135 5136 return false; 5137 } 5138 5139 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5140 const char *AssemblerDirectiveBegin; 5141 const char *AssemblerDirectiveEnd; 5142 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5143 isHsaAbiVersion3AndAbove(&getSTI()) 5144 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5145 HSAMD::V3::AssemblerDirectiveEnd) 5146 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5147 HSAMD::AssemblerDirectiveEnd); 5148 5149 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5150 return Error(getLoc(), 5151 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5152 "not available on non-amdhsa OSes")).str()); 5153 } 5154 5155 std::string HSAMetadataString; 5156 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5157 HSAMetadataString)) 5158 return true; 5159 5160 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5161 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5162 return Error(getLoc(), "invalid HSA metadata"); 5163 } else { 5164 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5165 return Error(getLoc(), "invalid HSA metadata"); 5166 } 5167 5168 return false; 5169 } 5170 5171 /// Common code to parse out a block of text (typically YAML) between start and 5172 /// end directives. 
5173 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5174 const char *AssemblerDirectiveEnd, 5175 std::string &CollectString) { 5176 5177 raw_string_ostream CollectStream(CollectString); 5178 5179 getLexer().setSkipSpace(false); 5180 5181 bool FoundEnd = false; 5182 while (!isToken(AsmToken::Eof)) { 5183 while (isToken(AsmToken::Space)) { 5184 CollectStream << getTokenStr(); 5185 Lex(); 5186 } 5187 5188 if (trySkipId(AssemblerDirectiveEnd)) { 5189 FoundEnd = true; 5190 break; 5191 } 5192 5193 CollectStream << Parser.parseStringToEndOfStatement() 5194 << getContext().getAsmInfo()->getSeparatorString(); 5195 5196 Parser.eatToEndOfStatement(); 5197 } 5198 5199 getLexer().setSkipSpace(true); 5200 5201 if (isToken(AsmToken::Eof) && !FoundEnd) { 5202 return TokError(Twine("expected directive ") + 5203 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5204 } 5205 5206 CollectStream.flush(); 5207 return false; 5208 } 5209 5210 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5211 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5212 std::string String; 5213 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5214 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5215 return true; 5216 5217 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5218 if (!PALMetadata->setFromString(String)) 5219 return Error(getLoc(), "invalid PAL metadata"); 5220 return false; 5221 } 5222 5223 /// Parse the assembler directive for old linear-format PAL metadata. 
5224 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5225 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5226 return Error(getLoc(), 5227 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5228 "not available on non-amdpal OSes")).str()); 5229 } 5230 5231 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5232 PALMetadata->setLegacy(); 5233 for (;;) { 5234 uint32_t Key, Value; 5235 if (ParseAsAbsoluteExpression(Key)) { 5236 return TokError(Twine("invalid value in ") + 5237 Twine(PALMD::AssemblerDirective)); 5238 } 5239 if (!trySkipToken(AsmToken::Comma)) { 5240 return TokError(Twine("expected an even number of values in ") + 5241 Twine(PALMD::AssemblerDirective)); 5242 } 5243 if (ParseAsAbsoluteExpression(Value)) { 5244 return TokError(Twine("invalid value in ") + 5245 Twine(PALMD::AssemblerDirective)); 5246 } 5247 PALMetadata->setRegister(Key, Value); 5248 if (!trySkipToken(AsmToken::Comma)) 5249 break; 5250 } 5251 return false; 5252 } 5253 5254 /// ParseDirectiveAMDGPULDS 5255 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5256 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5257 if (getParser().checkForValidSection()) 5258 return true; 5259 5260 StringRef Name; 5261 SMLoc NameLoc = getLoc(); 5262 if (getParser().parseIdentifier(Name)) 5263 return TokError("expected identifier in directive"); 5264 5265 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5266 if (parseToken(AsmToken::Comma, "expected ','")) 5267 return true; 5268 5269 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5270 5271 int64_t Size; 5272 SMLoc SizeLoc = getLoc(); 5273 if (getParser().parseAbsoluteExpression(Size)) 5274 return true; 5275 if (Size < 0) 5276 return Error(SizeLoc, "size must be non-negative"); 5277 if (Size > LocalMemorySize) 5278 return Error(SizeLoc, "size is too large"); 5279 5280 int64_t Alignment = 4; 5281 if (trySkipToken(AsmToken::Comma)) { 5282 SMLoc AlignLoc = getLoc(); 5283 if 
(getParser().parseAbsoluteExpression(Alignment)) 5284 return true; 5285 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5286 return Error(AlignLoc, "alignment must be a power of two"); 5287 5288 // Alignment larger than the size of LDS is possible in theory, as long 5289 // as the linker manages to place to symbol at address 0, but we do want 5290 // to make sure the alignment fits nicely into a 32-bit integer. 5291 if (Alignment >= 1u << 31) 5292 return Error(AlignLoc, "alignment is too large"); 5293 } 5294 5295 if (parseToken(AsmToken::EndOfStatement, 5296 "unexpected token in '.amdgpu_lds' directive")) 5297 return true; 5298 5299 Symbol->redefineIfPossible(); 5300 if (!Symbol->isUndefined()) 5301 return Error(NameLoc, "invalid symbol redefinition"); 5302 5303 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5304 return false; 5305 } 5306 5307 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5308 StringRef IDVal = DirectiveID.getString(); 5309 5310 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5311 if (IDVal == ".amdhsa_kernel") 5312 return ParseDirectiveAMDHSAKernel(); 5313 5314 // TODO: Restructure/combine with PAL metadata directive. 
5315 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5316 return ParseDirectiveHSAMetadata(); 5317 } else { 5318 if (IDVal == ".hsa_code_object_version") 5319 return ParseDirectiveHSACodeObjectVersion(); 5320 5321 if (IDVal == ".hsa_code_object_isa") 5322 return ParseDirectiveHSACodeObjectISA(); 5323 5324 if (IDVal == ".amd_kernel_code_t") 5325 return ParseDirectiveAMDKernelCodeT(); 5326 5327 if (IDVal == ".amdgpu_hsa_kernel") 5328 return ParseDirectiveAMDGPUHsaKernel(); 5329 5330 if (IDVal == ".amd_amdgpu_isa") 5331 return ParseDirectiveISAVersion(); 5332 5333 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5334 return ParseDirectiveHSAMetadata(); 5335 } 5336 5337 if (IDVal == ".amdgcn_target") 5338 return ParseDirectiveAMDGCNTarget(); 5339 5340 if (IDVal == ".amdgpu_lds") 5341 return ParseDirectiveAMDGPULDS(); 5342 5343 if (IDVal == PALMD::AssemblerDirectiveBegin) 5344 return ParseDirectivePALMetadataBegin(); 5345 5346 if (IDVal == PALMD::AssemblerDirective) 5347 return ParseDirectivePALMetadata(); 5348 5349 return true; 5350 } 5351 5352 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5353 unsigned RegNo) { 5354 5355 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5356 return isGFX9Plus(); 5357 5358 // GFX10 has 2 more SGPRs 104 and 105. 
5359 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5360 return hasSGPR104_SGPR105(); 5361 5362 switch (RegNo) { 5363 case AMDGPU::SRC_SHARED_BASE: 5364 case AMDGPU::SRC_SHARED_LIMIT: 5365 case AMDGPU::SRC_PRIVATE_BASE: 5366 case AMDGPU::SRC_PRIVATE_LIMIT: 5367 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5368 return isGFX9Plus(); 5369 case AMDGPU::TBA: 5370 case AMDGPU::TBA_LO: 5371 case AMDGPU::TBA_HI: 5372 case AMDGPU::TMA: 5373 case AMDGPU::TMA_LO: 5374 case AMDGPU::TMA_HI: 5375 return !isGFX9Plus(); 5376 case AMDGPU::XNACK_MASK: 5377 case AMDGPU::XNACK_MASK_LO: 5378 case AMDGPU::XNACK_MASK_HI: 5379 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5380 case AMDGPU::SGPR_NULL: 5381 return isGFX10Plus(); 5382 default: 5383 break; 5384 } 5385 5386 if (isCI()) 5387 return true; 5388 5389 if (isSI() || isGFX10Plus()) { 5390 // No flat_scr on SI. 5391 // On GFX10 flat scratch is not a valid register operand and can only be 5392 // accessed with s_setreg/s_getreg. 5393 switch (RegNo) { 5394 case AMDGPU::FLAT_SCR: 5395 case AMDGPU::FLAT_SCR_LO: 5396 case AMDGPU::FLAT_SCR_HI: 5397 return false; 5398 default: 5399 return true; 5400 } 5401 } 5402 5403 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5404 // SI/CI have. 5405 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5406 return hasSGPR102_SGPR103(); 5407 5408 return true; 5409 } 5410 5411 OperandMatchResultTy 5412 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5413 OperandMode Mode) { 5414 // Try to parse with a custom parser 5415 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5416 5417 // If we successfully parsed the operand or if there as an error parsing, 5418 // we are done. 5419 // 5420 // If we are parsing after we reach EndOfStatement then this means we 5421 // are appending default values to the Operands list. 
This is only done 5422 // by custom parser, so we shouldn't continue on to the generic parsing. 5423 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5424 isToken(AsmToken::EndOfStatement)) 5425 return ResTy; 5426 5427 SMLoc RBraceLoc; 5428 SMLoc LBraceLoc = getLoc(); 5429 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5430 unsigned Prefix = Operands.size(); 5431 5432 for (;;) { 5433 auto Loc = getLoc(); 5434 ResTy = parseReg(Operands); 5435 if (ResTy == MatchOperand_NoMatch) 5436 Error(Loc, "expected a register"); 5437 if (ResTy != MatchOperand_Success) 5438 return MatchOperand_ParseFail; 5439 5440 RBraceLoc = getLoc(); 5441 if (trySkipToken(AsmToken::RBrac)) 5442 break; 5443 5444 if (!skipToken(AsmToken::Comma, 5445 "expected a comma or a closing square bracket")) { 5446 return MatchOperand_ParseFail; 5447 } 5448 } 5449 5450 if (Operands.size() - Prefix > 1) { 5451 Operands.insert(Operands.begin() + Prefix, 5452 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5453 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5454 } 5455 5456 return MatchOperand_Success; 5457 } 5458 5459 return parseRegOrImm(Operands); 5460 } 5461 5462 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5463 // Clear any forced encodings from the previous instruction. 
5464 setForcedEncodingSize(0); 5465 setForcedDPP(false); 5466 setForcedSDWA(false); 5467 5468 if (Name.endswith("_e64")) { 5469 setForcedEncodingSize(64); 5470 return Name.substr(0, Name.size() - 4); 5471 } else if (Name.endswith("_e32")) { 5472 setForcedEncodingSize(32); 5473 return Name.substr(0, Name.size() - 4); 5474 } else if (Name.endswith("_dpp")) { 5475 setForcedDPP(true); 5476 return Name.substr(0, Name.size() - 4); 5477 } else if (Name.endswith("_sdwa")) { 5478 setForcedSDWA(true); 5479 return Name.substr(0, Name.size() - 5); 5480 } 5481 return Name; 5482 } 5483 5484 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5485 StringRef Name, 5486 SMLoc NameLoc, OperandVector &Operands) { 5487 // Add the instruction mnemonic 5488 Name = parseMnemonicSuffix(Name); 5489 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5490 5491 bool IsMIMG = Name.startswith("image_"); 5492 5493 while (!trySkipToken(AsmToken::EndOfStatement)) { 5494 OperandMode Mode = OperandMode_Default; 5495 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5496 Mode = OperandMode_NSA; 5497 CPolSeen = 0; 5498 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5499 5500 if (Res != MatchOperand_Success) { 5501 checkUnsupportedInstruction(Name, NameLoc); 5502 if (!Parser.hasPendingError()) { 5503 // FIXME: use real operand location rather than the current location. 5504 StringRef Msg = 5505 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5506 "not a valid operand."; 5507 Error(getLoc(), Msg); 5508 } 5509 while (!trySkipToken(AsmToken::EndOfStatement)) { 5510 lex(); 5511 } 5512 return true; 5513 } 5514 5515 // Eat the comma or space if there is one. 
5516 trySkipToken(AsmToken::Comma); 5517 } 5518 5519 return false; 5520 } 5521 5522 //===----------------------------------------------------------------------===// 5523 // Utility functions 5524 //===----------------------------------------------------------------------===// 5525 5526 OperandMatchResultTy 5527 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5528 5529 if (!trySkipId(Prefix, AsmToken::Colon)) 5530 return MatchOperand_NoMatch; 5531 5532 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5533 } 5534 5535 OperandMatchResultTy 5536 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5537 AMDGPUOperand::ImmTy ImmTy, 5538 bool (*ConvertResult)(int64_t&)) { 5539 SMLoc S = getLoc(); 5540 int64_t Value = 0; 5541 5542 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5543 if (Res != MatchOperand_Success) 5544 return Res; 5545 5546 if (ConvertResult && !ConvertResult(Value)) { 5547 Error(S, "invalid " + StringRef(Prefix) + " value."); 5548 } 5549 5550 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5551 return MatchOperand_Success; 5552 } 5553 5554 OperandMatchResultTy 5555 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5556 OperandVector &Operands, 5557 AMDGPUOperand::ImmTy ImmTy, 5558 bool (*ConvertResult)(int64_t&)) { 5559 SMLoc S = getLoc(); 5560 if (!trySkipId(Prefix, AsmToken::Colon)) 5561 return MatchOperand_NoMatch; 5562 5563 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5564 return MatchOperand_ParseFail; 5565 5566 unsigned Val = 0; 5567 const unsigned MaxSize = 4; 5568 5569 // FIXME: How to verify the number of elements matches the number of src 5570 // operands? 
5571 for (int I = 0; ; ++I) { 5572 int64_t Op; 5573 SMLoc Loc = getLoc(); 5574 if (!parseExpr(Op)) 5575 return MatchOperand_ParseFail; 5576 5577 if (Op != 0 && Op != 1) { 5578 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5579 return MatchOperand_ParseFail; 5580 } 5581 5582 Val |= (Op << I); 5583 5584 if (trySkipToken(AsmToken::RBrac)) 5585 break; 5586 5587 if (I + 1 == MaxSize) { 5588 Error(getLoc(), "expected a closing square bracket"); 5589 return MatchOperand_ParseFail; 5590 } 5591 5592 if (!skipToken(AsmToken::Comma, "expected a comma")) 5593 return MatchOperand_ParseFail; 5594 } 5595 5596 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5597 return MatchOperand_Success; 5598 } 5599 5600 OperandMatchResultTy 5601 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5602 AMDGPUOperand::ImmTy ImmTy) { 5603 int64_t Bit; 5604 SMLoc S = getLoc(); 5605 5606 if (trySkipId(Name)) { 5607 Bit = 1; 5608 } else if (trySkipId("no", Name)) { 5609 Bit = 0; 5610 } else { 5611 return MatchOperand_NoMatch; 5612 } 5613 5614 if (Name == "r128" && !hasMIMG_R128()) { 5615 Error(S, "r128 modifier is not supported on this GPU"); 5616 return MatchOperand_ParseFail; 5617 } 5618 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5619 Error(S, "a16 modifier is not supported on this GPU"); 5620 return MatchOperand_ParseFail; 5621 } 5622 5623 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5624 ImmTy = AMDGPUOperand::ImmTyR128A16; 5625 5626 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5627 return MatchOperand_Success; 5628 } 5629 5630 OperandMatchResultTy 5631 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5632 unsigned CPolOn = 0; 5633 unsigned CPolOff = 0; 5634 SMLoc S = getLoc(); 5635 5636 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5637 if (isGFX940() && !Mnemo.startswith("s_")) { 5638 if (trySkipId("sc0")) 5639 CPolOn = AMDGPU::CPol::SC0; 5640 else if (trySkipId("nosc0")) 5641 CPolOff = 
AMDGPU::CPol::SC0; 5642 else if (trySkipId("nt")) 5643 CPolOn = AMDGPU::CPol::NT; 5644 else if (trySkipId("nont")) 5645 CPolOff = AMDGPU::CPol::NT; 5646 else if (trySkipId("sc1")) 5647 CPolOn = AMDGPU::CPol::SC1; 5648 else if (trySkipId("nosc1")) 5649 CPolOff = AMDGPU::CPol::SC1; 5650 else 5651 return MatchOperand_NoMatch; 5652 } 5653 else if (trySkipId("glc")) 5654 CPolOn = AMDGPU::CPol::GLC; 5655 else if (trySkipId("noglc")) 5656 CPolOff = AMDGPU::CPol::GLC; 5657 else if (trySkipId("slc")) 5658 CPolOn = AMDGPU::CPol::SLC; 5659 else if (trySkipId("noslc")) 5660 CPolOff = AMDGPU::CPol::SLC; 5661 else if (trySkipId("dlc")) 5662 CPolOn = AMDGPU::CPol::DLC; 5663 else if (trySkipId("nodlc")) 5664 CPolOff = AMDGPU::CPol::DLC; 5665 else if (trySkipId("scc")) 5666 CPolOn = AMDGPU::CPol::SCC; 5667 else if (trySkipId("noscc")) 5668 CPolOff = AMDGPU::CPol::SCC; 5669 else 5670 return MatchOperand_NoMatch; 5671 5672 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5673 Error(S, "dlc modifier is not supported on this GPU"); 5674 return MatchOperand_ParseFail; 5675 } 5676 5677 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5678 Error(S, "scc modifier is not supported on this GPU"); 5679 return MatchOperand_ParseFail; 5680 } 5681 5682 if (CPolSeen & (CPolOn | CPolOff)) { 5683 Error(S, "duplicate cache policy modifier"); 5684 return MatchOperand_ParseFail; 5685 } 5686 5687 CPolSeen |= (CPolOn | CPolOff); 5688 5689 for (unsigned I = 1; I != Operands.size(); ++I) { 5690 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5691 if (Op.isCPol()) { 5692 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5693 return MatchOperand_Success; 5694 } 5695 } 5696 5697 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5698 AMDGPUOperand::ImmTyCPol)); 5699 5700 return MatchOperand_Success; 5701 } 5702 5703 static void addOptionalImmOperand( 5704 MCInst& Inst, const OperandVector& Operands, 5705 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5706 
AMDGPUOperand::ImmTy ImmT, 5707 int64_t Default = 0) { 5708 auto i = OptionalIdx.find(ImmT); 5709 if (i != OptionalIdx.end()) { 5710 unsigned Idx = i->second; 5711 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5712 } else { 5713 Inst.addOperand(MCOperand::createImm(Default)); 5714 } 5715 } 5716 5717 OperandMatchResultTy 5718 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5719 StringRef &Value, 5720 SMLoc &StringLoc) { 5721 if (!trySkipId(Prefix, AsmToken::Colon)) 5722 return MatchOperand_NoMatch; 5723 5724 StringLoc = getLoc(); 5725 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5726 : MatchOperand_ParseFail; 5727 } 5728 5729 //===----------------------------------------------------------------------===// 5730 // MTBUF format 5731 //===----------------------------------------------------------------------===// 5732 5733 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5734 int64_t MaxVal, 5735 int64_t &Fmt) { 5736 int64_t Val; 5737 SMLoc Loc = getLoc(); 5738 5739 auto Res = parseIntWithPrefix(Pref, Val); 5740 if (Res == MatchOperand_ParseFail) 5741 return false; 5742 if (Res == MatchOperand_NoMatch) 5743 return true; 5744 5745 if (Val < 0 || Val > MaxVal) { 5746 Error(Loc, Twine("out of range ", StringRef(Pref))); 5747 return false; 5748 } 5749 5750 Fmt = Val; 5751 return true; 5752 } 5753 5754 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5755 // values to live in a joint format operand in the MCInst encoding. 5756 OperandMatchResultTy 5757 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5758 using namespace llvm::AMDGPU::MTBUFFormat; 5759 5760 int64_t Dfmt = DFMT_UNDEF; 5761 int64_t Nfmt = NFMT_UNDEF; 5762 5763 // dfmt and nfmt can appear in either order, and each is optional. 
5764 for (int I = 0; I < 2; ++I) { 5765 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5766 return MatchOperand_ParseFail; 5767 5768 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5769 return MatchOperand_ParseFail; 5770 } 5771 // Skip optional comma between dfmt/nfmt 5772 // but guard against 2 commas following each other. 5773 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5774 !peekToken().is(AsmToken::Comma)) { 5775 trySkipToken(AsmToken::Comma); 5776 } 5777 } 5778 5779 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5780 return MatchOperand_NoMatch; 5781 5782 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5783 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5784 5785 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5786 return MatchOperand_Success; 5787 } 5788 5789 OperandMatchResultTy 5790 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5791 using namespace llvm::AMDGPU::MTBUFFormat; 5792 5793 int64_t Fmt = UFMT_UNDEF; 5794 5795 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5796 return MatchOperand_ParseFail; 5797 5798 if (Fmt == UFMT_UNDEF) 5799 return MatchOperand_NoMatch; 5800 5801 Format = Fmt; 5802 return MatchOperand_Success; 5803 } 5804 5805 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5806 int64_t &Nfmt, 5807 StringRef FormatStr, 5808 SMLoc Loc) { 5809 using namespace llvm::AMDGPU::MTBUFFormat; 5810 int64_t Format; 5811 5812 Format = getDfmt(FormatStr); 5813 if (Format != DFMT_UNDEF) { 5814 Dfmt = Format; 5815 return true; 5816 } 5817 5818 Format = getNfmt(FormatStr, getSTI()); 5819 if (Format != NFMT_UNDEF) { 5820 Nfmt = Format; 5821 return true; 5822 } 5823 5824 Error(Loc, "unsupported format"); 5825 return false; 5826 } 5827 5828 OperandMatchResultTy 5829 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5830 SMLoc FormatLoc, 5831 int64_t &Format) { 5832 using namespace llvm::AMDGPU::MTBUFFormat; 5833 5834 int64_t Dfmt = DFMT_UNDEF; 5835 int64_t Nfmt = NFMT_UNDEF; 5836 if (!matchDfmtNfmt(Dfmt, Nfmt, 
FormatStr, FormatLoc)) 5837 return MatchOperand_ParseFail; 5838 5839 if (trySkipToken(AsmToken::Comma)) { 5840 StringRef Str; 5841 SMLoc Loc = getLoc(); 5842 if (!parseId(Str, "expected a format string") || 5843 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5844 return MatchOperand_ParseFail; 5845 } 5846 if (Dfmt == DFMT_UNDEF) { 5847 Error(Loc, "duplicate numeric format"); 5848 return MatchOperand_ParseFail; 5849 } else if (Nfmt == NFMT_UNDEF) { 5850 Error(Loc, "duplicate data format"); 5851 return MatchOperand_ParseFail; 5852 } 5853 } 5854 5855 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5856 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5857 5858 if (isGFX10Plus()) { 5859 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5860 if (Ufmt == UFMT_UNDEF) { 5861 Error(FormatLoc, "unsupported format"); 5862 return MatchOperand_ParseFail; 5863 } 5864 Format = Ufmt; 5865 } else { 5866 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5867 } 5868 5869 return MatchOperand_Success; 5870 } 5871 5872 OperandMatchResultTy 5873 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5874 SMLoc Loc, 5875 int64_t &Format) { 5876 using namespace llvm::AMDGPU::MTBUFFormat; 5877 5878 auto Id = getUnifiedFormat(FormatStr); 5879 if (Id == UFMT_UNDEF) 5880 return MatchOperand_NoMatch; 5881 5882 if (!isGFX10Plus()) { 5883 Error(Loc, "unified format is not supported on this GPU"); 5884 return MatchOperand_ParseFail; 5885 } 5886 5887 Format = Id; 5888 return MatchOperand_Success; 5889 } 5890 5891 OperandMatchResultTy 5892 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5893 using namespace llvm::AMDGPU::MTBUFFormat; 5894 SMLoc Loc = getLoc(); 5895 5896 if (!parseExpr(Format)) 5897 return MatchOperand_ParseFail; 5898 if (!isValidFormatEncoding(Format, getSTI())) { 5899 Error(Loc, "out of range format"); 5900 return MatchOperand_ParseFail; 5901 } 5902 5903 return MatchOperand_Success; 5904 } 5905 5906 OperandMatchResultTy 5907 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t 
&Format) { 5908 using namespace llvm::AMDGPU::MTBUFFormat; 5909 5910 if (!trySkipId("format", AsmToken::Colon)) 5911 return MatchOperand_NoMatch; 5912 5913 if (trySkipToken(AsmToken::LBrac)) { 5914 StringRef FormatStr; 5915 SMLoc Loc = getLoc(); 5916 if (!parseId(FormatStr, "expected a format string")) 5917 return MatchOperand_ParseFail; 5918 5919 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5920 if (Res == MatchOperand_NoMatch) 5921 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5922 if (Res != MatchOperand_Success) 5923 return Res; 5924 5925 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5926 return MatchOperand_ParseFail; 5927 5928 return MatchOperand_Success; 5929 } 5930 5931 return parseNumericFormat(Format); 5932 } 5933 5934 OperandMatchResultTy 5935 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5936 using namespace llvm::AMDGPU::MTBUFFormat; 5937 5938 int64_t Format = getDefaultFormatEncoding(getSTI()); 5939 OperandMatchResultTy Res; 5940 SMLoc Loc = getLoc(); 5941 5942 // Parse legacy format syntax. 5943 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5944 if (Res == MatchOperand_ParseFail) 5945 return Res; 5946 5947 bool FormatFound = (Res == MatchOperand_Success); 5948 5949 Operands.push_back( 5950 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5951 5952 if (FormatFound) 5953 trySkipToken(AsmToken::Comma); 5954 5955 if (isToken(AsmToken::EndOfStatement)) { 5956 // We are expecting an soffset operand, 5957 // but let matcher handle the error. 5958 return MatchOperand_Success; 5959 } 5960 5961 // Parse soffset. 
Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      // Patch the format operand pushed earlier (it precedes soffset).
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  // A format was already parsed before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

// Convert parsed DS operands (offset0/offset1 form) into MCInst operands.
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Convert parsed DS operands into MCInst operands. IsGdsHardcoded is true
// when the mnemonic itself implies gds, in which case no explicit gds
// operand is appended.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // A literal "gds" token in the asm string also hardcodes gds.
    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 uses a swizzle-encoded offset rather than a byte offset.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Convert parsed EXP operands into MCInst operands and compute the 'en'
// (channel enable) mask from which sources are present vs. 'off'.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // 'off' placeholders become NoRegister source operands.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed exports use only src0/src1: the operand parsed third
    // becomes src1 and the upper two source slots are cleared.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      // Each present source enables two channels in compressed mode.
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

// Encode one counter value into the combined waitcnt immediate IntVal.
// Returns true on FAILURE, i.e. CntVal did not fit the counter field and
// saturation was not requested.
static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  // A round-trip mismatch means CntVal was truncated by the field width.
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      // Encoding -1 saturates the field to its maximum value.
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

// Parse one "name(value)" counter specification and merge it into IntVal.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  // A "_sat" suffix requests clamping to the field maximum instead of
  // reporting an out-of-range error.
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName ==
"vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counter specs may be separated by '&' or ','; a trailing separator
  // with nothing after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}

// Parse an s_waitcnt operand: either a list of named counter specs or a
// raw integer expression. Counters not mentioned keep their maximum
// (no-wait) value from getWaitcntBitMask.
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

// Parse the body of a hwreg(...) macro, after the opening parenthesis:
// a register (symbolic name or numeric code) plus optional ", offset, width".
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

// Diagnose out-of-range hwreg id/offset/width values; symbolic names are
// additionally checked for subtarget support.
bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(HwReg.Loc,
          "specified hardware register is not supported on this GPU");
    return false;
  }
  if (!isValidHwreg(HwReg.Id)) {
    Error(HwReg.Loc,
          "invalid code of hardware register: only 6-bit values are legal");
    return false;
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

// Parse a hwreg operand: hwreg(reg[, offset, width]) or a raw 16-bit value.
OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
OperandInfoTy HwReg(ID_UNKNOWN_);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    // A raw immediate must fit the 16-bit simm16 field.
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

// Parse the body of a sendmsg(...) macro, after the opening parenthesis:
// a message id (symbolic name or numeric code) plus optional
// ", operation[, stream]".
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

// Diagnose invalid sendmsg components.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

// Parse a sendmsg operand: sendmsg(msg[, op[, stream]]) or a raw
// 16-bit value.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

// Parse a v_interp slot name (p10/p20/p0) into its immediate encoding.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parse an interpolation attribute of the form "attr<N>.<chan>" into an
// attribute immediate and a channel immediate.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  // The last two characters select the channel: .x/.y/.z/.w.
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  // Strip the "attr" prefix and ".<chan>" suffix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return
MatchOperand_ParseFail;
  }

  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

// Parse an export target name and validate it for the current subtarget.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
    Error(S, (Id == ET_INVALID) ?
             "invalid exp target" :
             "exp target is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

// True if Token is an identifier with the exact spelling Id.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

// Consume the current token if it is the identifier Id.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it is the identifier spelled Pref
// immediately followed by Id (a single concatenated token).
bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

// Consume two tokens if they are the identifier Id followed by a token of
// the given Kind.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Like trySkipToken, but emits ErrMsg when the expected token is absent.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute expression into Imm. On a non-absolute result, report
// either a generic message or one mentioning the Expected alternative.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));
  }
  return false;
}

// Parse an expression and push it as an immediate operand (if it
// evaluates to an absolute value) or as a generic expression operand.
bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

// Consume a quoted string token; Val receives its contents without quotes.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

// Consume an identifier token; an empty ErrMsg suppresses the diagnostic.
bool
AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::Identifier)) {
    Val = getTokenStr();
    lex();
    return true;
  } else {
    if (!ErrMsg.empty())
      Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

// Peek past the current token, but never look beyond the end of the
// current statement.
AsmToken
AMDGPUAsmParser::peekToken() {
  return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
}

// Fill Tokens by peeking ahead; pad with Error tokens when the lexer
// provides fewer than requested.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}

// Return the start location of the last parsed operand satisfying Test,
// or of operand 0 (the mnemonic) if none matches.
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}

SMLoc
AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getRegLoc(unsigned Reg,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) {
    return Op.isRegKind() && Op.getReg() == Reg;
  };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.isImmKindConst();
  };
return getOperandLoc(Test, Operands);
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack and/or/xor lane masks into a BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse ", <expr>" and range-check the value against [MinVal, MaxVal];
// Loc receives the value's location for later diagnostics.
bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
                                     const StringRef ErrMsg,
                                     SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

// Parse OpNum comma-prefixed operands sharing a common range check.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}

// swizzle(QUAD_PERM, l0, l1, l2, l3): four 2-bit lane selectors.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group_size, lane_id), encoded as a bitmask perm:
// the AND mask keeps the bits above the group, the OR mask selects the
// broadcast lane within it.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group_size): XOR with group_size-1 mirrors the lanes
// within each group.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group_size): XOR with group_size exchanges adjacent
// groups of lanes.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "<mask>"): a 5-character mask over {0,1,p,i},
// leftmost character controlling the most significant lane bit.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0': // force lane bit to 0
      break;
    case '1': // force lane bit to 1
      OrMask |= Mask;
      break;
    case 'p': // preserve lane bit
      AndMask |= Mask;
      break;
    case 'i': // invert lane bit
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// offset:<imm> form: a raw 16-bit swizzle encoding.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Dispatch to the handler of the named mode inside swizzle(...).
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse a ds_swizzle offset operand: offset:swizzle(...) or offset:<imm>.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok =
false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

// Parse the body of gpr_idx(...), positioned after the opening
// parenthesis: a comma-separated list of index mode names. Returns the
// combined mode mask, OFF for an empty list, or UNDEF on error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Match one of the symbolic mode names; each sets one mask bit.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // An empty list would already have matched ')' above, so mention
      // the closing parenthesis only before the first mode.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

// Parse a gpr_idx operand: gpr_idx(mode[,mode...]) or a raw 4-bit value.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return MatchOperand_ParseFail;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
      return MatchOperand_ParseFail;
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

// Parse a SOPP branch target: a label or an absolute 16-bit offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

// Default (all-clear) cache-policy operand.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}

// Convert parsed MUBUF operands into MCInst operands. Handles selection
// of the no-return form for atomics and the lds-vs-non-lds opcode quirk.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // The glc bit of the cache-policy operand selects the with-return
    // form of an atomic.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly updated) opcode's TSFlags.
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

// Convert parsed MTBUF operands into MCInst operands.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
7230 if (Op.isToken()) { 7231 continue; 7232 } 7233 assert(Op.isImm()); 7234 7235 // Handle optional arguments 7236 OptionalIdx[Op.getImmTy()] = i; 7237 } 7238 7239 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7240 AMDGPUOperand::ImmTyOffset); 7241 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7242 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7245 } 7246 7247 //===----------------------------------------------------------------------===// 7248 // mimg 7249 //===----------------------------------------------------------------------===// 7250 7251 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7252 bool IsAtomic) { 7253 unsigned I = 1; 7254 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7255 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7256 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7257 } 7258 7259 if (IsAtomic) { 7260 // Add src, same as dst 7261 assert(Desc.getNumDefs() == 1); 7262 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7263 } 7264 7265 OptionalImmIndexMap OptionalIdx; 7266 7267 for (unsigned E = Operands.size(); I != E; ++I) { 7268 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7269 7270 // Add the register arguments 7271 if (Op.isReg()) { 7272 Op.addRegOperands(Inst, 1); 7273 } else if (Op.isImmModifier()) { 7274 OptionalIdx[Op.getImmTy()] = I; 7275 } else if (!Op.isToken()) { 7276 llvm_unreachable("unexpected operand type"); 7277 } 7278 } 7279 7280 bool IsGFX10Plus = isGFX10Plus(); 7281 7282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7283 if (IsGFX10Plus) 7284 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7285 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyUNorm); 7286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7288 if (IsGFX10Plus) 7289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7290 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7291 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7292 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7293 if (!IsGFX10Plus) 7294 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7295 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7296 } 7297 7298 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7299 cvtMIMG(Inst, Operands, true); 7300 } 7301 7302 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7303 OptionalImmIndexMap OptionalIdx; 7304 bool IsAtomicReturn = false; 7305 7306 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7307 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7308 if (!Op.isCPol()) 7309 continue; 7310 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7311 break; 7312 } 7313 7314 if (!IsAtomicReturn) { 7315 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7316 if (NewOpc != -1) 7317 Inst.setOpcode(NewOpc); 7318 } 7319 7320 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7321 SIInstrFlags::IsAtomicRet; 7322 7323 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7324 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7325 7326 // Add the register arguments 7327 if (Op.isReg()) { 7328 Op.addRegOperands(Inst, 1); 7329 if (IsAtomicReturn && i == 1) 7330 Op.addRegOperands(Inst, 1); 7331 continue; 7332 } 7333 7334 // Handle the case where soffset is an immediate 7335 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7336 Op.addImmOperands(Inst, 1); 
7337 continue; 7338 } 7339 7340 // Handle tokens like 'offen' which are sometimes hard-coded into the 7341 // asm string. There are no MCInst operands for these. 7342 if (Op.isToken()) { 7343 continue; 7344 } 7345 assert(Op.isImm()); 7346 7347 // Handle optional arguments 7348 OptionalIdx[Op.getImmTy()] = i; 7349 } 7350 7351 if ((int)Inst.getNumOperands() <= 7352 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7353 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7355 } 7356 7357 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7358 const OperandVector &Operands) { 7359 for (unsigned I = 1; I < Operands.size(); ++I) { 7360 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7361 if (Operand.isReg()) 7362 Operand.addRegOperands(Inst, 1); 7363 } 7364 7365 Inst.addOperand(MCOperand::createImm(1)); // a16 7366 } 7367 7368 //===----------------------------------------------------------------------===// 7369 // smrd 7370 //===----------------------------------------------------------------------===// 7371 7372 bool AMDGPUOperand::isSMRDOffset8() const { 7373 return isImm() && isUInt<8>(getImm()); 7374 } 7375 7376 bool AMDGPUOperand::isSMEMOffset() const { 7377 return isImm(); // Offset range is checked later by validator. 7378 } 7379 7380 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7381 // 32-bit literals are only supported on CI and we only want to use them 7382 // when the offset is > 8-bits. 
7383 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7384 } 7385 7386 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7387 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7388 } 7389 7390 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7391 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7392 } 7393 7394 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7395 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7396 } 7397 7398 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7399 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7400 } 7401 7402 //===----------------------------------------------------------------------===// 7403 // vop3 7404 //===----------------------------------------------------------------------===// 7405 7406 static bool ConvertOmodMul(int64_t &Mul) { 7407 if (Mul != 1 && Mul != 2 && Mul != 4) 7408 return false; 7409 7410 Mul >>= 1; 7411 return true; 7412 } 7413 7414 static bool ConvertOmodDiv(int64_t &Div) { 7415 if (Div == 1) { 7416 Div = 0; 7417 return true; 7418 } 7419 7420 if (Div == 2) { 7421 Div = 3; 7422 return true; 7423 } 7424 7425 return false; 7426 } 7427 7428 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7429 // This is intentional and ensures compatibility with sp3. 7430 // See bug 35397 for details. 7431 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7432 if (BoundCtrl == 0 || BoundCtrl == 1) { 7433 BoundCtrl = 1; 7434 return true; 7435 } 7436 return false; 7437 } 7438 7439 // Note: the order in this table matches the order of operands in AsmString. 
// Table of all recognized optional operands: name, immediate type, whether
// it is a bare flag bit (no ':value' suffix), and an optional value
// conversion callback applied after parsing.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  // Cache policy has no single fixed name; it is parsed by parseCPol below.
  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): "d16" appears twice in this table (see above); presumably
  // because it occupies different positions in different AsmStrings — confirm.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

// Hook run at the start of parsing: initialize the target ID and, for HSA
// ABI v3+, emit the .amdgcn_target directive. Skipped for r600 and when no
// target streamer is present.
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3AndAbove(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

// Parse an optional operand, plus a bounded lookahead of further optional
// operands (see comment below for why the lookahead is needed).
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}

// Try each entry of AMDGPUOptionalOperandTable in order, dispatching to the
// specialized parser for operand kinds that need one. Returns the first
// result that is not NoMatch.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
      res = parseCPol(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

// Parse the output modifier, which is written either "mul:N" or "div:N";
// both forms map onto the single omod operand via the Convert callbacks.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

// Convert VOP3 instructions that use op_sel: after normal VOP3P conversion,
// fold the op_sel bit that refers to the destination into DST_OP_SEL on
// src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count the source operands; the bit after the last source selects the dst.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

// True if operand OpNum of Desc is an input-modifiers operand immediately
// preceding an untied register operand.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

// Convert VOP3 interpolation instructions (interp slot/attr/chan operands,
// optional high/clamp/omod modifiers).
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

// Core VOP3 conversion: add defs, then sources (with FP input modifiers when
// the opcode has src0_modifiers), then optional clamp/omod. The v_mac/v_fmac
// family additionally gets a zero src2_modifiers and a src2 tied to dst.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

// Convenience overload with a local optional-operand index map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

// VOP3P finalization: append op_sel/op_sel_hi/neg_lo/neg_hi operands where
// the opcode has them, then fold those per-source bits into the per-source
// src*_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

// Convenience overload: standard VOP3 conversion followed by VOP3P fixup.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===// 7808 // dpp 7809 //===----------------------------------------------------------------------===// 7810 7811 bool AMDGPUOperand::isDPP8() const { 7812 return isImmTy(ImmTyDPP8); 7813 } 7814 7815 bool AMDGPUOperand::isDPPCtrl() const { 7816 using namespace AMDGPU::DPP; 7817 7818 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7819 if (result) { 7820 int64_t Imm = getImm(); 7821 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7822 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7823 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7824 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7825 (Imm == DppCtrl::WAVE_SHL1) || 7826 (Imm == DppCtrl::WAVE_ROL1) || 7827 (Imm == DppCtrl::WAVE_SHR1) || 7828 (Imm == DppCtrl::WAVE_ROR1) || 7829 (Imm == DppCtrl::ROW_MIRROR) || 7830 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7831 (Imm == DppCtrl::BCAST15) || 7832 (Imm == DppCtrl::BCAST31) || 7833 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7834 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7835 } 7836 return false; 7837 } 7838 7839 //===----------------------------------------------------------------------===// 7840 // mAI 7841 //===----------------------------------------------------------------------===// 7842 7843 bool AMDGPUOperand::isBLGP() const { 7844 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7845 } 7846 7847 bool AMDGPUOperand::isCBSZ() const { 7848 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7849 } 7850 7851 bool AMDGPUOperand::isABID() const { 7852 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7853 } 7854 7855 bool AMDGPUOperand::isS16Imm() const { 7856 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7857 } 7858 7859 bool AMDGPUOperand::isU16Imm() const { 7860 
return isImm() && isUInt<16>(getImm()); 7861 } 7862 7863 //===----------------------------------------------------------------------===// 7864 // dim 7865 //===----------------------------------------------------------------------===// 7866 7867 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7868 // We want to allow "dim:1D" etc., 7869 // but the initial 1 is tokenized as an integer. 7870 std::string Token; 7871 if (isToken(AsmToken::Integer)) { 7872 SMLoc Loc = getToken().getEndLoc(); 7873 Token = std::string(getTokenStr()); 7874 lex(); 7875 if (getLoc() != Loc) 7876 return false; 7877 } 7878 7879 StringRef Suffix; 7880 if (!parseId(Suffix)) 7881 return false; 7882 Token += Suffix; 7883 7884 StringRef DimId = Token; 7885 if (DimId.startswith("SQ_RSRC_IMG_")) 7886 DimId = DimId.drop_front(12); 7887 7888 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7889 if (!DimInfo) 7890 return false; 7891 7892 Encoding = DimInfo->Encoding; 7893 return true; 7894 } 7895 7896 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7897 if (!isGFX10Plus()) 7898 return MatchOperand_NoMatch; 7899 7900 SMLoc S = getLoc(); 7901 7902 if (!trySkipId("dim", AsmToken::Colon)) 7903 return MatchOperand_NoMatch; 7904 7905 unsigned Encoding; 7906 SMLoc Loc = getLoc(); 7907 if (!parseDimId(Encoding)) { 7908 Error(Loc, "invalid dim value"); 7909 return MatchOperand_ParseFail; 7910 } 7911 7912 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7913 AMDGPUOperand::ImmTyDim)); 7914 return MatchOperand_Success; 7915 } 7916 7917 //===----------------------------------------------------------------------===// 7918 // dpp 7919 //===----------------------------------------------------------------------===// 7920 7921 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7922 SMLoc S = getLoc(); 7923 7924 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7925 return MatchOperand_NoMatch; 7926 7927 // 
dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7928 7929 int64_t Sels[8]; 7930 7931 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7932 return MatchOperand_ParseFail; 7933 7934 for (size_t i = 0; i < 8; ++i) { 7935 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7936 return MatchOperand_ParseFail; 7937 7938 SMLoc Loc = getLoc(); 7939 if (getParser().parseAbsoluteExpression(Sels[i])) 7940 return MatchOperand_ParseFail; 7941 if (0 > Sels[i] || 7 < Sels[i]) { 7942 Error(Loc, "expected a 3-bit value"); 7943 return MatchOperand_ParseFail; 7944 } 7945 } 7946 7947 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7948 return MatchOperand_ParseFail; 7949 7950 unsigned DPP8 = 0; 7951 for (size_t i = 0; i < 8; ++i) 7952 DPP8 |= (Sels[i] << (i * 3)); 7953 7954 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7955 return MatchOperand_Success; 7956 } 7957 7958 bool 7959 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7960 const OperandVector &Operands) { 7961 if (Ctrl == "row_newbcast") 7962 return isGFX90A(); 7963 7964 if (Ctrl == "row_share" || 7965 Ctrl == "row_xmask") 7966 return isGFX10Plus(); 7967 7968 if (Ctrl == "wave_shl" || 7969 Ctrl == "wave_shr" || 7970 Ctrl == "wave_rol" || 7971 Ctrl == "wave_ror" || 7972 Ctrl == "row_bcast") 7973 return isVI() || isGFX9(); 7974 7975 return Ctrl == "row_mirror" || 7976 Ctrl == "row_half_mirror" || 7977 Ctrl == "quad_perm" || 7978 Ctrl == "row_shl" || 7979 Ctrl == "row_shr" || 7980 Ctrl == "row_ror"; 7981 } 7982 7983 int64_t 7984 AMDGPUAsmParser::parseDPPCtrlPerm() { 7985 // quad_perm:[%d,%d,%d,%d] 7986 7987 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7988 return -1; 7989 7990 int64_t Val = 0; 7991 for (int i = 0; i < 4; ++i) { 7992 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7993 return -1; 7994 7995 int64_t Temp; 7996 SMLoc Loc = getLoc(); 7997 if (getParser().parseAbsoluteExpression(Temp)) 7998 return 
-1; 7999 if (Temp < 0 || Temp > 3) { 8000 Error(Loc, "expected a 2-bit value"); 8001 return -1; 8002 } 8003 8004 Val += (Temp << i * 2); 8005 } 8006 8007 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8008 return -1; 8009 8010 return Val; 8011 } 8012 8013 int64_t 8014 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8015 using namespace AMDGPU::DPP; 8016 8017 // sel:%d 8018 8019 int64_t Val; 8020 SMLoc Loc = getLoc(); 8021 8022 if (getParser().parseAbsoluteExpression(Val)) 8023 return -1; 8024 8025 struct DppCtrlCheck { 8026 int64_t Ctrl; 8027 int Lo; 8028 int Hi; 8029 }; 8030 8031 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8032 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8033 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8034 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8035 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8036 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8037 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8038 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8039 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8040 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8041 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8042 .Default({-1, 0, 0}); 8043 8044 bool Valid; 8045 if (Check.Ctrl == -1) { 8046 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8047 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8048 } else { 8049 Valid = Check.Lo <= Val && Val <= Check.Hi; 8050 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

// Parse a complete dpp_ctrl operand. row_mirror/row_half_mirror take no
// argument, quad_perm takes a bracketed list, and every other control takes
// a single "ctrl:%d" integer argument.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  // The helpers above return -1 and have already emitted a diagnostic.
  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Defaults used when optional DPP (and endpgm) operands are omitted in the
// assembly text.

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

// Convert the parsed operand list of a DPP (or DPP8) instruction into
// MCInst operands, supplying defaults for omitted optional operands.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const
OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Emit the destination register operands first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is emitted at the very end, after the loop.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Fill in defaults for any optional DPP operands that were omitted.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

// Parse an SDWA select of the form "<Prefix>:<SEL>", where <SEL> is one of
// BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD, and push it as an immediate
// operand of the given type.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res =
parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  // 0xffffffff marks an unrecognized select name.
  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

// Parse "dst_unused:UNUSED_{PAD,SEXT,PRESERVE}" and push the corresponding
// immediate operand.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  // 0xffffffff marks an unrecognized dst_unused name.
  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Thin wrappers that dispatch to cvtSDWA with the proper encoding class and
// vcc-skipping behavior for each SDWA flavor.

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

// Convert the parsed operand list of an SDWA instruction into MCInst
// operands, skipping implied vcc operands where required and supplying
// defaults for omitted optional operands.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Emit the destination register operands first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

// Defaults used when the optional MAI modifiers are omitted.

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  // Register this parser for both the R600 and GCN targets.
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

// Parse the optional 16-bit immediate of s_endpgm; a missing operand
// defaults to 0.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }