1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/TargetParser.h" 40 41 using namespace llvm; 42 using namespace llvm::AMDGPU; 43 using namespace llvm::amdhsa; 44 45 namespace { 46 47 class AMDGPUAsmParser; 48 49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 50 51 //===----------------------------------------------------------------------===// 52 // Operand 53 //===----------------------------------------------------------------------===// 54 55 class AMDGPUOperand : public MCParsedAsmOperand { 56 enum KindTy { 57 Token, 58 Immediate, 59 Register, 60 Expression 61 } Kind; 62 63 SMLoc StartLoc, EndLoc; 64 const AMDGPUAsmParser *AsmParser; 65 66 public: 67 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 68 : Kind(Kind_), AsmParser(AsmParser_) {} 69 70 using Ptr = std::unique_ptr<AMDGPUOperand>; 71 72 struct Modifiers { 73 bool Abs = false; 74 bool Neg = false; 75 bool Sext = false; 76 77 bool hasFPModifiers() const { return Abs || Neg; } 78 bool hasIntModifiers() const { return Sext; } 79 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 80 81 int64_t getFPModifiersOperand() const { 82 int64_t Operand = 0; 83 Operand |= Abs ? SISrcMods::ABS : 0u; 84 Operand |= Neg ? SISrcMods::NEG : 0u; 85 return Operand; 86 } 87 88 int64_t getIntModifiersOperand() const { 89 int64_t Operand = 0; 90 Operand |= Sext ? 
                        SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
207 return isSymbolRefExpr(); 208 } 209 210 bool isSymbolRefExpr() const { 211 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 212 } 213 214 bool isImm() const override { 215 return Kind == Immediate; 216 } 217 218 void setImmKindNone() const { 219 assert(isImm()); 220 Imm.Kind = ImmKindTyNone; 221 } 222 223 void setImmKindLiteral() const { 224 assert(isImm()); 225 Imm.Kind = ImmKindTyLiteral; 226 } 227 228 void setImmKindConst() const { 229 assert(isImm()); 230 Imm.Kind = ImmKindTyConst; 231 } 232 233 bool IsImmKindLiteral() const { 234 return isImm() && Imm.Kind == ImmKindTyLiteral; 235 } 236 237 bool isImmKindConst() const { 238 return isImm() && Imm.Kind == ImmKindTyConst; 239 } 240 241 bool isInlinableImm(MVT type) const; 242 bool isLiteralImm(MVT type) const; 243 244 bool isRegKind() const { 245 return Kind == Register; 246 } 247 248 bool isReg() const override { 249 return isRegKind() && !hasModifiers(); 250 } 251 252 bool isRegOrInline(unsigned RCID, MVT type) const { 253 return isRegClass(RCID) || isInlinableImm(type); 254 } 255 256 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 257 return isRegOrInline(RCID, type) || isLiteralImm(type); 258 } 259 260 bool isRegOrImmWithInt16InputMods() const { 261 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 262 } 263 264 bool isRegOrImmWithInt32InputMods() const { 265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 266 } 267 268 bool isRegOrImmWithInt64InputMods() const { 269 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 270 } 271 272 bool isRegOrImmWithFP16InputMods() const { 273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 274 } 275 276 bool isRegOrImmWithFP32InputMods() const { 277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 278 } 279 280 bool isRegOrImmWithFP64InputMods() const { 281 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 282 } 283 284 bool isVReg() const { 285 return isRegClass(AMDGPU::VGPR_32RegClassID) || 286 isRegClass(AMDGPU::VReg_64RegClassID) || 287 isRegClass(AMDGPU::VReg_96RegClassID) || 288 isRegClass(AMDGPU::VReg_128RegClassID) || 289 isRegClass(AMDGPU::VReg_160RegClassID) || 290 isRegClass(AMDGPU::VReg_192RegClassID) || 291 isRegClass(AMDGPU::VReg_256RegClassID) || 292 isRegClass(AMDGPU::VReg_512RegClassID) || 293 isRegClass(AMDGPU::VReg_1024RegClassID); 294 } 295 296 bool isVReg32() const { 297 return isRegClass(AMDGPU::VGPR_32RegClassID); 298 } 299 300 bool isVReg32OrOff() const { 301 return isOff() || isVReg32(); 302 } 303 304 bool isNull() const { 305 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 306 } 307 308 bool isVRegWithInputMods() const; 309 310 bool isSDWAOperand(MVT type) const; 311 bool isSDWAFP16Operand() const; 312 bool isSDWAFP32Operand() const; 313 bool isSDWAInt16Operand() const; 314 bool isSDWAInt32Operand() const; 315 316 bool isImmTy(ImmTy ImmT) const { 317 return isImm() && Imm.Type == ImmT; 318 } 319 320 bool isImmModifier() const { 321 return isImm() && Imm.Type != ImmTyNone; 322 } 323 324 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 325 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 326 bool isDMask() const { return isImmTy(ImmTyDMask); } 327 bool isDim() const { return isImmTy(ImmTyDim); } 328 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 329 bool isDA() const { return isImmTy(ImmTyDA); } 330 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 331 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 332 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 333 bool isOff() const { return isImmTy(ImmTyOff); } 334 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 335 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 336 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 337 bool isOffen() const { return isImmTy(ImmTyOffen); } 338 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 339 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 340 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 341 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 342 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 343 344 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 345 bool isGDS() const { return isImmTy(ImmTyGDS); } 346 bool isLDS() const { return isImmTy(ImmTyLDS); } 347 bool isCPol() const { return isImmTy(ImmTyCPol); } 348 bool isSWZ() const { return isImmTy(ImmTySWZ); } 349 bool isTFE() const { return isImmTy(ImmTyTFE); } 350 bool isD16() const { return isImmTy(ImmTyD16); } 351 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 352 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 353 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 354 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 355 bool isFI() const { return isImmTy(ImmTyDppFi); } 356 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 357 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 358 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 359 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 360 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 361 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 362 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 363 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 364 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 365 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 366 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 367 bool isHigh() const { return isImmTy(ImmTyHigh); } 368 369 bool isMod() const { 370 return isClampSI() || isOModSI(); 371 } 372 373 bool isRegOrImm() const { 374 return isReg() || isImm(); 375 } 376 377 bool isRegClass(unsigned RCID) const; 378 379 bool isInlineValue() const; 380 381 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 382 return isRegOrInline(RCID, type) && !hasModifiers(); 383 } 384 385 bool isSCSrcB16() const { 386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 387 } 388 389 bool isSCSrcV2B16() const { 390 return isSCSrcB16(); 391 } 392 393 bool isSCSrcB32() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 395 } 396 397 bool isSCSrcB64() const { 398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 399 } 400 401 bool isBoolReg() const; 402 403 bool isSCSrcF16() const { 404 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 405 } 406 407 bool isSCSrcV2F16() const { 408 return isSCSrcF16(); 409 } 410 411 bool isSCSrcF32() const { 412 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 413 } 414 415 bool isSCSrcF64() const { 416 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 417 } 418 419 bool isSSrcB32() const { 420 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 421 } 422 423 bool isSSrcB16() const { 424 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 425 } 426 427 bool isSSrcV2B16() const { 428 llvm_unreachable("cannot happen"); 429 return isSSrcB16(); 430 } 431 432 bool isSSrcB64() const { 433 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 434 // See isVSrc64(). 435 return isSCSrcB64() || isLiteralImm(MVT::i64); 436 } 437 438 bool isSSrcF32() const { 439 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 440 } 441 442 bool isSSrcF64() const { 443 return isSCSrcB64() || isLiteralImm(MVT::f64); 444 } 445 446 bool isSSrcF16() const { 447 return isSCSrcB16() || isLiteralImm(MVT::f16); 448 } 449 450 bool isSSrcV2F16() const { 451 llvm_unreachable("cannot happen"); 452 return isSSrcF16(); 453 } 454 455 bool isSSrcV2FP32() const { 456 llvm_unreachable("cannot happen"); 457 return isSSrcF32(); 458 } 459 460 bool isSCSrcV2FP32() const { 461 llvm_unreachable("cannot happen"); 462 return isSCSrcF32(); 463 } 464 465 bool isSSrcV2INT32() const { 466 llvm_unreachable("cannot happen"); 467 return isSSrcB32(); 468 } 469 470 bool isSCSrcV2INT32() const { 471 llvm_unreachable("cannot happen"); 472 return isSCSrcB32(); 473 } 474 475 bool isSSrcOrLdsB32() const { 476 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 477 isLiteralImm(MVT::i32) || isExpr(); 478 } 479 480 bool isVCSrcB32() const { 481 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 482 } 483 484 bool isVCSrcB64() const { 485 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 486 } 487 488 bool isVCSrcB16() const { 489 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 490 } 491 492 bool isVCSrcV2B16() const { 493 return isVCSrcB16(); 494 } 495 496 bool isVCSrcF32() const { 497 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 498 } 499 500 bool isVCSrcF64() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 502 } 503 504 bool isVCSrcF16() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 506 } 507 508 bool isVCSrcV2F16() const { 509 return isVCSrcF16(); 510 } 511 512 bool isVSrcB32() const { 513 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 514 } 515 516 bool isVSrcB64() const { 517 return isVCSrcF64() || isLiteralImm(MVT::i64); 518 } 519 520 bool isVSrcB16() const { 521 return isVCSrcB16() || isLiteralImm(MVT::i16); 522 } 523 524 bool isVSrcV2B16() const { 525 return isVSrcB16() || isLiteralImm(MVT::v2i16); 526 } 527 528 bool isVCSrcV2FP32() const { 529 return isVCSrcF64(); 530 } 531 532 bool isVSrcV2FP32() const { 533 return isVSrcF64() || isLiteralImm(MVT::v2f32); 534 } 535 536 bool isVCSrcV2INT32() const { 537 return isVCSrcB64(); 538 } 539 540 bool isVSrcV2INT32() const { 541 return isVSrcB64() || isLiteralImm(MVT::v2i32); 542 } 543 544 bool isVSrcF32() const { 545 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 546 } 547 548 bool isVSrcF64() const { 549 return isVCSrcF64() || isLiteralImm(MVT::f64); 550 } 551 552 bool isVSrcF16() const { 553 return isVCSrcF16() || isLiteralImm(MVT::f16); 554 } 555 556 bool isVSrcV2F16() const { 557 return isVSrcF16() || isLiteralImm(MVT::v2f16); 558 } 559 560 bool isVISrcB32() const { 561 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 562 } 563 564 bool isVISrcB16() const { 565 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 566 } 567 568 bool isVISrcV2B16() const { 569 return isVISrcB16(); 570 } 571 572 bool isVISrcF32() const { 573 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 574 } 575 576 
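  // Added commentary (illustrative summary, not from the upstream sources):
  // the predicate names above and below follow the scheme used by the
  // generated asm matcher. "VCSrc*" accepts a VS-class register or an inline
  // constant with no modifiers, "VSrc*" additionally accepts a literal,
  // "VISrc*" restricts the register class to VGPRs, and "AISrc*" uses the
  // AGPR classes; the B/F suffix plus bit width selects the integer or
  // floating-point MVT, and the "V2" variants cover packed pairs (mostly
  // 16-bit). A minimal illustration, assuming the usual inline-constant
  // rules:
  //   isVSrcF32()  -> true for "v0", for the inline constant 1.0, and for a
  //                   non-inline 32-bit literal;
  //   isVCSrcF32() -> true for "v0" and 1.0, but false for that literal.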
bool isVISrcF16() const { 577 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 578 } 579 580 bool isVISrcV2F16() const { 581 return isVISrcF16() || isVISrcB32(); 582 } 583 584 bool isVISrc_64B64() const { 585 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 586 } 587 588 bool isVISrc_64F64() const { 589 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 590 } 591 592 bool isVISrc_64V2FP32() const { 593 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 594 } 595 596 bool isVISrc_64V2INT32() const { 597 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 598 } 599 600 bool isVISrc_256B64() const { 601 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 602 } 603 604 bool isVISrc_256F64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 606 } 607 608 bool isVISrc_128B16() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 610 } 611 612 bool isVISrc_128V2B16() const { 613 return isVISrc_128B16(); 614 } 615 616 bool isVISrc_128B32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_128F32() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 622 } 623 624 bool isVISrc_256V2FP32() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 626 } 627 628 bool isVISrc_256V2INT32() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 630 } 631 632 bool isVISrc_512B32() const { 633 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 634 } 635 636 bool isVISrc_512B16() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 638 } 639 640 bool isVISrc_512V2B16() const { 641 return isVISrc_512B16(); 642 } 643 644 bool isVISrc_512F32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_512F16() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 650 } 651 652 bool isVISrc_512V2F16() const { 653 return isVISrc_512F16() || isVISrc_512B32(); 654 } 655 656 bool isVISrc_1024B32() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 658 } 659 660 bool isVISrc_1024B16() const { 661 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 662 } 663 664 bool isVISrc_1024V2B16() const { 665 return isVISrc_1024B16(); 666 } 667 668 bool isVISrc_1024F32() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 670 } 671 672 bool isVISrc_1024F16() const { 673 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 674 } 675 676 bool isVISrc_1024V2F16() const { 677 return isVISrc_1024F16() || isVISrc_1024B32(); 678 } 679 680 bool isAISrcB32() const { 681 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 682 } 683 684 bool isAISrcB16() const { 685 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 686 } 687 688 bool isAISrcV2B16() const { 689 return isAISrcB16(); 690 } 691 692 bool isAISrcF32() const { 693 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 694 } 695 696 bool isAISrcF16() const { 697 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 698 } 699 700 bool isAISrcV2F16() const { 701 return isAISrcF16() || isAISrcB32(); 702 } 703 704 bool isAISrc_64B64() const { 705 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 706 } 707 708 bool isAISrc_64F64() const { 709 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 710 } 711 712 bool isAISrc_128B32() const { 713 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 714 } 715 716 bool isAISrc_128B16() const { 717 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 718 } 719 720 bool isAISrc_128V2B16() const { 721 return isAISrc_128B16(); 722 } 723 724 bool isAISrc_128F32() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 726 } 727 728 bool isAISrc_128F16() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 730 } 731 732 bool isAISrc_128V2F16() const { 733 return isAISrc_128F16() || isAISrc_128B32(); 734 } 735 736 bool isVISrc_128F16() const { 737 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 738 } 739 740 bool isVISrc_128V2F16() const { 741 return isVISrc_128F16() || isVISrc_128B32(); 742 } 743 744 bool isAISrc_256B64() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 746 } 747 748 bool isAISrc_256F64() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 750 } 751 752 bool isAISrc_512B32() const { 753 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 754 } 755 756 bool isAISrc_512B16() const { 757 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 758 } 759 760 bool isAISrc_512V2B16() const { 761 return isAISrc_512B16(); 762 } 763 764 bool isAISrc_512F32() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 766 } 767 768 bool isAISrc_512F16() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 770 } 771 772 bool isAISrc_512V2F16() const { 773 return isAISrc_512F16() || isAISrc_512B32(); 774 } 775 776 bool isAISrc_1024B32() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 778 } 779 780 bool isAISrc_1024B16() const { 781 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 782 } 783 784 bool isAISrc_1024V2B16() const { 785 return isAISrc_1024B16(); 786 } 787 788 bool isAISrc_1024F32() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 790 } 791 792 bool isAISrc_1024F16() const { 793 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 794 } 795 796 bool isAISrc_1024V2F16() const { 797 return isAISrc_1024F16() || isAISrc_1024B32(); 798 } 799 800 bool isKImmFP32() const { 801 return isLiteralImm(MVT::f32); 802 } 803 804 bool isKImmFP16() const { 805 return isLiteralImm(MVT::f16); 806 } 807 808 bool isMem() const override { 809 return false; 810 } 811 812 bool isExpr() const { 813 return Kind == Expression; 814 } 815 816 bool isSoppBrTarget() const { 817 return isExpr() || isImm(); 818 } 819 820 bool isSWaitCnt() const; 821 bool isDepCtr() const; 822 bool isHwreg() const; 823 bool isSendMsg() const; 824 bool isSwizzle() const; 825 bool isSMRDOffset8() const; 826 bool isSMEMOffset() const; 827 bool isSMRDLiteralOffset() const; 828 bool isDPP8() const; 829 bool isDPPCtrl() const; 830 bool isBLGP() const; 831 bool isCBSZ() const; 832 bool isABID() const; 833 bool isGPRIdxMode() const; 834 bool isS16Imm() const; 835 bool isU16Imm() const; 836 bool isEndpgm() const; 837 838 StringRef getExpressionAsToken() const { 839 assert(isExpr()); 840 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 841 return S->getSymbol().getName(); 842 } 843 844 StringRef getToken() const { 845 assert(isToken()); 846 847 if (Kind == Expression) 848 return getExpressionAsToken(); 849 850 return 
StringRef(Tok.Data, Tok.Length); 851 } 852 853 int64_t getImm() const { 854 assert(isImm()); 855 return Imm.Val; 856 } 857 858 void setImm(int64_t Val) { 859 assert(isImm()); 860 Imm.Val = Val; 861 } 862 863 ImmTy getImmTy() const { 864 assert(isImm()); 865 return Imm.Type; 866 } 867 868 unsigned getReg() const override { 869 assert(isRegKind()); 870 return Reg.RegNo; 871 } 872 873 SMLoc getStartLoc() const override { 874 return StartLoc; 875 } 876 877 SMLoc getEndLoc() const override { 878 return EndLoc; 879 } 880 881 SMRange getLocRange() const { 882 return SMRange(StartLoc, EndLoc); 883 } 884 885 Modifiers getModifiers() const { 886 assert(isRegKind() || isImmTy(ImmTyNone)); 887 return isRegKind() ? Reg.Mods : Imm.Mods; 888 } 889 890 void setModifiers(Modifiers Mods) { 891 assert(isRegKind() || isImmTy(ImmTyNone)); 892 if (isRegKind()) 893 Reg.Mods = Mods; 894 else 895 Imm.Mods = Mods; 896 } 897 898 bool hasModifiers() const { 899 return getModifiers().hasModifiers(); 900 } 901 902 bool hasFPModifiers() const { 903 return getModifiers().hasFPModifiers(); 904 } 905 906 bool hasIntModifiers() const { 907 return getModifiers().hasIntModifiers(); 908 } 909 910 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 911 912 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 913 914 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 915 916 template <unsigned Bitwidth> 917 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 918 919 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 920 addKImmFPOperands<16>(Inst, N); 921 } 922 923 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 924 addKImmFPOperands<32>(Inst, N); 925 } 926 927 void addRegOperands(MCInst &Inst, unsigned N) const; 928 929 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 930 addRegOperands(Inst, N); 931 } 932 933 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 934 if (isRegKind()) 935 addRegOperands(Inst, N); 936 else if (isExpr()) 937 Inst.addOperand(MCOperand::createExpr(Expr)); 938 else 939 addImmOperands(Inst, N); 940 } 941 942 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 943 Modifiers Mods = getModifiers(); 944 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 945 if (isRegKind()) { 946 addRegOperands(Inst, N); 947 } else { 948 addImmOperands(Inst, N, false); 949 } 950 } 951 952 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 953 assert(!hasIntModifiers()); 954 addRegOrImmWithInputModsOperands(Inst, N); 955 } 956 957 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 958 assert(!hasFPModifiers()); 959 addRegOrImmWithInputModsOperands(Inst, N); 960 } 961 962 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 963 Modifiers Mods = getModifiers(); 964 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 965 assert(isRegKind()); 966 addRegOperands(Inst, N); 967 } 968 969 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 970 assert(!hasIntModifiers()); 971 addRegWithInputModsOperands(Inst, N); 972 } 973 974 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 975 assert(!hasFPModifiers()); 976 addRegWithInputModsOperands(Inst, N); 977 } 978 979 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 980 if (isImm()) 981 addImmOperands(Inst, N); 982 else { 983 assert(isExpr()); 984 Inst.addOperand(MCOperand::createExpr(Expr)); 985 } 986 } 987 988 static 
void printImmTy(raw_ostream& OS, ImmTy Type) { 989 switch (Type) { 990 case ImmTyNone: OS << "None"; break; 991 case ImmTyGDS: OS << "GDS"; break; 992 case ImmTyLDS: OS << "LDS"; break; 993 case ImmTyOffen: OS << "Offen"; break; 994 case ImmTyIdxen: OS << "Idxen"; break; 995 case ImmTyAddr64: OS << "Addr64"; break; 996 case ImmTyOffset: OS << "Offset"; break; 997 case ImmTyInstOffset: OS << "InstOffset"; break; 998 case ImmTyOffset0: OS << "Offset0"; break; 999 case ImmTyOffset1: OS << "Offset1"; break; 1000 case ImmTyCPol: OS << "CPol"; break; 1001 case ImmTySWZ: OS << "SWZ"; break; 1002 case ImmTyTFE: OS << "TFE"; break; 1003 case ImmTyD16: OS << "D16"; break; 1004 case ImmTyFORMAT: OS << "FORMAT"; break; 1005 case ImmTyClampSI: OS << "ClampSI"; break; 1006 case ImmTyOModSI: OS << "OModSI"; break; 1007 case ImmTyDPP8: OS << "DPP8"; break; 1008 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1009 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1010 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1011 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1012 case ImmTyDppFi: OS << "FI"; break; 1013 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1014 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1015 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1016 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1017 case ImmTyDMask: OS << "DMask"; break; 1018 case ImmTyDim: OS << "Dim"; break; 1019 case ImmTyUNorm: OS << "UNorm"; break; 1020 case ImmTyDA: OS << "DA"; break; 1021 case ImmTyR128A16: OS << "R128A16"; break; 1022 case ImmTyA16: OS << "A16"; break; 1023 case ImmTyLWE: OS << "LWE"; break; 1024 case ImmTyOff: OS << "Off"; break; 1025 case ImmTyExpTgt: OS << "ExpTgt"; break; 1026 case ImmTyExpCompr: OS << "ExpCompr"; break; 1027 case ImmTyExpVM: OS << "ExpVM"; break; 1028 case ImmTyHwreg: OS << "Hwreg"; break; 1029 case ImmTySendMsg: OS << "SendMsg"; break; 1030 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1031 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1032 case ImmTyAttrChan: OS << "AttrChan"; break; 1033 case ImmTyOpSel: OS << "OpSel"; break; 1034 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1035 case ImmTyNegLo: OS << "NegLo"; break; 1036 case ImmTyNegHi: OS << "NegHi"; break; 1037 case ImmTySwizzle: OS << "Swizzle"; break; 1038 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1039 case ImmTyHigh: OS << "High"; break; 1040 case ImmTyBLGP: OS << "BLGP"; break; 1041 case ImmTyCBSZ: OS << "CBSZ"; break; 1042 case ImmTyABID: OS << "ABID"; break; 1043 case ImmTyEndpgm: OS << "Endpgm"; break; 1044 } 1045 } 1046 1047 void print(raw_ostream &OS) const override { 1048 switch (Kind) { 1049 case Register: 1050 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1051 break; 1052 case Immediate: 1053 OS << '<' << getImm(); 1054 if (getImmTy() != ImmTyNone) { 1055 OS << " type: "; printImmTy(OS, getImmTy()); 1056 } 1057 OS << " mods: " << Imm.Mods << '>'; 1058 break; 1059 case Token: 1060 OS << '\'' << getToken() << '\''; 1061 break; 1062 case Expression: 1063 OS << "<expr " << *Expr << '>'; 1064 break; 1065 } 1066 } 1067 1068 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1069 int64_t Val, SMLoc Loc, 1070 ImmTy Type = ImmTyNone, 1071 bool IsFPImm = false) { 1072 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1073 Op->Imm.Val = Val; 1074 Op->Imm.IsFPImm = IsFPImm; 1075 Op->Imm.Kind = ImmKindTyNone; 1076 Op->Imm.Type = Type; 1077 Op->Imm.Mods = Modifiers(); 1078 Op->StartLoc = Loc; 1079 Op->EndLoc = Loc; 1080 return Op; 1081 } 
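  // Illustrative sketch only (hypothetical call site, not part of this file):
  // parser routines typically wrap a parsed value into one of these factory
  // results and append it to the operand list, e.g.
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
  //                                                AMDGPUOperand::ImmTyOffset));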
1082 1083 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1084 StringRef Str, SMLoc Loc, 1085 bool HasExplicitEncodingSize = true) { 1086 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1087 Res->Tok.Data = Str.data(); 1088 Res->Tok.Length = Str.size(); 1089 Res->StartLoc = Loc; 1090 Res->EndLoc = Loc; 1091 return Res; 1092 } 1093 1094 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1095 unsigned RegNo, SMLoc S, 1096 SMLoc E) { 1097 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1098 Op->Reg.RegNo = RegNo; 1099 Op->Reg.Mods = Modifiers(); 1100 Op->StartLoc = S; 1101 Op->EndLoc = E; 1102 return Op; 1103 } 1104 1105 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1106 const class MCExpr *Expr, SMLoc S) { 1107 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1108 Op->Expr = Expr; 1109 Op->StartLoc = S; 1110 Op->EndLoc = S; 1111 return Op; 1112 } 1113 }; 1114 1115 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1116 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1117 return OS; 1118 } 1119 1120 //===----------------------------------------------------------------------===// 1121 // AsmParser 1122 //===----------------------------------------------------------------------===// 1123 1124 // Holds info related to the current kernel, e.g. count of SGPRs used. 1125 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1126 // .amdgpu_hsa_kernel or at EOF. 1127 class KernelScopeInfo { 1128 int SgprIndexUnusedMin = -1; 1129 int VgprIndexUnusedMin = -1; 1130 int AgprIndexUnusedMin = -1; 1131 MCContext *Ctx = nullptr; 1132 MCSubtargetInfo const *MSTI = nullptr; 1133 1134 void usesSgprAt(int i) { 1135 if (i >= SgprIndexUnusedMin) { 1136 SgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol* const Sym = 1139 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1140 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1141 } 1142 } 1143 } 1144 1145 void usesVgprAt(int i) { 1146 if (i >= VgprIndexUnusedMin) { 1147 VgprIndexUnusedMin = ++i; 1148 if (Ctx) { 1149 MCSymbol* const Sym = 1150 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1151 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1152 VgprIndexUnusedMin); 1153 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1154 } 1155 } 1156 } 1157 1158 void usesAgprAt(int i) { 1159 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1160 if (!hasMAIInsts(*MSTI)) 1161 return; 1162 1163 if (i >= AgprIndexUnusedMin) { 1164 AgprIndexUnusedMin = ++i; 1165 if (Ctx) { 1166 MCSymbol* const Sym = 1167 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1168 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1169 1170 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1171 MCSymbol* const vSym = 1172 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1173 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1174 VgprIndexUnusedMin); 1175 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1176 } 1177 } 1178 } 1179 1180 public: 1181 KernelScopeInfo() = default; 1182 1183 void initialize(MCContext &Context) { 1184 Ctx = &Context; 1185 MSTI = Ctx->getSubtargetInfo(); 1186 1187 usesSgprAt(SgprIndexUnusedMin = -1); 1188 usesVgprAt(VgprIndexUnusedMin = -1); 1189 if (hasMAIInsts(*MSTI)) { 1190 usesAgprAt(AgprIndexUnusedMin = -1); 1191 } 1192 } 
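  // Added descriptive note (not from upstream): RegWidth below is a width in
  // bits, as the divideCeil(RegWidth, 32) calls imply, so the last
  // dword-granule index touched by a use starting at DwordRegIndex is
  // DwordRegIndex + ceil(RegWidth / 32) - 1. For example, a 64-bit use of
  // s[4:5] would advance the SGPR watermark past index 5.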
1193 1194 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1195 unsigned RegWidth) { 1196 switch (RegKind) { 1197 case IS_SGPR: 1198 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1199 break; 1200 case IS_AGPR: 1201 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1202 break; 1203 case IS_VGPR: 1204 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1205 break; 1206 default: 1207 break; 1208 } 1209 } 1210 }; 1211 1212 class AMDGPUAsmParser : public MCTargetAsmParser { 1213 MCAsmParser &Parser; 1214 1215 // Number of extra operands parsed after the first optional operand. 1216 // This may be necessary to skip hardcoded mandatory operands. 1217 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1218 1219 unsigned ForcedEncodingSize = 0; 1220 bool ForcedDPP = false; 1221 bool ForcedSDWA = false; 1222 KernelScopeInfo KernelScope; 1223 unsigned CPolSeen; 1224 1225 /// @name Auto-generated Match Functions 1226 /// { 1227 1228 #define GET_ASSEMBLER_HEADER 1229 #include "AMDGPUGenAsmMatcher.inc" 1230 1231 /// } 1232 1233 private: 1234 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1235 bool OutOfRangeError(SMRange Range); 1236 /// Calculate VGPR/SGPR blocks required for given target, reserved 1237 /// registers, and user-specified NextFreeXGPR values. 1238 /// 1239 /// \param Features [in] Target features, used for bug corrections. 1240 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1241 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1242 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1243 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1244 /// descriptor field, if valid. 1245 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1246 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1247 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1248 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1249 /// \param VGPRBlocks [out] Result VGPR block count. 1250 /// \param SGPRBlocks [out] Result SGPR block count. 1251 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1252 bool FlatScrUsed, bool XNACKUsed, 1253 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1254 SMRange VGPRRange, unsigned NextFreeSGPR, 1255 SMRange SGPRRange, unsigned &VGPRBlocks, 1256 unsigned &SGPRBlocks); 1257 bool ParseDirectiveAMDGCNTarget(); 1258 bool ParseDirectiveAMDHSAKernel(); 1259 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1260 bool ParseDirectiveHSACodeObjectVersion(); 1261 bool ParseDirectiveHSACodeObjectISA(); 1262 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1263 bool ParseDirectiveAMDKernelCodeT(); 1264 // TODO: Possibly make subtargetHasRegister const. 1265 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1266 bool ParseDirectiveAMDGPUHsaKernel(); 1267 1268 bool ParseDirectiveISAVersion(); 1269 bool ParseDirectiveHSAMetadata(); 1270 bool ParseDirectivePALMetadataBegin(); 1271 bool ParseDirectivePALMetadata(); 1272 bool ParseDirectiveAMDGPULDS(); 1273 1274 /// Common code to parse out a block of text (typically YAML) between start and 1275 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
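      // Added descriptive note (not from upstream): the code that follows
      // publishes the ISA version as pre-defined assembler symbols,
      // .amdgcn.gfx_generation_{number,minor,stepping} for HSA ABI V3 and
      // above, or .option.machine_version_{major,minor,stepping} otherwise,
      // so that assembly sources can reference them.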
1342 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1343 MCContext &Ctx = getContext(); 1344 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1345 MCSymbol *Sym = 1346 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1347 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1348 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1349 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1350 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1351 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1352 } else { 1353 MCSymbol *Sym = 1354 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1355 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1356 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1357 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1358 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1359 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1360 } 1361 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1362 initializeGprCountSymbol(IS_VGPR); 1363 initializeGprCountSymbol(IS_SGPR); 1364 } else 1365 KernelScope.initialize(getContext()); 1366 } 1367 } 1368 1369 bool hasMIMG_R128() const { 1370 return AMDGPU::hasMIMG_R128(getSTI()); 1371 } 1372 1373 bool hasPackedD16() const { 1374 return AMDGPU::hasPackedD16(getSTI()); 1375 } 1376 1377 bool hasGFX10A16() const { 1378 return AMDGPU::hasGFX10A16(getSTI()); 1379 } 1380 1381 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1382 1383 bool isSI() const { 1384 return AMDGPU::isSI(getSTI()); 1385 } 1386 1387 bool isCI() const { 1388 return AMDGPU::isCI(getSTI()); 1389 } 1390 1391 bool isVI() const { 1392 return AMDGPU::isVI(getSTI()); 1393 } 1394 1395 bool isGFX9() const { 1396 return AMDGPU::isGFX9(getSTI()); 1397 } 1398 1399 // TODO: isGFX90A is also true for GFX940. We need to clean it. 1400 bool isGFX90A() const { 1401 return AMDGPU::isGFX90A(getSTI()); 1402 } 1403 1404 bool isGFX940() const { 1405 return AMDGPU::isGFX940(getSTI()); 1406 } 1407 1408 bool isGFX9Plus() const { 1409 return AMDGPU::isGFX9Plus(getSTI()); 1410 } 1411 1412 bool isGFX10() const { 1413 return AMDGPU::isGFX10(getSTI()); 1414 } 1415 1416 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1417 1418 bool isGFX10_BEncoding() const { 1419 return AMDGPU::isGFX10_BEncoding(getSTI()); 1420 } 1421 1422 bool hasInv2PiInlineImm() const { 1423 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1424 } 1425 1426 bool hasFlatOffsets() const { 1427 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1428 } 1429 1430 bool hasArchitectedFlatScratch() const { 1431 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1432 } 1433 1434 bool hasSGPR102_SGPR103() const { 1435 return !isVI() && !isGFX9(); 1436 } 1437 1438 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1439 1440 bool hasIntClamp() const { 1441 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1442 } 1443 1444 AMDGPUTargetStreamer &getTargetStreamer() { 1445 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1446 return static_cast<AMDGPUTargetStreamer &>(TS); 1447 } 1448 1449 const MCRegisterInfo *getMRI() const { 1450 // We need this const_cast because for some reason getContext() is not const 1451 // in MCAsmParser. 
1452 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1453 } 1454 1455 const MCInstrInfo *getMII() const { 1456 return &MII; 1457 } 1458 1459 const FeatureBitset &getFeatureBits() const { 1460 return getSTI().getFeatureBits(); 1461 } 1462 1463 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1464 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1465 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1466 1467 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1468 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1469 bool isForcedDPP() const { return ForcedDPP; } 1470 bool isForcedSDWA() const { return ForcedSDWA; } 1471 ArrayRef<unsigned> getMatchedVariants() const; 1472 StringRef getMatchedVariantName() const; 1473 1474 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1475 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1476 bool RestoreOnFailure); 1477 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1478 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1479 SMLoc &EndLoc) override; 1480 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1481 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1482 unsigned Kind) override; 1483 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1484 OperandVector &Operands, MCStreamer &Out, 1485 uint64_t &ErrorInfo, 1486 bool MatchingInlineAsm) override; 1487 bool ParseDirective(AsmToken DirectiveID) override; 1488 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1489 OperandMode Mode = OperandMode_Default); 1490 StringRef parseMnemonicSuffix(StringRef Name); 1491 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1492 SMLoc NameLoc, OperandVector &Operands) override; 1493 //bool ProcessInstruction(MCInst &Inst); 1494 1495 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1496 1497 OperandMatchResultTy 1498 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1499 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1500 bool (*ConvertResult)(int64_t &) = nullptr); 1501 1502 OperandMatchResultTy 1503 parseOperandArrayWithPrefix(const char *Prefix, 1504 OperandVector &Operands, 1505 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1506 bool (*ConvertResult)(int64_t&) = nullptr); 1507 1508 OperandMatchResultTy 1509 parseNamedBit(StringRef Name, OperandVector &Operands, 1510 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1511 OperandMatchResultTy parseCPol(OperandVector &Operands); 1512 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1513 StringRef &Value, 1514 SMLoc &StringLoc); 1515 1516 bool isModifier(); 1517 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1518 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1519 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1520 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1521 bool parseSP3NegModifier(); 1522 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1523 OperandMatchResultTy parseReg(OperandVector &Operands); 1524 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1525 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1526 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1527 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1528 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1529 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1530 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1531 OperandMatchResultTy parseUfmt(int64_t &Format); 1532 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1533 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1534 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1535 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1536 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1537 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1538 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1539 1540 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1541 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1542 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1543 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1544 1545 bool parseCnt(int64_t &IntVal); 1546 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1547 1548 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1549 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1550 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1551 1552 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1553 1554 private: 1555 struct OperandInfoTy { 1556 SMLoc Loc; 1557 int64_t Id; 1558 bool IsSymbolic = false; 1559 bool IsDefined = false; 1560 1561 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1562 }; 1563 1564 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1565 bool validateSendMsg(const OperandInfoTy &Msg, 1566 const OperandInfoTy &Op, 1567 const OperandInfoTy &Stream); 1568 1569 bool parseHwregBody(OperandInfoTy &HwReg, 1570 OperandInfoTy &Offset, 1571 OperandInfoTy &Width); 1572 bool validateHwreg(const OperandInfoTy &HwReg, 1573 const OperandInfoTy &Offset, 1574 const OperandInfoTy &Width); 1575 1576 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1577 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1578 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1579 1580 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1581 const OperandVector &Operands) const; 1582 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1583 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1584 SMLoc getLitLoc(const OperandVector &Operands) const; 1585 SMLoc getConstLoc(const OperandVector &Operands) const; 1586 1587 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1588 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1589 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1590 bool validateSOPLiteral(const MCInst &Inst) const; 1591 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1592 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1593 bool validateIntClampSupported(const MCInst &Inst); 1594 bool 
validateMIMGAtomicDMask(const MCInst &Inst); 1595 bool validateMIMGGatherDMask(const MCInst &Inst); 1596 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1597 bool validateMIMGDataSize(const MCInst &Inst); 1598 bool validateMIMGAddrSize(const MCInst &Inst); 1599 bool validateMIMGD16(const MCInst &Inst); 1600 bool validateMIMGDim(const MCInst &Inst); 1601 bool validateMIMGMSAA(const MCInst &Inst); 1602 bool validateOpSel(const MCInst &Inst); 1603 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1604 bool validateVccOperand(unsigned Reg) const; 1605 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1606 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1607 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1608 bool validateAGPRLdSt(const MCInst &Inst) const; 1609 bool validateVGPRAlign(const MCInst &Inst) const; 1610 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1611 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1612 bool validateDivScale(const MCInst &Inst); 1613 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1614 const SMLoc &IDLoc); 1615 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1616 unsigned getConstantBusLimit(unsigned Opcode) const; 1617 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1618 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1619 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1620 1621 bool isSupportedMnemo(StringRef Mnemo, 1622 const FeatureBitset &FBS); 1623 bool isSupportedMnemo(StringRef Mnemo, 1624 const FeatureBitset &FBS, 1625 ArrayRef<unsigned> Variants); 1626 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1627 1628 bool isId(const StringRef Id) const; 1629 bool isId(const AsmToken &Token, const StringRef Id) const; 1630 bool isToken(const AsmToken::TokenKind Kind) const; 1631 bool trySkipId(const StringRef Id); 1632 bool trySkipId(const StringRef Pref, const StringRef Id); 1633 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1634 bool trySkipToken(const AsmToken::TokenKind Kind); 1635 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1636 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1637 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1638 1639 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1640 AsmToken::TokenKind getTokenKind() const; 1641 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1642 bool parseExpr(OperandVector &Operands); 1643 StringRef getTokenStr() const; 1644 AsmToken peekToken(); 1645 AsmToken getToken() const; 1646 SMLoc getLoc() const; 1647 void lex(); 1648 1649 public: 1650 void onBeginOfFile() override; 1651 1652 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1653 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1654 1655 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1656 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1657 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1658 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1659 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1660 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1661 1662 bool parseSwizzleOperand(int64_t &Op, 1663 const unsigned MinVal, 1664 const unsigned MaxVal, 1665 const StringRef 
ErrMsg, 1666 SMLoc &Loc); 1667 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1668 const unsigned MinVal, 1669 const unsigned MaxVal, 1670 const StringRef ErrMsg); 1671 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1672 bool parseSwizzleOffset(int64_t &Imm); 1673 bool parseSwizzleMacro(int64_t &Imm); 1674 bool parseSwizzleQuadPerm(int64_t &Imm); 1675 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1676 bool parseSwizzleBroadcast(int64_t &Imm); 1677 bool parseSwizzleSwap(int64_t &Imm); 1678 bool parseSwizzleReverse(int64_t &Imm); 1679 1680 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1681 int64_t parseGPRIdxMacro(); 1682 1683 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1684 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1685 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1686 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1687 1688 AMDGPUOperand::Ptr defaultCPol() const; 1689 1690 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1691 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1692 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1693 AMDGPUOperand::Ptr defaultFlatOffset() const; 1694 1695 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1696 1697 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1698 OptionalImmIndexMap &OptionalIdx); 1699 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1700 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1701 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1702 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1703 OptionalImmIndexMap &OptionalIdx); 1704 1705 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1706 1707 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1708 bool IsAtomic = false); 1709 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1710 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1711 1712 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1713 1714 bool parseDimId(unsigned &Encoding); 1715 OperandMatchResultTy parseDim(OperandVector &Operands); 1716 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1717 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1718 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1719 int64_t parseDPPCtrlSel(StringRef Ctrl); 1720 int64_t parseDPPCtrlPerm(); 1721 AMDGPUOperand::Ptr defaultRowMask() const; 1722 AMDGPUOperand::Ptr defaultBankMask() const; 1723 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1724 AMDGPUOperand::Ptr defaultFI() const; 1725 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1726 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1727 1728 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1729 AMDGPUOperand::ImmTy Type); 1730 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1731 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1732 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1733 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1734 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1735 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1736 void cvtSDWA(MCInst &Inst, const OperandVector 
&Operands, 1737 uint64_t BasicInstType, 1738 bool SkipDstVcc = false, 1739 bool SkipSrcVcc = false); 1740 1741 AMDGPUOperand::Ptr defaultBLGP() const; 1742 AMDGPUOperand::Ptr defaultCBSZ() const; 1743 AMDGPUOperand::Ptr defaultABID() const; 1744 1745 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1746 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1747 }; 1748 1749 struct OptionalOperand { 1750 const char *Name; 1751 AMDGPUOperand::ImmTy Type; 1752 bool IsBit; 1753 bool (*ConvertResult)(int64_t&); 1754 }; 1755 1756 } // end anonymous namespace 1757 1758 // May be called with integer type with equivalent bitwidth. 1759 static const fltSemantics *getFltSemantics(unsigned Size) { 1760 switch (Size) { 1761 case 4: 1762 return &APFloat::IEEEsingle(); 1763 case 8: 1764 return &APFloat::IEEEdouble(); 1765 case 2: 1766 return &APFloat::IEEEhalf(); 1767 default: 1768 llvm_unreachable("unsupported fp type"); 1769 } 1770 } 1771 1772 static const fltSemantics *getFltSemantics(MVT VT) { 1773 return getFltSemantics(VT.getSizeInBits() / 8); 1774 } 1775 1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1777 switch (OperandType) { 1778 case AMDGPU::OPERAND_REG_IMM_INT32: 1779 case AMDGPU::OPERAND_REG_IMM_FP32: 1780 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1781 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1782 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1783 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1784 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1785 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1786 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1787 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1788 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1789 case AMDGPU::OPERAND_KIMM32: 1790 return &APFloat::IEEEsingle(); 1791 case AMDGPU::OPERAND_REG_IMM_INT64: 1792 case AMDGPU::OPERAND_REG_IMM_FP64: 1793 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1794 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1796 return &APFloat::IEEEdouble(); 1797 case AMDGPU::OPERAND_REG_IMM_INT16: 1798 case AMDGPU::OPERAND_REG_IMM_FP16: 1799 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1800 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1801 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1802 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1808 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1809 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1810 case AMDGPU::OPERAND_KIMM16: 1811 return &APFloat::IEEEhalf(); 1812 default: 1813 llvm_unreachable("unsupported fp type"); 1814 } 1815 } 1816 1817 //===----------------------------------------------------------------------===// 1818 // Operand 1819 //===----------------------------------------------------------------------===// 1820 1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1822 bool Lost; 1823 1824 // Convert literal to single precision 1825 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1826 APFloat::rmNearestTiesToEven, 1827 &Lost); 1828 // We allow precision lost but not overflow or underflow 1829 if (Status != APFloat::opOK && 1830 Lost && 1831 ((Status & APFloat::opOverflow) != 0 || 1832 (Status & APFloat::opUnderflow) != 0)) { 1833 return false; 1834 } 1835 1836 return true; 1837 } 1838 1839 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1840 return isUIntN(Size, Val) || 
isIntN(Size, Val); 1841 } 1842 1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1844 if (VT.getScalarType() == MVT::i16) { 1845 // FP immediate values are broken. 1846 return isInlinableIntLiteral(Val); 1847 } 1848 1849 // f16/v2f16 operands work correctly for all values. 1850 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1851 } 1852 1853 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1854 1855 // This is a hack to enable named inline values like 1856 // shared_base with both 32-bit and 64-bit operands. 1857 // Note that these values are defined as 1858 // 32-bit operands only. 1859 if (isInlineValue()) { 1860 return true; 1861 } 1862 1863 if (!isImmTy(ImmTyNone)) { 1864 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1865 return false; 1866 } 1867 // TODO: We should avoid using host float here. It would be better to 1868 // check the float bit values which is what a few other places do. 1869 // We've had bot failures before due to weird NaN support on mips hosts. 1870 1871 APInt Literal(64, Imm.Val); 1872 1873 if (Imm.IsFPImm) { // We got fp literal token 1874 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1875 return AMDGPU::isInlinableLiteral64(Imm.Val, 1876 AsmParser->hasInv2PiInlineImm()); 1877 } 1878 1879 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1880 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1881 return false; 1882 1883 if (type.getScalarSizeInBits() == 16) { 1884 return isInlineableLiteralOp16( 1885 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1886 type, AsmParser->hasInv2PiInlineImm()); 1887 } 1888 1889 // Check if single precision literal is inlinable 1890 return AMDGPU::isInlinableLiteral32( 1891 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1892 AsmParser->hasInv2PiInlineImm()); 1893 } 1894 1895 // We got int literal token. 1896 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1897 return AMDGPU::isInlinableLiteral64(Imm.Val, 1898 AsmParser->hasInv2PiInlineImm()); 1899 } 1900 1901 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1902 return false; 1903 } 1904 1905 if (type.getScalarSizeInBits() == 16) { 1906 return isInlineableLiteralOp16( 1907 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1908 type, AsmParser->hasInv2PiInlineImm()); 1909 } 1910 1911 return AMDGPU::isInlinableLiteral32( 1912 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1913 AsmParser->hasInv2PiInlineImm()); 1914 } 1915 1916 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1917 // Check that this immediate can be added as literal 1918 if (!isImmTy(ImmTyNone)) { 1919 return false; 1920 } 1921 1922 if (!Imm.IsFPImm) { 1923 // We got int literal token. 1924 1925 if (type == MVT::f64 && hasFPModifiers()) { 1926 // Cannot apply fp modifiers to int literals preserving the same semantics 1927 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1928 // disable these cases. 1929 return false; 1930 } 1931 1932 unsigned Size = type.getSizeInBits(); 1933 if (Size == 64) 1934 Size = 32; 1935 1936 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1937 // types. 
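  // Illustration of the current behaviour: a 64-bit integer operand accepts an
  // integer literal only if it fits in 32 bits, e.g. -2 and 0xffffffff are
  // accepted here while 0x100000000 is rejected.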
1938 return isSafeTruncation(Imm.Val, Size); 1939 } 1940 1941 // We got fp literal token 1942 if (type == MVT::f64) { // Expected 64-bit fp operand 1943 // We would set low 64-bits of literal to zeroes but we accept this literals 1944 return true; 1945 } 1946 1947 if (type == MVT::i64) { // Expected 64-bit int operand 1948 // We don't allow fp literals in 64-bit integer instructions. It is 1949 // unclear how we should encode them. 1950 return false; 1951 } 1952 1953 // We allow fp literals with f16x2 operands assuming that the specified 1954 // literal goes into the lower half and the upper half is zero. We also 1955 // require that the literal may be losslessly converted to f16. 1956 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1957 (type == MVT::v2i16)? MVT::i16 : 1958 (type == MVT::v2f32)? MVT::f32 : type; 1959 1960 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1961 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1962 } 1963 1964 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1965 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1966 } 1967 1968 bool AMDGPUOperand::isVRegWithInputMods() const { 1969 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1970 // GFX90A allows DPP on 64-bit operands. 1971 (isRegClass(AMDGPU::VReg_64RegClassID) && 1972 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1973 } 1974 1975 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1976 if (AsmParser->isVI()) 1977 return isVReg32(); 1978 else if (AsmParser->isGFX9Plus()) 1979 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1980 else 1981 return false; 1982 } 1983 1984 bool AMDGPUOperand::isSDWAFP16Operand() const { 1985 return isSDWAOperand(MVT::f16); 1986 } 1987 1988 bool AMDGPUOperand::isSDWAFP32Operand() const { 1989 return isSDWAOperand(MVT::f32); 1990 } 1991 1992 bool AMDGPUOperand::isSDWAInt16Operand() const { 1993 return isSDWAOperand(MVT::i16); 1994 } 1995 1996 bool AMDGPUOperand::isSDWAInt32Operand() const { 1997 return isSDWAOperand(MVT::i32); 1998 } 1999 2000 bool AMDGPUOperand::isBoolReg() const { 2001 auto FB = AsmParser->getFeatureBits(); 2002 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2003 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2004 } 2005 2006 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2007 { 2008 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2009 assert(Size == 2 || Size == 4 || Size == 8); 2010 2011 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2012 2013 if (Imm.Mods.Abs) { 2014 Val &= ~FpSignMask; 2015 } 2016 if (Imm.Mods.Neg) { 2017 Val ^= FpSignMask; 2018 } 2019 2020 return Val; 2021 } 2022 2023 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2024 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2025 Inst.getNumOperands())) { 2026 addLiteralImmOperand(Inst, Imm.Val, 2027 ApplyModifiers & 2028 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2029 } else { 2030 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2031 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2032 setImmKindNone(); 2033 } 2034 } 2035 2036 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2037 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2038 auto OpNum = Inst.getNumOperands(); 2039 // Check that this operand accepts literals 2040 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2041 2042 
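  // If requested, FP modifiers are folded into the literal bits before the
  // operand is encoded; e.g. a "neg" modifier flips the sign bit of the value
  // (see applyInputFPModifiers above).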
if (ApplyModifiers) { 2043 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2044 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2045 Val = applyInputFPModifiers(Val, Size); 2046 } 2047 2048 APInt Literal(64, Val); 2049 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2050 2051 if (Imm.IsFPImm) { // We got fp literal token 2052 switch (OpTy) { 2053 case AMDGPU::OPERAND_REG_IMM_INT64: 2054 case AMDGPU::OPERAND_REG_IMM_FP64: 2055 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2056 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2057 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2058 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2059 AsmParser->hasInv2PiInlineImm())) { 2060 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2061 setImmKindConst(); 2062 return; 2063 } 2064 2065 // Non-inlineable 2066 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2067 // For fp operands we check if low 32 bits are zeros 2068 if (Literal.getLoBits(32) != 0) { 2069 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2070 "Can't encode literal as exact 64-bit floating-point operand. " 2071 "Low 32-bits will be set to zero"); 2072 } 2073 2074 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2075 setImmKindLiteral(); 2076 return; 2077 } 2078 2079 // We don't allow fp literals in 64-bit integer instructions. It is 2080 // unclear how we should encode them. This case should be checked earlier 2081 // in predicate methods (isLiteralImm()) 2082 llvm_unreachable("fp literal in 64-bit integer instruction."); 2083 2084 case AMDGPU::OPERAND_REG_IMM_INT32: 2085 case AMDGPU::OPERAND_REG_IMM_FP32: 2086 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2087 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2088 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2089 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2090 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2091 case AMDGPU::OPERAND_REG_IMM_INT16: 2092 case AMDGPU::OPERAND_REG_IMM_FP16: 2093 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2094 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2095 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2096 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2097 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2099 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2102 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2103 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2105 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2106 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2107 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2108 case AMDGPU::OPERAND_KIMM32: 2109 case AMDGPU::OPERAND_KIMM16: { 2110 bool lost; 2111 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2112 // Convert literal to single precision 2113 FPLiteral.convert(*getOpFltSemantics(OpTy), 2114 APFloat::rmNearestTiesToEven, &lost); 2115 // We allow precision lost but not overflow or underflow. This should be 2116 // checked earlier in isLiteralImm() 2117 2118 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2119 Inst.addOperand(MCOperand::createImm(ImmVal)); 2120 setImmKindLiteral(); 2121 return; 2122 } 2123 default: 2124 llvm_unreachable("invalid operand size"); 2125 } 2126 2127 return; 2128 } 2129 2130 // We got int literal token. 2131 // Only sign extend inline immediates. 
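  // For example, a 16-bit operand given as -1 is emitted as an inline
  // constant, while a non-inlinable value such as 0x1234 is emitted as a
  // literal truncated to its low 16 bits.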
2132 switch (OpTy) { 2133 case AMDGPU::OPERAND_REG_IMM_INT32: 2134 case AMDGPU::OPERAND_REG_IMM_FP32: 2135 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2136 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2137 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2138 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2139 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2140 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2141 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2142 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2143 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2144 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2145 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2146 if (isSafeTruncation(Val, 32) && 2147 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2148 AsmParser->hasInv2PiInlineImm())) { 2149 Inst.addOperand(MCOperand::createImm(Val)); 2150 setImmKindConst(); 2151 return; 2152 } 2153 2154 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2155 setImmKindLiteral(); 2156 return; 2157 2158 case AMDGPU::OPERAND_REG_IMM_INT64: 2159 case AMDGPU::OPERAND_REG_IMM_FP64: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2163 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2164 Inst.addOperand(MCOperand::createImm(Val)); 2165 setImmKindConst(); 2166 return; 2167 } 2168 2169 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2170 setImmKindLiteral(); 2171 return; 2172 2173 case AMDGPU::OPERAND_REG_IMM_INT16: 2174 case AMDGPU::OPERAND_REG_IMM_FP16: 2175 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2176 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2177 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2179 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2180 if (isSafeTruncation(Val, 16) && 2181 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2182 AsmParser->hasInv2PiInlineImm())) { 2183 Inst.addOperand(MCOperand::createImm(Val)); 2184 setImmKindConst(); 2185 return; 2186 } 2187 2188 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2189 setImmKindLiteral(); 2190 return; 2191 2192 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2193 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2194 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2195 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2196 assert(isSafeTruncation(Val, 16)); 2197 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2198 AsmParser->hasInv2PiInlineImm())); 2199 2200 Inst.addOperand(MCOperand::createImm(Val)); 2201 return; 2202 } 2203 case AMDGPU::OPERAND_KIMM32: 2204 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2205 setImmKindNone(); 2206 return; 2207 case AMDGPU::OPERAND_KIMM16: 2208 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2209 setImmKindNone(); 2210 return; 2211 default: 2212 llvm_unreachable("invalid operand size"); 2213 } 2214 } 2215 2216 template <unsigned Bitwidth> 2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2218 APInt Literal(64, Imm.Val); 2219 setImmKindNone(); 2220 2221 if (!Imm.IsFPImm) { 2222 // We got int literal token. 
2223 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2224 return; 2225 } 2226 2227 bool Lost; 2228 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2229 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2230 APFloat::rmNearestTiesToEven, &Lost); 2231 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2232 } 2233 2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2235 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2236 } 2237 2238 static bool isInlineValue(unsigned Reg) { 2239 switch (Reg) { 2240 case AMDGPU::SRC_SHARED_BASE: 2241 case AMDGPU::SRC_SHARED_LIMIT: 2242 case AMDGPU::SRC_PRIVATE_BASE: 2243 case AMDGPU::SRC_PRIVATE_LIMIT: 2244 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2245 return true; 2246 case AMDGPU::SRC_VCCZ: 2247 case AMDGPU::SRC_EXECZ: 2248 case AMDGPU::SRC_SCC: 2249 return true; 2250 case AMDGPU::SGPR_NULL: 2251 return true; 2252 default: 2253 return false; 2254 } 2255 } 2256 2257 bool AMDGPUOperand::isInlineValue() const { 2258 return isRegKind() && ::isInlineValue(getReg()); 2259 } 2260 2261 //===----------------------------------------------------------------------===// 2262 // AsmParser 2263 //===----------------------------------------------------------------------===// 2264 2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2266 if (Is == IS_VGPR) { 2267 switch (RegWidth) { 2268 default: return -1; 2269 case 32: 2270 return AMDGPU::VGPR_32RegClassID; 2271 case 64: 2272 return AMDGPU::VReg_64RegClassID; 2273 case 96: 2274 return AMDGPU::VReg_96RegClassID; 2275 case 128: 2276 return AMDGPU::VReg_128RegClassID; 2277 case 160: 2278 return AMDGPU::VReg_160RegClassID; 2279 case 192: 2280 return AMDGPU::VReg_192RegClassID; 2281 case 224: 2282 return AMDGPU::VReg_224RegClassID; 2283 case 256: 2284 return AMDGPU::VReg_256RegClassID; 2285 case 512: 2286 return AMDGPU::VReg_512RegClassID; 2287 case 1024: 2288 return AMDGPU::VReg_1024RegClassID; 2289 } 2290 } else if (Is == IS_TTMP) { 2291 switch (RegWidth) { 2292 default: return -1; 2293 case 32: 2294 return AMDGPU::TTMP_32RegClassID; 2295 case 64: 2296 return AMDGPU::TTMP_64RegClassID; 2297 case 128: 2298 return AMDGPU::TTMP_128RegClassID; 2299 case 256: 2300 return AMDGPU::TTMP_256RegClassID; 2301 case 512: 2302 return AMDGPU::TTMP_512RegClassID; 2303 } 2304 } else if (Is == IS_SGPR) { 2305 switch (RegWidth) { 2306 default: return -1; 2307 case 32: 2308 return AMDGPU::SGPR_32RegClassID; 2309 case 64: 2310 return AMDGPU::SGPR_64RegClassID; 2311 case 96: 2312 return AMDGPU::SGPR_96RegClassID; 2313 case 128: 2314 return AMDGPU::SGPR_128RegClassID; 2315 case 160: 2316 return AMDGPU::SGPR_160RegClassID; 2317 case 192: 2318 return AMDGPU::SGPR_192RegClassID; 2319 case 224: 2320 return AMDGPU::SGPR_224RegClassID; 2321 case 256: 2322 return AMDGPU::SGPR_256RegClassID; 2323 case 512: 2324 return AMDGPU::SGPR_512RegClassID; 2325 } 2326 } else if (Is == IS_AGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::AGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::AReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::AReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::AReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::AReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::AReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::AReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::AReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::AReg_512RegClassID; 
2347 case 1024: 2348 return AMDGPU::AReg_1024RegClassID; 2349 } 2350 } 2351 return -1; 2352 } 2353 2354 static unsigned getSpecialRegForName(StringRef RegName) { 2355 return StringSwitch<unsigned>(RegName) 2356 .Case("exec", AMDGPU::EXEC) 2357 .Case("vcc", AMDGPU::VCC) 2358 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2359 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2360 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2361 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2362 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2363 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2364 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2365 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2366 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2367 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2368 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2369 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2370 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2371 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2372 .Case("m0", AMDGPU::M0) 2373 .Case("vccz", AMDGPU::SRC_VCCZ) 2374 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2375 .Case("execz", AMDGPU::SRC_EXECZ) 2376 .Case("src_execz", AMDGPU::SRC_EXECZ) 2377 .Case("scc", AMDGPU::SRC_SCC) 2378 .Case("src_scc", AMDGPU::SRC_SCC) 2379 .Case("tba", AMDGPU::TBA) 2380 .Case("tma", AMDGPU::TMA) 2381 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2382 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2383 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2384 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2385 .Case("vcc_lo", AMDGPU::VCC_LO) 2386 .Case("vcc_hi", AMDGPU::VCC_HI) 2387 .Case("exec_lo", AMDGPU::EXEC_LO) 2388 .Case("exec_hi", AMDGPU::EXEC_HI) 2389 .Case("tma_lo", AMDGPU::TMA_LO) 2390 .Case("tma_hi", AMDGPU::TMA_HI) 2391 .Case("tba_lo", AMDGPU::TBA_LO) 2392 .Case("tba_hi", AMDGPU::TBA_HI) 2393 .Case("pc", AMDGPU::PC_REG) 2394 .Case("null", AMDGPU::SGPR_NULL) 2395 .Default(AMDGPU::NoRegister); 2396 } 2397 2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2399 SMLoc &EndLoc, bool RestoreOnFailure) { 2400 auto R = parseRegister(); 2401 if (!R) return true; 2402 assert(R->isReg()); 2403 RegNo = R->getReg(); 2404 StartLoc = R->getStartLoc(); 2405 EndLoc = R->getEndLoc(); 2406 return false; 2407 } 2408 2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2410 SMLoc &EndLoc) { 2411 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2412 } 2413 2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2415 SMLoc &StartLoc, 2416 SMLoc &EndLoc) { 2417 bool Result = 2418 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2419 bool PendingErrors = getParser().hasPendingError(); 2420 getParser().clearPendingErrors(); 2421 if (PendingErrors) 2422 return MatchOperand_ParseFail; 2423 if (Result) 2424 return MatchOperand_NoMatch; 2425 return MatchOperand_Success; 2426 } 2427 2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2429 RegisterKind RegKind, unsigned Reg1, 2430 SMLoc Loc) { 2431 switch (RegKind) { 2432 case IS_SPECIAL: 2433 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2434 Reg = AMDGPU::EXEC; 2435 RegWidth = 64; 2436 return true; 2437 } 2438 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2439 Reg = AMDGPU::FLAT_SCR; 2440 RegWidth = 64; 2441 return true; 2442 } 2443 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2444 Reg = AMDGPU::XNACK_MASK; 2445 RegWidth = 64; 
2446 return true; 2447 } 2448 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2449 Reg = AMDGPU::VCC; 2450 RegWidth = 64; 2451 return true; 2452 } 2453 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2454 Reg = AMDGPU::TBA; 2455 RegWidth = 64; 2456 return true; 2457 } 2458 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2459 Reg = AMDGPU::TMA; 2460 RegWidth = 64; 2461 return true; 2462 } 2463 Error(Loc, "register does not fit in the list"); 2464 return false; 2465 case IS_VGPR: 2466 case IS_SGPR: 2467 case IS_AGPR: 2468 case IS_TTMP: 2469 if (Reg1 != Reg + RegWidth / 32) { 2470 Error(Loc, "registers in a list must have consecutive indices"); 2471 return false; 2472 } 2473 RegWidth += 32; 2474 return true; 2475 default: 2476 llvm_unreachable("unexpected register kind"); 2477 } 2478 } 2479 2480 struct RegInfo { 2481 StringLiteral Name; 2482 RegisterKind Kind; 2483 }; 2484 2485 static constexpr RegInfo RegularRegisters[] = { 2486 {{"v"}, IS_VGPR}, 2487 {{"s"}, IS_SGPR}, 2488 {{"ttmp"}, IS_TTMP}, 2489 {{"acc"}, IS_AGPR}, 2490 {{"a"}, IS_AGPR}, 2491 }; 2492 2493 static bool isRegularReg(RegisterKind Kind) { 2494 return Kind == IS_VGPR || 2495 Kind == IS_SGPR || 2496 Kind == IS_TTMP || 2497 Kind == IS_AGPR; 2498 } 2499 2500 static const RegInfo* getRegularRegInfo(StringRef Str) { 2501 for (const RegInfo &Reg : RegularRegisters) 2502 if (Str.startswith(Reg.Name)) 2503 return &Reg; 2504 return nullptr; 2505 } 2506 2507 static bool getRegNum(StringRef Str, unsigned& Num) { 2508 return !Str.getAsInteger(10, Num); 2509 } 2510 2511 bool 2512 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2513 const AsmToken &NextToken) const { 2514 2515 // A list of consecutive registers: [s0,s1,s2,s3] 2516 if (Token.is(AsmToken::LBrac)) 2517 return true; 2518 2519 if (!Token.is(AsmToken::Identifier)) 2520 return false; 2521 2522 // A single register like s0 or a range of registers like s[0:1] 2523 2524 StringRef Str = Token.getString(); 2525 const RegInfo *Reg = getRegularRegInfo(Str); 2526 if (Reg) { 2527 StringRef RegName = Reg->Name; 2528 StringRef RegSuffix = Str.substr(RegName.size()); 2529 if (!RegSuffix.empty()) { 2530 unsigned Num; 2531 // A single register with an index: rXX 2532 if (getRegNum(RegSuffix, Num)) 2533 return true; 2534 } else { 2535 // A range of registers: r[XX:YY]. 2536 if (NextToken.is(AsmToken::LBrac)) 2537 return true; 2538 } 2539 } 2540 2541 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2542 } 2543 2544 bool 2545 AMDGPUAsmParser::isRegister() 2546 { 2547 return isRegister(getToken(), peekToken()); 2548 } 2549 2550 unsigned 2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2552 unsigned RegNum, 2553 unsigned RegWidth, 2554 SMLoc Loc) { 2555 2556 assert(isRegularReg(RegKind)); 2557 2558 unsigned AlignSize = 1; 2559 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2560 // SGPR and TTMP registers must be aligned. 2561 // Max required alignment is 4 dwords. 
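    // For example, a 64-bit pair such as s[2:3] is accepted because its first
    // index is even, while s[1:2] is rejected; tuples of 128 bits or wider
    // (e.g. s[4:7]) must start at an index that is a multiple of 4.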
2562 AlignSize = std::min(RegWidth / 32, 4u); 2563 } 2564 2565 if (RegNum % AlignSize != 0) { 2566 Error(Loc, "invalid register alignment"); 2567 return AMDGPU::NoRegister; 2568 } 2569 2570 unsigned RegIdx = RegNum / AlignSize; 2571 int RCID = getRegClass(RegKind, RegWidth); 2572 if (RCID == -1) { 2573 Error(Loc, "invalid or unsupported register size"); 2574 return AMDGPU::NoRegister; 2575 } 2576 2577 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2578 const MCRegisterClass RC = TRI->getRegClass(RCID); 2579 if (RegIdx >= RC.getNumRegs()) { 2580 Error(Loc, "register index is out of range"); 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 return RC.getRegister(RegIdx); 2585 } 2586 2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2588 int64_t RegLo, RegHi; 2589 if (!skipToken(AsmToken::LBrac, "missing register index")) 2590 return false; 2591 2592 SMLoc FirstIdxLoc = getLoc(); 2593 SMLoc SecondIdxLoc; 2594 2595 if (!parseExpr(RegLo)) 2596 return false; 2597 2598 if (trySkipToken(AsmToken::Colon)) { 2599 SecondIdxLoc = getLoc(); 2600 if (!parseExpr(RegHi)) 2601 return false; 2602 } else { 2603 RegHi = RegLo; 2604 } 2605 2606 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2607 return false; 2608 2609 if (!isUInt<32>(RegLo)) { 2610 Error(FirstIdxLoc, "invalid register index"); 2611 return false; 2612 } 2613 2614 if (!isUInt<32>(RegHi)) { 2615 Error(SecondIdxLoc, "invalid register index"); 2616 return false; 2617 } 2618 2619 if (RegLo > RegHi) { 2620 Error(FirstIdxLoc, "first register index should not exceed second index"); 2621 return false; 2622 } 2623 2624 Num = static_cast<unsigned>(RegLo); 2625 RegWidth = 32 * ((RegHi - RegLo) + 1); 2626 return true; 2627 } 2628 2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2630 unsigned &RegNum, unsigned &RegWidth, 2631 SmallVectorImpl<AsmToken> &Tokens) { 2632 assert(isToken(AsmToken::Identifier)); 2633 unsigned Reg = getSpecialRegForName(getTokenStr()); 2634 if (Reg) { 2635 RegNum = 0; 2636 RegWidth = 32; 2637 RegKind = IS_SPECIAL; 2638 Tokens.push_back(getToken()); 2639 lex(); // skip register name 2640 } 2641 return Reg; 2642 } 2643 2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2645 unsigned &RegNum, unsigned &RegWidth, 2646 SmallVectorImpl<AsmToken> &Tokens) { 2647 assert(isToken(AsmToken::Identifier)); 2648 StringRef RegName = getTokenStr(); 2649 auto Loc = getLoc(); 2650 2651 const RegInfo *RI = getRegularRegInfo(RegName); 2652 if (!RI) { 2653 Error(Loc, "invalid register name"); 2654 return AMDGPU::NoRegister; 2655 } 2656 2657 Tokens.push_back(getToken()); 2658 lex(); // skip register name 2659 2660 RegKind = RI->Kind; 2661 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2662 if (!RegSuffix.empty()) { 2663 // Single 32-bit register: vXX. 2664 if (!getRegNum(RegSuffix, RegNum)) { 2665 Error(Loc, "invalid register index"); 2666 return AMDGPU::NoRegister; 2667 } 2668 RegWidth = 32; 2669 } else { 2670 // Range of registers: v[XX:YY]. ":YY" is optional. 
2671 if (!ParseRegRange(RegNum, RegWidth)) 2672 return AMDGPU::NoRegister; 2673 } 2674 2675 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2676 } 2677 2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2679 unsigned &RegWidth, 2680 SmallVectorImpl<AsmToken> &Tokens) { 2681 unsigned Reg = AMDGPU::NoRegister; 2682 auto ListLoc = getLoc(); 2683 2684 if (!skipToken(AsmToken::LBrac, 2685 "expected a register or a list of registers")) { 2686 return AMDGPU::NoRegister; 2687 } 2688 2689 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2690 2691 auto Loc = getLoc(); 2692 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2693 return AMDGPU::NoRegister; 2694 if (RegWidth != 32) { 2695 Error(Loc, "expected a single 32-bit register"); 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 for (; trySkipToken(AsmToken::Comma); ) { 2700 RegisterKind NextRegKind; 2701 unsigned NextReg, NextRegNum, NextRegWidth; 2702 Loc = getLoc(); 2703 2704 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2705 NextRegNum, NextRegWidth, 2706 Tokens)) { 2707 return AMDGPU::NoRegister; 2708 } 2709 if (NextRegWidth != 32) { 2710 Error(Loc, "expected a single 32-bit register"); 2711 return AMDGPU::NoRegister; 2712 } 2713 if (NextRegKind != RegKind) { 2714 Error(Loc, "registers in a list must be of the same kind"); 2715 return AMDGPU::NoRegister; 2716 } 2717 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2718 return AMDGPU::NoRegister; 2719 } 2720 2721 if (!skipToken(AsmToken::RBrac, 2722 "expected a comma or a closing square bracket")) { 2723 return AMDGPU::NoRegister; 2724 } 2725 2726 if (isRegularReg(RegKind)) 2727 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2728 2729 return Reg; 2730 } 2731 2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2733 unsigned &RegNum, unsigned &RegWidth, 2734 SmallVectorImpl<AsmToken> &Tokens) { 2735 auto Loc = getLoc(); 2736 Reg = AMDGPU::NoRegister; 2737 2738 if (isToken(AsmToken::Identifier)) { 2739 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2740 if (Reg == AMDGPU::NoRegister) 2741 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2742 } else { 2743 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2744 } 2745 2746 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2747 if (Reg == AMDGPU::NoRegister) { 2748 assert(Parser.hasPendingError()); 2749 return false; 2750 } 2751 2752 if (!subtargetHasRegister(*TRI, Reg)) { 2753 if (Reg == AMDGPU::SGPR_NULL) { 2754 Error(Loc, "'null' operand is not supported on this GPU"); 2755 } else { 2756 Error(Loc, "register not available on this GPU"); 2757 } 2758 return false; 2759 } 2760 2761 return true; 2762 } 2763 2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2765 unsigned &RegNum, unsigned &RegWidth, 2766 bool RestoreOnFailure /*=false*/) { 2767 Reg = AMDGPU::NoRegister; 2768 2769 SmallVector<AsmToken, 1> Tokens; 2770 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2771 if (RestoreOnFailure) { 2772 while (!Tokens.empty()) { 2773 getLexer().UnLex(Tokens.pop_back_val()); 2774 } 2775 } 2776 return true; 2777 } 2778 return false; 2779 } 2780 2781 Optional<StringRef> 2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2783 switch (RegKind) { 2784 case IS_VGPR: 2785 return StringRef(".amdgcn.next_free_vgpr"); 2786 case IS_SGPR: 2787 return StringRef(".amdgcn.next_free_sgpr"); 2788 default: 2789 return None; 2790 } 2791 } 2792 2793 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2794 auto SymbolName = getGprCountSymbolName(RegKind); 2795 assert(SymbolName && "initializing invalid register kind"); 2796 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2797 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2798 } 2799 2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2801 unsigned DwordRegIndex, 2802 unsigned RegWidth) { 2803 // Symbols are only defined for GCN targets 2804 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2805 return true; 2806 2807 auto SymbolName = getGprCountSymbolName(RegKind); 2808 if (!SymbolName) 2809 return true; 2810 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2811 2812 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2813 int64_t OldCount; 2814 2815 if (!Sym->isVariable()) 2816 return !Error(getLoc(), 2817 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2818 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2819 return !Error( 2820 getLoc(), 2821 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2822 2823 if (OldCount <= NewMax) 2824 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2825 2826 return true; 2827 } 2828 2829 std::unique_ptr<AMDGPUOperand> 2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2831 const auto &Tok = getToken(); 2832 SMLoc StartLoc = Tok.getLoc(); 2833 SMLoc EndLoc = Tok.getEndLoc(); 2834 RegisterKind RegKind; 2835 unsigned Reg, RegNum, RegWidth; 2836 2837 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2838 return nullptr; 2839 } 2840 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2841 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2842 return nullptr; 2843 } else 2844 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2845 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2846 } 2847 2848 OperandMatchResultTy 2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2850 // TODO: add syntactic sugar for 1/(2*PI) 2851 2852 assert(!isRegister()); 2853 assert(!isModifier()); 2854 2855 const auto& Tok = getToken(); 2856 const auto& NextTok = peekToken(); 2857 bool IsReal = Tok.is(AsmToken::Real); 2858 SMLoc S = getLoc(); 2859 bool Negate = false; 2860 2861 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2862 lex(); 2863 IsReal = true; 2864 Negate = true; 2865 } 2866 2867 if (IsReal) { 2868 // Floating-point expressions are not supported. 2869 // Can only allow floating-point literals with an 2870 // optional sign. 2871 2872 StringRef Num = getTokenStr(); 2873 lex(); 2874 2875 APFloat RealVal(APFloat::IEEEdouble()); 2876 auto roundMode = APFloat::rmNearestTiesToEven; 2877 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2878 return MatchOperand_ParseFail; 2879 } 2880 if (Negate) 2881 RealVal.changeSign(); 2882 2883 Operands.push_back( 2884 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2885 AMDGPUOperand::ImmTyNone, true)); 2886 2887 return MatchOperand_Success; 2888 2889 } else { 2890 int64_t IntVal; 2891 const MCExpr *Expr; 2892 SMLoc S = getLoc(); 2893 2894 if (HasSP3AbsModifier) { 2895 // This is a workaround for handling expressions 2896 // as arguments of SP3 'abs' modifier, for example: 2897 // |1.0| 2898 // |-1| 2899 // |1+x| 2900 // This syntax is not compatible with syntax of standard 2901 // MC expressions (due to the trailing '|'). 
2902 SMLoc EndLoc; 2903 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2904 return MatchOperand_ParseFail; 2905 } else { 2906 if (Parser.parseExpression(Expr)) 2907 return MatchOperand_ParseFail; 2908 } 2909 2910 if (Expr->evaluateAsAbsolute(IntVal)) { 2911 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2912 } else { 2913 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2914 } 2915 2916 return MatchOperand_Success; 2917 } 2918 2919 return MatchOperand_NoMatch; 2920 } 2921 2922 OperandMatchResultTy 2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2924 if (!isRegister()) 2925 return MatchOperand_NoMatch; 2926 2927 if (auto R = parseRegister()) { 2928 assert(R->isReg()); 2929 Operands.push_back(std::move(R)); 2930 return MatchOperand_Success; 2931 } 2932 return MatchOperand_ParseFail; 2933 } 2934 2935 OperandMatchResultTy 2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2937 auto res = parseReg(Operands); 2938 if (res != MatchOperand_NoMatch) { 2939 return res; 2940 } else if (isModifier()) { 2941 return MatchOperand_NoMatch; 2942 } else { 2943 return parseImm(Operands, HasSP3AbsMod); 2944 } 2945 } 2946 2947 bool 2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2949 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2950 const auto &str = Token.getString(); 2951 return str == "abs" || str == "neg" || str == "sext"; 2952 } 2953 return false; 2954 } 2955 2956 bool 2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2958 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2959 } 2960 2961 bool 2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2963 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2964 } 2965 2966 bool 2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2968 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2969 } 2970 2971 // Check if this is an operand modifier or an opcode modifier 2972 // which may look like an expression but it is not. We should 2973 // avoid parsing these modifiers as expressions. Currently 2974 // recognized sequences are: 2975 // |...| 2976 // abs(...) 2977 // neg(...) 2978 // sext(...) 2979 // -reg 2980 // -|...| 2981 // -abs(...) 2982 // name:... 2983 // Note that simple opcode modifiers like 'gds' may be parsed as 2984 // expressions; this is a special case. See getExpressionAsToken. 2985 // 2986 bool 2987 AMDGPUAsmParser::isModifier() { 2988 2989 AsmToken Tok = getToken(); 2990 AsmToken NextToken[2]; 2991 peekTokens(NextToken); 2992 2993 return isOperandModifier(Tok, NextToken[0]) || 2994 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2995 isOpcodeModifierWithVal(Tok, NextToken[0]); 2996 } 2997 2998 // Check if the current token is an SP3 'neg' modifier. 2999 // Currently this modifier is allowed in the following context: 3000 // 3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3002 // 2. Before an 'abs' modifier: -abs(...) 3003 // 3. Before an SP3 'abs' modifier: -|...| 3004 // 3005 // In all other cases "-" is handled as a part 3006 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3080 } 3081 3082 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3083 return MatchOperand_ParseFail; 3084 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3085 return MatchOperand_ParseFail; 3086 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3087 return MatchOperand_ParseFail; 3088 3089 AMDGPUOperand::Modifiers Mods; 3090 Mods.Abs = Abs || SP3Abs; 3091 Mods.Neg = Neg || SP3Neg; 3092 3093 if (Mods.hasFPModifiers()) { 3094 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3095 if (Op.isExpr()) { 3096 Error(Op.getStartLoc(), "expected an absolute expression"); 3097 return MatchOperand_ParseFail; 3098 } 3099 Op.setModifiers(Mods); 3100 } 3101 return MatchOperand_Success; 3102 } 3103 3104 OperandMatchResultTy 3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3106 bool AllowImm) { 3107 bool Sext = trySkipId("sext"); 3108 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3109 return MatchOperand_ParseFail; 3110 3111 OperandMatchResultTy Res; 3112 if (AllowImm) { 3113 Res = parseRegOrImm(Operands); 3114 } else { 3115 Res = parseReg(Operands); 3116 } 3117 if (Res != MatchOperand_Success) { 3118 return Sext? MatchOperand_ParseFail : Res; 3119 } 3120 3121 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3122 return MatchOperand_ParseFail; 3123 3124 AMDGPUOperand::Modifiers Mods; 3125 Mods.Sext = Sext; 3126 3127 if (Mods.hasIntModifiers()) { 3128 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3129 if (Op.isExpr()) { 3130 Error(Op.getStartLoc(), "expected an absolute expression"); 3131 return MatchOperand_ParseFail; 3132 } 3133 Op.setModifiers(Mods); 3134 } 3135 3136 return MatchOperand_Success; 3137 } 3138 3139 OperandMatchResultTy 3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3141 return parseRegOrImmWithFPInputMods(Operands, false); 3142 } 3143 3144 OperandMatchResultTy 3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3146 return parseRegOrImmWithIntInputMods(Operands, false); 3147 } 3148 3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3150 auto Loc = getLoc(); 3151 if (trySkipId("off")) { 3152 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3153 AMDGPUOperand::ImmTyOff, false)); 3154 return MatchOperand_Success; 3155 } 3156 3157 if (!isRegister()) 3158 return MatchOperand_NoMatch; 3159 3160 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3161 if (Reg) { 3162 Operands.push_back(std::move(Reg)); 3163 return MatchOperand_Success; 3164 } 3165 3166 return MatchOperand_ParseFail; 3167 3168 } 3169 3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3171 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3172 3173 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3174 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3175 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3176 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3177 return Match_InvalidOperand; 3178 3179 if ((TSFlags & SIInstrFlags::VOP3) && 3180 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3181 getForcedEncodingSize() != 64) 3182 return Match_PreferE32; 3183 3184 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3185 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3186 // v_mac_f32/16 allow only dst_sel == DWORD; 3187 auto OpNum = 3188 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3189 const auto &Op = Inst.getOperand(OpNum); 3190 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3191 return Match_InvalidOperand; 3192 } 3193 } 3194 3195 return Match_Success; 3196 } 3197 3198 static ArrayRef<unsigned> getAllVariants() { 3199 static const unsigned Variants[] = { 3200 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3201 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3202 }; 3203 3204 return makeArrayRef(Variants); 3205 } 3206 3207 // What asm variants we should check 3208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3209 if (getForcedEncodingSize() == 32) { 3210 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3211 return makeArrayRef(Variants); 3212 } 3213 3214 if (isForcedVOP3()) { 3215 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3216 return makeArrayRef(Variants); 3217 } 3218 3219 if (isForcedSDWA()) { 3220 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3221 AMDGPUAsmVariants::SDWA9}; 3222 return makeArrayRef(Variants); 3223 } 3224 3225 if (isForcedDPP()) { 3226 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3227 return makeArrayRef(Variants); 3228 } 3229 3230 return getAllVariants(); 3231 } 3232 3233 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3234 if (getForcedEncodingSize() == 32) 3235 return "e32"; 3236 3237 if (isForcedVOP3()) 3238 return "e64"; 3239 3240 if (isForcedSDWA()) 3241 return "sdwa"; 3242 3243 if (isForcedDPP()) 3244 return "dpp"; 3245 3246 return ""; 3247 } 3248 3249 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3250 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3251 const unsigned Num = Desc.getNumImplicitUses(); 3252 for (unsigned i = 0; i < Num; ++i) { 3253 unsigned Reg = Desc.ImplicitUses[i]; 3254 switch (Reg) { 3255 case AMDGPU::FLAT_SCR: 3256 case AMDGPU::VCC: 3257 case AMDGPU::VCC_LO: 3258 case AMDGPU::VCC_HI: 3259 case AMDGPU::M0: 3260 return Reg; 3261 default: 3262 break; 3263 } 3264 } 3265 return AMDGPU::NoRegister; 3266 } 3267 3268 // NB: This code is correct only when used to check constant 3269 // bus limitations because GFX7 support no f16 inline constants. 3270 // Note that there are no cases when a GFX7 opcode violates 3271 // constant bus limitations due to the use of an f16 constant. 
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3273 unsigned OpIdx) const { 3274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3275 3276 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3277 return false; 3278 } 3279 3280 const MCOperand &MO = Inst.getOperand(OpIdx); 3281 3282 int64_t Val = MO.getImm(); 3283 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3284 3285 switch (OpSize) { // expected operand size 3286 case 8: 3287 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3288 case 4: 3289 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3290 case 2: { 3291 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3292 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3293 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3294 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3295 return AMDGPU::isInlinableIntLiteral(Val); 3296 3297 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3298 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3299 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3300 return AMDGPU::isInlinableIntLiteralV216(Val); 3301 3302 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3303 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3304 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3305 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3306 3307 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3308 } 3309 default: 3310 llvm_unreachable("invalid operand size"); 3311 } 3312 } 3313 3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3315 if (!isGFX10Plus()) 3316 return 1; 3317 3318 switch (Opcode) { 3319 // 64-bit shift instructions can use only one scalar value input 3320 case AMDGPU::V_LSHLREV_B64_e64: 3321 case AMDGPU::V_LSHLREV_B64_gfx10: 3322 case AMDGPU::V_LSHRREV_B64_e64: 3323 case AMDGPU::V_LSHRREV_B64_gfx10: 3324 case AMDGPU::V_ASHRREV_I64_e64: 3325 case AMDGPU::V_ASHRREV_I64_gfx10: 3326 case AMDGPU::V_LSHL_B64_e64: 3327 case AMDGPU::V_LSHR_B64_e64: 3328 case AMDGPU::V_ASHR_I64_e64: 3329 return 1; 3330 default: 3331 return 2; 3332 } 3333 } 3334 3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3336 const MCOperand &MO = Inst.getOperand(OpIdx); 3337 if (MO.isImm()) { 3338 return !isInlineConstant(Inst, OpIdx); 3339 } else if (MO.isReg()) { 3340 auto Reg = MO.getReg(); 3341 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3342 auto PReg = mc2PseudoReg(Reg); 3343 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3344 } else { 3345 return true; 3346 } 3347 } 3348 3349 bool 3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3351 const OperandVector &Operands) { 3352 const unsigned Opcode = Inst.getOpcode(); 3353 const MCInstrDesc &Desc = MII.get(Opcode); 3354 unsigned LastSGPR = AMDGPU::NoRegister; 3355 unsigned ConstantBusUseCount = 0; 3356 unsigned NumLiterals = 0; 3357 unsigned LiteralSize; 3358 3359 if (Desc.TSFlags & 3360 (SIInstrFlags::VOPC | 3361 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3362 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3363 SIInstrFlags::SDWA)) { 3364 // Check special imm operands (used by madmk, etc) 3365 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3366 ++NumLiterals; 3367 LiteralSize = 4; 3368 } 3369 3370 SmallDenseSet<unsigned> SGPRsUsed; 3371 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3372 if (SGPRUsed != AMDGPU::NoRegister) { 3373 SGPRsUsed.insert(SGPRUsed); 3374 ++ConstantBusUseCount; 3375 } 3376 3377 
    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3435 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3436 return false; 3437 } 3438 3439 bool 3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3441 const OperandVector &Operands) { 3442 const unsigned Opcode = Inst.getOpcode(); 3443 const MCInstrDesc &Desc = MII.get(Opcode); 3444 3445 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3446 if (DstIdx == -1 || 3447 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3448 return true; 3449 } 3450 3451 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3452 3453 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3454 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3455 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3456 3457 assert(DstIdx != -1); 3458 const MCOperand &Dst = Inst.getOperand(DstIdx); 3459 assert(Dst.isReg()); 3460 3461 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3462 3463 for (int SrcIdx : SrcIndices) { 3464 if (SrcIdx == -1) break; 3465 const MCOperand &Src = Inst.getOperand(SrcIdx); 3466 if (Src.isReg()) { 3467 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3468 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3469 Error(getRegLoc(SrcReg, Operands), 3470 "destination must be different than all sources"); 3471 return false; 3472 } 3473 } 3474 } 3475 3476 return true; 3477 } 3478 3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3480 3481 const unsigned Opc = Inst.getOpcode(); 3482 const MCInstrDesc &Desc = MII.get(Opc); 3483 3484 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3485 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3486 assert(ClampIdx != -1); 3487 return Inst.getOperand(ClampIdx).getImm() == 0; 3488 } 3489 3490 return true; 3491 } 3492 3493 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3494 3495 const unsigned Opc = Inst.getOpcode(); 3496 const MCInstrDesc &Desc = MII.get(Opc); 3497 3498 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3499 return true; 3500 3501 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3502 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3503 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3504 3505 assert(VDataIdx != -1); 3506 3507 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3508 return true; 3509 3510 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3511 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3512 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3513 if (DMask == 0) 3514 DMask = 1; 3515 3516 unsigned DataSize = 3517 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3518 if (hasPackedD16()) { 3519 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3520 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3521 DataSize = (DataSize + 1) / 2; 3522 } 3523 3524 return (VDataSize / 4) == DataSize + TFESize; 3525 } 3526 3527 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3528 const unsigned Opc = Inst.getOpcode(); 3529 const MCInstrDesc &Desc = MII.get(Opc); 3530 3531 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3532 return true; 3533 3534 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3535 3536 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3537 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3538 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3539 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3540 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3541 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3542 3543 assert(VAddr0Idx != -1); 3544 assert(SrsrcIdx != -1); 3545 assert(SrsrcIdx > VAddr0Idx); 3546 3547 if (DimIdx == -1) 3548 return true; // intersect_ray 3549 3550 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3551 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3552 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3553 unsigned ActualAddrSize = 3554 IsNSA ? SrsrcIdx - VAddr0Idx 3555 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3556 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3557 3558 unsigned ExpectedAddrSize = 3559 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3560 3561 if (!IsNSA) { 3562 if (ExpectedAddrSize > 8) 3563 ExpectedAddrSize = 16; 3564 3565 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3566 // This provides backward compatibility for assembly created 3567 // before 160b/192b/224b types were directly supported. 3568 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3569 return true; 3570 } 3571 3572 return ActualAddrSize == ExpectedAddrSize; 3573 } 3574 3575 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3576 3577 const unsigned Opc = Inst.getOpcode(); 3578 const MCInstrDesc &Desc = MII.get(Opc); 3579 3580 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3581 return true; 3582 if (!Desc.mayLoad() || !Desc.mayStore()) 3583 return true; // Not atomic 3584 3585 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3586 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3587 3588 // This is an incomplete check because image_atomic_cmpswap 3589 // may only use 0x3 and 0xf while other atomic operations 3590 // may use 0x1 and 0x3. However these limitations are 3591 // verified when we check that dmask matches dst size. 3592 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3593 } 3594 3595 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3596 3597 const unsigned Opc = Inst.getOpcode(); 3598 const MCInstrDesc &Desc = MII.get(Opc); 3599 3600 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3601 return true; 3602 3603 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3604 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3605 3606 // GATHER4 instructions use dmask in a different fashion compared to 3607 // other MIMG instructions. The only useful DMASK values are 3608 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3609 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3610 // this. 3611 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3612 } 3613 3614 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3615 const unsigned Opc = Inst.getOpcode(); 3616 const MCInstrDesc &Desc = MII.get(Opc); 3617 3618 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3619 return true; 3620 3621 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3622 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3623 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3624 3625 if (!BaseOpcode->MSAA) 3626 return true; 3627 3628 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3629 assert(DimIdx != -1); 3630 3631 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3632 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3633 3634 return DimInfo->MSAA; 3635 } 3636 3637 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3638 { 3639 switch (Opcode) { 3640 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3641 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3642 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3643 return true; 3644 default: 3645 return false; 3646 } 3647 } 3648 3649 // movrels* opcodes should only allow VGPRS as src0. 3650 // This is specified in .td description for vop1/vop3, 3651 // but sdwa is handled differently. See isSDWAOperand. 3652 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3653 const OperandVector &Operands) { 3654 3655 const unsigned Opc = Inst.getOpcode(); 3656 const MCInstrDesc &Desc = MII.get(Opc); 3657 3658 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3659 return true; 3660 3661 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3662 assert(Src0Idx != -1); 3663 3664 SMLoc ErrLoc; 3665 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3666 if (Src0.isReg()) { 3667 auto Reg = mc2PseudoReg(Src0.getReg()); 3668 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3669 if (!isSGPR(Reg, TRI)) 3670 return true; 3671 ErrLoc = getRegLoc(Reg, Operands); 3672 } else { 3673 ErrLoc = getConstLoc(Operands); 3674 } 3675 3676 Error(ErrLoc, "source operand must be a VGPR"); 3677 return false; 3678 } 3679 3680 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3681 const OperandVector &Operands) { 3682 3683 const unsigned Opc = Inst.getOpcode(); 3684 3685 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3686 return true; 3687 3688 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3689 assert(Src0Idx != -1); 3690 3691 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3692 if (!Src0.isReg()) 3693 return true; 3694 3695 auto Reg = mc2PseudoReg(Src0.getReg()); 3696 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3697 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3698 Error(getRegLoc(Reg, Operands), 3699 "source operand must be either a VGPR or an inline constant"); 3700 return false; 3701 } 3702 3703 return true; 3704 } 3705 3706 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3707 const OperandVector &Operands) { 3708 const unsigned Opc = Inst.getOpcode(); 3709 const MCInstrDesc &Desc = MII.get(Opc); 3710 3711 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3712 return true; 3713 3714 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3715 if (Src2Idx == -1) 3716 return true; 3717 3718 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3719 if (!Src2.isReg()) 3720 return true; 3721 3722 MCRegister Src2Reg = Src2.getReg(); 3723 MCRegister DstReg = 
Inst.getOperand(0).getReg(); 3724 if (Src2Reg == DstReg) 3725 return true; 3726 3727 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3728 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3729 return true; 3730 3731 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3732 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3733 "source 2 operand must not partially overlap with dst"); 3734 return false; 3735 } 3736 3737 return true; 3738 } 3739 3740 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3741 switch (Inst.getOpcode()) { 3742 default: 3743 return true; 3744 case V_DIV_SCALE_F32_gfx6_gfx7: 3745 case V_DIV_SCALE_F32_vi: 3746 case V_DIV_SCALE_F32_gfx10: 3747 case V_DIV_SCALE_F64_gfx6_gfx7: 3748 case V_DIV_SCALE_F64_vi: 3749 case V_DIV_SCALE_F64_gfx10: 3750 break; 3751 } 3752 3753 // TODO: Check that src0 = src1 or src2. 3754 3755 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3756 AMDGPU::OpName::src2_modifiers, 3757 AMDGPU::OpName::src2_modifiers}) { 3758 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3759 .getImm() & 3760 SISrcMods::ABS) { 3761 return false; 3762 } 3763 } 3764 3765 return true; 3766 } 3767 3768 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3769 3770 const unsigned Opc = Inst.getOpcode(); 3771 const MCInstrDesc &Desc = MII.get(Opc); 3772 3773 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3774 return true; 3775 3776 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3777 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3778 if (isCI() || isSI()) 3779 return false; 3780 } 3781 3782 return true; 3783 } 3784 3785 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3786 const unsigned Opc = Inst.getOpcode(); 3787 const MCInstrDesc &Desc = MII.get(Opc); 3788 3789 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3790 return true; 3791 3792 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3793 if (DimIdx < 0) 3794 return true; 3795 3796 long Imm = Inst.getOperand(DimIdx).getImm(); 3797 if (Imm < 0 || Imm >= 8) 3798 return false; 3799 3800 return true; 3801 } 3802 3803 static bool IsRevOpcode(const unsigned Opcode) 3804 { 3805 switch (Opcode) { 3806 case AMDGPU::V_SUBREV_F32_e32: 3807 case AMDGPU::V_SUBREV_F32_e64: 3808 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3809 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3810 case AMDGPU::V_SUBREV_F32_e32_vi: 3811 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3812 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3813 case AMDGPU::V_SUBREV_F32_e64_vi: 3814 3815 case AMDGPU::V_SUBREV_CO_U32_e32: 3816 case AMDGPU::V_SUBREV_CO_U32_e64: 3817 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3818 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3819 3820 case AMDGPU::V_SUBBREV_U32_e32: 3821 case AMDGPU::V_SUBBREV_U32_e64: 3822 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3823 case AMDGPU::V_SUBBREV_U32_e32_vi: 3824 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3825 case AMDGPU::V_SUBBREV_U32_e64_vi: 3826 3827 case AMDGPU::V_SUBREV_U32_e32: 3828 case AMDGPU::V_SUBREV_U32_e64: 3829 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3830 case AMDGPU::V_SUBREV_U32_e32_vi: 3831 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3832 case AMDGPU::V_SUBREV_U32_e64_vi: 3833 3834 case AMDGPU::V_SUBREV_F16_e32: 3835 case AMDGPU::V_SUBREV_F16_e64: 3836 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3837 case AMDGPU::V_SUBREV_F16_e32_vi: 3838 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3839 case AMDGPU::V_SUBREV_F16_e64_vi: 3840 3841 case AMDGPU::V_SUBREV_U16_e32: 3842 case AMDGPU::V_SUBREV_U16_e64: 3843 case 
AMDGPU::V_SUBREV_U16_e32_vi: 3844 case AMDGPU::V_SUBREV_U16_e64_vi: 3845 3846 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3847 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3848 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3849 3850 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3851 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3852 3853 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3854 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3855 3856 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3857 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3858 3859 case AMDGPU::V_LSHRREV_B32_e32: 3860 case AMDGPU::V_LSHRREV_B32_e64: 3861 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3862 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3863 case AMDGPU::V_LSHRREV_B32_e32_vi: 3864 case AMDGPU::V_LSHRREV_B32_e64_vi: 3865 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3866 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3867 3868 case AMDGPU::V_ASHRREV_I32_e32: 3869 case AMDGPU::V_ASHRREV_I32_e64: 3870 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3871 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3872 case AMDGPU::V_ASHRREV_I32_e32_vi: 3873 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3874 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3875 case AMDGPU::V_ASHRREV_I32_e64_vi: 3876 3877 case AMDGPU::V_LSHLREV_B32_e32: 3878 case AMDGPU::V_LSHLREV_B32_e64: 3879 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3880 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3881 case AMDGPU::V_LSHLREV_B32_e32_vi: 3882 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3883 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3884 case AMDGPU::V_LSHLREV_B32_e64_vi: 3885 3886 case AMDGPU::V_LSHLREV_B16_e32: 3887 case AMDGPU::V_LSHLREV_B16_e64: 3888 case AMDGPU::V_LSHLREV_B16_e32_vi: 3889 case AMDGPU::V_LSHLREV_B16_e64_vi: 3890 case AMDGPU::V_LSHLREV_B16_gfx10: 3891 3892 case AMDGPU::V_LSHRREV_B16_e32: 3893 case AMDGPU::V_LSHRREV_B16_e64: 3894 case AMDGPU::V_LSHRREV_B16_e32_vi: 3895 case AMDGPU::V_LSHRREV_B16_e64_vi: 3896 case AMDGPU::V_LSHRREV_B16_gfx10: 3897 3898 case AMDGPU::V_ASHRREV_I16_e32: 3899 case AMDGPU::V_ASHRREV_I16_e64: 3900 case AMDGPU::V_ASHRREV_I16_e32_vi: 3901 case AMDGPU::V_ASHRREV_I16_e64_vi: 3902 case AMDGPU::V_ASHRREV_I16_gfx10: 3903 3904 case AMDGPU::V_LSHLREV_B64_e64: 3905 case AMDGPU::V_LSHLREV_B64_gfx10: 3906 case AMDGPU::V_LSHLREV_B64_vi: 3907 3908 case AMDGPU::V_LSHRREV_B64_e64: 3909 case AMDGPU::V_LSHRREV_B64_gfx10: 3910 case AMDGPU::V_LSHRREV_B64_vi: 3911 3912 case AMDGPU::V_ASHRREV_I64_e64: 3913 case AMDGPU::V_ASHRREV_I64_gfx10: 3914 case AMDGPU::V_ASHRREV_I64_vi: 3915 3916 case AMDGPU::V_PK_LSHLREV_B16: 3917 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3918 case AMDGPU::V_PK_LSHLREV_B16_vi: 3919 3920 case AMDGPU::V_PK_LSHRREV_B16: 3921 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3922 case AMDGPU::V_PK_LSHRREV_B16_vi: 3923 case AMDGPU::V_PK_ASHRREV_I16: 3924 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3925 case AMDGPU::V_PK_ASHRREV_I16_vi: 3926 return true; 3927 default: 3928 return false; 3929 } 3930 } 3931 3932 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3933 3934 using namespace SIInstrFlags; 3935 const unsigned Opcode = Inst.getOpcode(); 3936 const MCInstrDesc &Desc = MII.get(Opcode); 3937 3938 // lds_direct register is defined so that it can be used 3939 // with 9-bit operands only. Ignore encodings which do not accept these. 
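// (Summary of the checks below, for reference: lds_direct is rejected on gfx90a, rejected for 'rev' opcodes (see IsRevOpcode) and SDWA encodings, and is otherwise accepted only in the src0 position.)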
3940 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3941 if ((Desc.TSFlags & Enc) == 0) 3942 return None; 3943 3944 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3945 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3946 if (SrcIdx == -1) 3947 break; 3948 const auto &Src = Inst.getOperand(SrcIdx); 3949 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3950 3951 if (isGFX90A()) 3952 return StringRef("lds_direct is not supported on this GPU"); 3953 3954 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3955 return StringRef("lds_direct cannot be used with this instruction"); 3956 3957 if (SrcName != OpName::src0) 3958 return StringRef("lds_direct may be used as src0 only"); 3959 } 3960 } 3961 3962 return None; 3963 } 3964 3965 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3966 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3967 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3968 if (Op.isFlatOffset()) 3969 return Op.getStartLoc(); 3970 } 3971 return getLoc(); 3972 } 3973 3974 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3975 const OperandVector &Operands) { 3976 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3977 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3978 return true; 3979 3980 auto Opcode = Inst.getOpcode(); 3981 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3982 assert(OpNum != -1); 3983 3984 const auto &Op = Inst.getOperand(OpNum); 3985 if (!hasFlatOffsets() && Op.getImm() != 0) { 3986 Error(getFlatOffsetLoc(Operands), 3987 "flat offset modifier is not supported on this GPU"); 3988 return false; 3989 } 3990 3991 // For FLAT segment the offset must be positive; 3992 // MSB is ignored and forced to zero. 3993 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3994 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3995 if (!isIntN(OffsetSize, Op.getImm())) { 3996 Error(getFlatOffsetLoc(Operands), 3997 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3998 return false; 3999 } 4000 } else { 4001 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4002 if (!isUIntN(OffsetSize, Op.getImm())) { 4003 Error(getFlatOffsetLoc(Operands), 4004 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4005 return false; 4006 } 4007 } 4008 4009 return true; 4010 } 4011 4012 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4013 // Start with second operand because SMEM Offset cannot be dst or src0. 
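// (Illustrative example, not taken from a test: in 's_load_dword s5, s[2:3], 0x10' the offset is the last operand, so scanning from index 2 skips the mnemonic token in Operands[0] and the destination register.)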
4014 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4015 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4016 if (Op.isSMEMOffset()) 4017 return Op.getStartLoc(); 4018 } 4019 return getLoc(); 4020 } 4021 4022 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4023 const OperandVector &Operands) { 4024 if (isCI() || isSI()) 4025 return true; 4026 4027 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4028 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4029 return true; 4030 4031 auto Opcode = Inst.getOpcode(); 4032 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4033 if (OpNum == -1) 4034 return true; 4035 4036 const auto &Op = Inst.getOperand(OpNum); 4037 if (!Op.isImm()) 4038 return true; 4039 4040 uint64_t Offset = Op.getImm(); 4041 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4042 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4043 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4044 return true; 4045 4046 Error(getSMEMOffsetLoc(Operands), 4047 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4048 "expected a 21-bit signed offset"); 4049 4050 return false; 4051 } 4052 4053 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4054 unsigned Opcode = Inst.getOpcode(); 4055 const MCInstrDesc &Desc = MII.get(Opcode); 4056 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4057 return true; 4058 4059 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4060 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4061 4062 const int OpIndices[] = { Src0Idx, Src1Idx }; 4063 4064 unsigned NumExprs = 0; 4065 unsigned NumLiterals = 0; 4066 uint32_t LiteralValue; 4067 4068 for (int OpIdx : OpIndices) { 4069 if (OpIdx == -1) break; 4070 4071 const MCOperand &MO = Inst.getOperand(OpIdx); 4072 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4073 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4074 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4075 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4076 if (NumLiterals == 0 || LiteralValue != Value) { 4077 LiteralValue = Value; 4078 ++NumLiterals; 4079 } 4080 } else if (MO.isExpr()) { 4081 ++NumExprs; 4082 } 4083 } 4084 } 4085 4086 return NumLiterals + NumExprs <= 1; 4087 } 4088 4089 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4090 const unsigned Opc = Inst.getOpcode(); 4091 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4092 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4093 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4094 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4095 4096 if (OpSel & ~3) 4097 return false; 4098 } 4099 4100 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4101 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4102 if (OpSelIdx != -1) { 4103 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4104 return false; 4105 } 4106 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4107 if (OpSelHiIdx != -1) { 4108 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4109 return false; 4110 } 4111 } 4112 4113 return true; 4114 } 4115 4116 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4117 const OperandVector &Operands) { 4118 const unsigned Opc = Inst.getOpcode(); 4119 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4120 if (DppCtrlIdx < 0) 4121 return true; 4122 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4123 4124 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4125 // DPP64 is supported for row_newbcast only. 4126 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4127 if (Src0Idx >= 0 && 4128 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4129 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4130 Error(S, "64 bit dpp only supports row_newbcast"); 4131 return false; 4132 } 4133 } 4134 4135 return true; 4136 } 4137 4138 // Check if VCC register matches wavefront size 4139 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4140 auto FB = getFeatureBits(); 4141 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4142 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4143 } 4144 4145 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4146 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4147 const OperandVector &Operands) { 4148 unsigned Opcode = Inst.getOpcode(); 4149 const MCInstrDesc &Desc = MII.get(Opcode); 4150 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4151 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4152 ImmIdx == -1) 4153 return true; 4154 4155 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4156 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4157 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4158 4159 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4160 4161 unsigned NumExprs = 0; 4162 unsigned NumLiterals = 0; 4163 uint32_t LiteralValue; 4164 4165 for (int OpIdx : OpIndices) { 4166 if (OpIdx == -1) 4167 continue; 4168 4169 const MCOperand &MO = Inst.getOperand(OpIdx); 4170 if (!MO.isImm() && !MO.isExpr()) 4171 continue; 4172 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4173 continue; 4174 4175 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4176 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4177 Error(getConstLoc(Operands), 4178 "inline constants are not allowed for this operand"); 4179 return false; 4180 } 4181 4182 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4183 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4184 if (NumLiterals == 0 || LiteralValue != Value) { 4185 LiteralValue = Value; 4186 ++NumLiterals; 4187 } 4188 } else if (MO.isExpr()) { 4189 ++NumExprs; 4190 } 4191 } 4192 NumLiterals += NumExprs; 4193 4194 if (!NumLiterals) 4195 return true; 4196 4197 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4198 Error(getLitLoc(Operands), "literal operands are not supported"); 4199 return false; 4200 } 4201 4202 if (NumLiterals > 1) { 4203 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4204 return false; 4205 } 4206 4207 return true; 4208 } 4209 4210 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4211 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4212 const MCRegisterInfo *MRI) { 4213 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4214 if (OpIdx < 0) 4215 return -1; 4216 4217 const MCOperand &Op = Inst.getOperand(OpIdx); 4218 if (!Op.isReg()) 4219 return -1; 4220 4221 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4222 auto Reg = Sub ? Sub : Op.getReg(); 4223 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4224 return AGPR32.contains(Reg) ? 
1 : 0; 4225 } 4226 4227 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4228 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4229 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4230 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4231 SIInstrFlags::DS)) == 0) 4232 return true; 4233 4234 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4235 : AMDGPU::OpName::vdata; 4236 4237 const MCRegisterInfo *MRI = getMRI(); 4238 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4239 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4240 4241 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4242 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4243 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4244 return false; 4245 } 4246 4247 auto FB = getFeatureBits(); 4248 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4249 if (DataAreg < 0 || DstAreg < 0) 4250 return true; 4251 return DstAreg == DataAreg; 4252 } 4253 4254 return DstAreg < 1 && DataAreg < 1; 4255 } 4256 4257 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4258 auto FB = getFeatureBits(); 4259 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4260 return true; 4261 4262 const MCRegisterInfo *MRI = getMRI(); 4263 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4264 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4265 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4266 const MCOperand &Op = Inst.getOperand(I); 4267 if (!Op.isReg()) 4268 continue; 4269 4270 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4271 if (!Sub) 4272 continue; 4273 4274 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4275 return false; 4276 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4277 return false; 4278 } 4279 4280 return true; 4281 } 4282 4283 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4284 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4285 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4286 if (Op.isBLGP()) 4287 return Op.getStartLoc(); 4288 } 4289 return SMLoc(); 4290 } 4291 4292 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4293 const OperandVector &Operands) { 4294 unsigned Opc = Inst.getOpcode(); 4295 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4296 if (BlgpIdx == -1) 4297 return true; 4298 SMLoc BLGPLoc = getBLGPLoc(Operands); 4299 if (!BLGPLoc.isValid()) 4300 return true; 4301 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4302 auto FB = getFeatureBits(); 4303 bool UsesNeg = false; 4304 if (FB[AMDGPU::FeatureGFX940Insts]) { 4305 switch (Opc) { 4306 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4307 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4308 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4309 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4310 UsesNeg = true; 4311 } 4312 } 4313 4314 if (IsNeg == UsesNeg) 4315 return true; 4316 4317 Error(BLGPLoc, 4318 UsesNeg ? "invalid modifier: blgp is not supported" 4319 : "invalid modifier: neg is not supported"); 4320 4321 return false; 4322 } 4323 4324 // gfx90a has an undocumented limitation: 4325 // DS_GWS opcodes must use even aligned registers. 
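// For example (illustrative): 'ds_gws_init v2 gds' uses an even data register and is accepted, while 'ds_gws_init v1 gds' is rejected below with 'vgpr must be even aligned'.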
4326 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4327 const OperandVector &Operands) { 4328 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4329 return true; 4330 4331 int Opc = Inst.getOpcode(); 4332 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4333 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4334 return true; 4335 4336 const MCRegisterInfo *MRI = getMRI(); 4337 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4338 int Data0Pos = 4339 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4340 assert(Data0Pos != -1); 4341 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4342 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4343 if (RegIdx & 1) { 4344 SMLoc RegLoc = getRegLoc(Reg, Operands); 4345 Error(RegLoc, "vgpr must be even aligned"); 4346 return false; 4347 } 4348 4349 return true; 4350 } 4351 4352 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4353 const OperandVector &Operands, 4354 const SMLoc &IDLoc) { 4355 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4356 AMDGPU::OpName::cpol); 4357 if (CPolPos == -1) 4358 return true; 4359 4360 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4361 4362 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4363 if ((TSFlags & (SIInstrFlags::SMRD)) && 4364 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4365 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4366 return false; 4367 } 4368 4369 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4370 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4371 StringRef CStr(S.getPointer()); 4372 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4373 Error(S, "scc is not supported on this GPU"); 4374 return false; 4375 } 4376 4377 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4378 return true; 4379 4380 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4381 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4382 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4383 : "instruction must use glc"); 4384 return false; 4385 } 4386 } else { 4387 if (CPol & CPol::GLC) { 4388 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4389 StringRef CStr(S.getPointer()); 4390 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4391 Error(S, isGFX940() ? 
"instruction must not use sc0" 4392 : "instruction must not use glc"); 4393 return false; 4394 } 4395 } 4396 4397 return true; 4398 } 4399 4400 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4401 const SMLoc &IDLoc, 4402 const OperandVector &Operands) { 4403 if (auto ErrMsg = validateLdsDirect(Inst)) { 4404 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4405 return false; 4406 } 4407 if (!validateSOPLiteral(Inst)) { 4408 Error(getLitLoc(Operands), 4409 "only one literal operand is allowed"); 4410 return false; 4411 } 4412 if (!validateVOPLiteral(Inst, Operands)) { 4413 return false; 4414 } 4415 if (!validateConstantBusLimitations(Inst, Operands)) { 4416 return false; 4417 } 4418 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4419 return false; 4420 } 4421 if (!validateIntClampSupported(Inst)) { 4422 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4423 "integer clamping is not supported on this GPU"); 4424 return false; 4425 } 4426 if (!validateOpSel(Inst)) { 4427 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4428 "invalid op_sel operand"); 4429 return false; 4430 } 4431 if (!validateDPP(Inst, Operands)) { 4432 return false; 4433 } 4434 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4435 if (!validateMIMGD16(Inst)) { 4436 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4437 "d16 modifier is not supported on this GPU"); 4438 return false; 4439 } 4440 if (!validateMIMGDim(Inst)) { 4441 Error(IDLoc, "dim modifier is required on this GPU"); 4442 return false; 4443 } 4444 if (!validateMIMGMSAA(Inst)) { 4445 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4446 "invalid dim; must be MSAA type"); 4447 return false; 4448 } 4449 if (!validateMIMGDataSize(Inst)) { 4450 Error(IDLoc, 4451 "image data size does not match dmask and tfe"); 4452 return false; 4453 } 4454 if (!validateMIMGAddrSize(Inst)) { 4455 Error(IDLoc, 4456 "image address size does not match dim and a16"); 4457 return false; 4458 } 4459 if (!validateMIMGAtomicDMask(Inst)) { 4460 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4461 "invalid atomic image dmask"); 4462 return false; 4463 } 4464 if (!validateMIMGGatherDMask(Inst)) { 4465 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4466 "invalid image_gather dmask: only one bit must be set"); 4467 return false; 4468 } 4469 if (!validateMovrels(Inst, Operands)) { 4470 return false; 4471 } 4472 if (!validateFlatOffset(Inst, Operands)) { 4473 return false; 4474 } 4475 if (!validateSMEMOffset(Inst, Operands)) { 4476 return false; 4477 } 4478 if (!validateMAIAccWrite(Inst, Operands)) { 4479 return false; 4480 } 4481 if (!validateMFMA(Inst, Operands)) { 4482 return false; 4483 } 4484 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4485 return false; 4486 } 4487 4488 if (!validateAGPRLdSt(Inst)) { 4489 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4490 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4491 : "invalid register class: agpr loads and stores not supported on this GPU" 4492 ); 4493 return false; 4494 } 4495 if (!validateVGPRAlign(Inst)) { 4496 Error(IDLoc, 4497 "invalid register class: vgpr tuples must be 64 bit aligned"); 4498 return false; 4499 } 4500 if (!validateGWS(Inst, Operands)) { 4501 return false; 4502 } 4503 4504 if (!validateBLGP(Inst, Operands)) { 4505 return false; 4506 } 4507 4508 if (!validateDivScale(Inst)) { 4509 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4510 return false; 4511 } 4512 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4513 return false; 4514 } 4515 4516 return true; 4517 } 4518 4519 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4520 const FeatureBitset &FBS, 4521 unsigned VariantID = 0); 4522 4523 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4524 const FeatureBitset &AvailableFeatures, 4525 unsigned VariantID); 4526 4527 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4528 const FeatureBitset &FBS) { 4529 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4530 } 4531 4532 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4533 const FeatureBitset &FBS, 4534 ArrayRef<unsigned> Variants) { 4535 for (auto Variant : Variants) { 4536 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4537 return true; 4538 } 4539 4540 return false; 4541 } 4542 4543 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4544 const SMLoc &IDLoc) { 4545 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4546 4547 // Check if requested instruction variant is supported. 4548 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4549 return false; 4550 4551 // This instruction is not supported. 4552 // Clear any other pending errors because they are no longer relevant. 4553 getParser().clearPendingErrors(); 4554 4555 // Requested instruction variant is not supported. 4556 // Check if any other variants are supported. 4557 StringRef VariantName = getMatchedVariantName(); 4558 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4559 return Error(IDLoc, 4560 Twine(VariantName, 4561 " variant of this instruction is not supported")); 4562 } 4563 4564 // Finally check if this instruction is supported on any other GPU. 4565 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4566 return Error(IDLoc, "instruction not supported on this GPU"); 4567 } 4568 4569 // Instruction not supported on any GPU. Probably a typo. 4570 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4571 return Error(IDLoc, "invalid instruction" + Suggestion); 4572 } 4573 4574 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4575 OperandVector &Operands, 4576 MCStreamer &Out, 4577 uint64_t &ErrorInfo, 4578 bool MatchingInlineAsm) { 4579 MCInst Inst; 4580 unsigned Result = Match_Success; 4581 for (auto Variant : getMatchedVariants()) { 4582 uint64_t EI; 4583 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4584 Variant); 4585 // We order match statuses from least to most specific. 
We use most specific 4586 // status as resulting 4587 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4588 if ((R == Match_Success) || 4589 (R == Match_PreferE32) || 4590 (R == Match_MissingFeature && Result != Match_PreferE32) || 4591 (R == Match_InvalidOperand && Result != Match_MissingFeature 4592 && Result != Match_PreferE32) || 4593 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4594 && Result != Match_MissingFeature 4595 && Result != Match_PreferE32)) { 4596 Result = R; 4597 ErrorInfo = EI; 4598 } 4599 if (R == Match_Success) 4600 break; 4601 } 4602 4603 if (Result == Match_Success) { 4604 if (!validateInstruction(Inst, IDLoc, Operands)) { 4605 return true; 4606 } 4607 Inst.setLoc(IDLoc); 4608 Out.emitInstruction(Inst, getSTI()); 4609 return false; 4610 } 4611 4612 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4613 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4614 return true; 4615 } 4616 4617 switch (Result) { 4618 default: break; 4619 case Match_MissingFeature: 4620 // It has been verified that the specified instruction 4621 // mnemonic is valid. A match was found but it requires 4622 // features which are not supported on this GPU. 4623 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4624 4625 case Match_InvalidOperand: { 4626 SMLoc ErrorLoc = IDLoc; 4627 if (ErrorInfo != ~0ULL) { 4628 if (ErrorInfo >= Operands.size()) { 4629 return Error(IDLoc, "too few operands for instruction"); 4630 } 4631 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4632 if (ErrorLoc == SMLoc()) 4633 ErrorLoc = IDLoc; 4634 } 4635 return Error(ErrorLoc, "invalid operand for instruction"); 4636 } 4637 4638 case Match_PreferE32: 4639 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4640 "should be encoded as e32"); 4641 case Match_MnemonicFail: 4642 llvm_unreachable("Invalid instructions should have been handled already"); 4643 } 4644 llvm_unreachable("Implement any new match types added!"); 4645 } 4646 4647 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4648 int64_t Tmp = -1; 4649 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4650 return true; 4651 } 4652 if (getParser().parseAbsoluteExpression(Tmp)) { 4653 return true; 4654 } 4655 Ret = static_cast<uint32_t>(Tmp); 4656 return false; 4657 } 4658 4659 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4660 uint32_t &Minor) { 4661 if (ParseAsAbsoluteExpression(Major)) 4662 return TokError("invalid major version"); 4663 4664 if (!trySkipToken(AsmToken::Comma)) 4665 return TokError("minor version number required, comma expected"); 4666 4667 if (ParseAsAbsoluteExpression(Minor)) 4668 return TokError("invalid minor version"); 4669 4670 return false; 4671 } 4672 4673 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4674 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4675 return TokError("directive only supported for amdgcn architecture"); 4676 4677 std::string TargetIDDirective; 4678 SMLoc TargetStart = getTok().getLoc(); 4679 if (getParser().parseEscapedString(TargetIDDirective)) 4680 return true; 4681 4682 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4683 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4684 return getParser().Error(TargetRange.Start, 4685 (Twine(".amdgcn_target directive's target id ") + 4686 Twine(TargetIDDirective) + 4687 Twine(" does not match the specified target id ") + 4688 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4689 4690 return false; 4691 } 4692 4693 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4694 return Error(Range.Start, "value out of range", Range); 4695 } 4696 4697 bool AMDGPUAsmParser::calculateGPRBlocks( 4698 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4699 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4700 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4701 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4702 // TODO(scott.linder): These calculations are duplicated from 4703 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4704 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4705 4706 unsigned NumVGPRs = NextFreeVGPR; 4707 unsigned NumSGPRs = NextFreeSGPR; 4708 4709 if (Version.Major >= 10) 4710 NumSGPRs = 0; 4711 else { 4712 unsigned MaxAddressableNumSGPRs = 4713 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4714 4715 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4716 NumSGPRs > MaxAddressableNumSGPRs) 4717 return OutOfRangeError(SGPRRange); 4718 4719 NumSGPRs += 4720 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4721 4722 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4723 NumSGPRs > MaxAddressableNumSGPRs) 4724 return OutOfRangeError(SGPRRange); 4725 4726 if (Features.test(FeatureSGPRInitBug)) 4727 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4728 } 4729 4730 VGPRBlocks = 4731 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4732 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4733 4734 return false; 4735 } 4736 4737 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4738 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4739 return TokError("directive only supported for amdgcn architecture"); 4740 4741 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4742 return TokError("directive only supported for amdhsa OS"); 4743 4744 StringRef KernelName; 4745 if (getParser().parseIdentifier(KernelName)) 4746 return true; 4747 4748 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4749 4750 StringSet<> Seen; 4751 4752 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4753 4754 SMRange VGPRRange; 4755 uint64_t NextFreeVGPR = 0; 4756 uint64_t AccumOffset = 0; 4757 uint64_t SharedVGPRCount = 0; 4758 SMRange SGPRRange; 4759 uint64_t NextFreeSGPR = 0; 4760 4761 // Count the number of user SGPRs implied from the enabled feature bits. 4762 unsigned ImpliedUserSGPRCount = 0; 4763 4764 // Track if the asm explicitly contains the directive for the user SGPR 4765 // count. 
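// (When .amdhsa_user_sgpr_count is given explicitly, it must be no smaller than the count implied by the enabled user SGPR directives; this is checked after the directive loop.)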
4766 Optional<unsigned> ExplicitUserSGPRCount; 4767 bool ReserveVCC = true; 4768 bool ReserveFlatScr = true; 4769 Optional<bool> EnableWavefrontSize32; 4770 4771 while (true) { 4772 while (trySkipToken(AsmToken::EndOfStatement)); 4773 4774 StringRef ID; 4775 SMRange IDRange = getTok().getLocRange(); 4776 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4777 return true; 4778 4779 if (ID == ".end_amdhsa_kernel") 4780 break; 4781 4782 if (Seen.find(ID) != Seen.end()) 4783 return TokError(".amdhsa_ directives cannot be repeated"); 4784 Seen.insert(ID); 4785 4786 SMLoc ValStart = getLoc(); 4787 int64_t IVal; 4788 if (getParser().parseAbsoluteExpression(IVal)) 4789 return true; 4790 SMLoc ValEnd = getLoc(); 4791 SMRange ValRange = SMRange(ValStart, ValEnd); 4792 4793 if (IVal < 0) 4794 return OutOfRangeError(ValRange); 4795 4796 uint64_t Val = IVal; 4797 4798 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4799 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4800 return OutOfRangeError(RANGE); \ 4801 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4802 4803 if (ID == ".amdhsa_group_segment_fixed_size") { 4804 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4805 return OutOfRangeError(ValRange); 4806 KD.group_segment_fixed_size = Val; 4807 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4808 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4809 return OutOfRangeError(ValRange); 4810 KD.private_segment_fixed_size = Val; 4811 } else if (ID == ".amdhsa_kernarg_size") { 4812 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4813 return OutOfRangeError(ValRange); 4814 KD.kernarg_size = Val; 4815 } else if (ID == ".amdhsa_user_sgpr_count") { 4816 ExplicitUserSGPRCount = Val; 4817 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4818 if (hasArchitectedFlatScratch()) 4819 return Error(IDRange.Start, 4820 "directive is not supported with architected flat scratch", 4821 IDRange); 4822 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4823 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4824 Val, ValRange); 4825 if (Val) 4826 ImpliedUserSGPRCount += 4; 4827 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4828 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4829 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4830 ValRange); 4831 if (Val) 4832 ImpliedUserSGPRCount += 2; 4833 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4834 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4835 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4836 ValRange); 4837 if (Val) 4838 ImpliedUserSGPRCount += 2; 4839 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4840 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4841 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4842 Val, ValRange); 4843 if (Val) 4844 ImpliedUserSGPRCount += 2; 4845 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4846 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4847 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4848 ValRange); 4849 if (Val) 4850 ImpliedUserSGPRCount += 2; 4851 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4852 if (hasArchitectedFlatScratch()) 4853 return Error(IDRange.Start, 4854 "directive is not supported with architected flat scratch", 4855 IDRange); 4856 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4857 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4858 ValRange); 4859 if (Val) 4860 ImpliedUserSGPRCount += 2; 4861 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4862 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4863 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4864 Val, ValRange); 4865 if (Val) 4866 ImpliedUserSGPRCount += 1; 4867 } else if (ID == ".amdhsa_wavefront_size32") { 4868 if (IVersion.Major < 10) 4869 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4870 EnableWavefrontSize32 = Val; 4871 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4872 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4873 Val, ValRange); 4874 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4875 if (hasArchitectedFlatScratch()) 4876 return Error(IDRange.Start, 4877 "directive is not supported with architected flat scratch", 4878 IDRange); 4879 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4880 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4881 } else if (ID == ".amdhsa_enable_private_segment") { 4882 if (!hasArchitectedFlatScratch()) 4883 return Error( 4884 IDRange.Start, 4885 "directive is not supported without architected flat scratch", 4886 IDRange); 4887 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4888 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4889 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4890 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4891 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4892 ValRange); 4893 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4894 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4895 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4896 ValRange); 4897 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4898 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4899 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4900 ValRange); 4901 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4902 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4903 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4904 ValRange); 4905 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4906 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4907 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4908 ValRange); 4909 } else if (ID == ".amdhsa_next_free_vgpr") { 4910 VGPRRange = ValRange; 4911 NextFreeVGPR = Val; 4912 } else if (ID == ".amdhsa_next_free_sgpr") { 4913 SGPRRange = ValRange; 4914 NextFreeSGPR = Val; 4915 } else if (ID == ".amdhsa_accum_offset") { 4916 if (!isGFX90A()) 4917 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4918 AccumOffset = Val; 4919 } else if (ID == ".amdhsa_reserve_vcc") { 4920 if (!isUInt<1>(Val)) 4921 return OutOfRangeError(ValRange); 4922 ReserveVCC = Val; 4923 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4924 if (IVersion.Major < 7) 4925 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4926 if (hasArchitectedFlatScratch()) 4927 return Error(IDRange.Start, 4928 "directive is not supported with architected flat scratch", 4929 IDRange); 4930 if (!isUInt<1>(Val)) 4931 return OutOfRangeError(ValRange); 4932 ReserveFlatScr = Val; 4933 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4934 if (IVersion.Major < 8) 4935 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4936 if (!isUInt<1>(Val)) 4937 return OutOfRangeError(ValRange); 4938 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4939 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4940 IDRange); 4941 } else if (ID == ".amdhsa_float_round_mode_32") { 4942 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4943 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4944 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4945 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4946 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4947 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4948 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4949 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4950 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4951 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4952 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4953 ValRange); 4954 } else if (ID == ".amdhsa_dx10_clamp") { 4955 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4956 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4957 } else if (ID == ".amdhsa_ieee_mode") { 4958 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4959 Val, ValRange); 4960 } else if (ID == ".amdhsa_fp16_overflow") { 4961 if (IVersion.Major < 9) 4962 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4963 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4964 ValRange); 4965 } else if (ID == ".amdhsa_tg_split") { 4966 if (!isGFX90A()) 4967 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4968 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4969 ValRange); 4970 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4971 if (IVersion.Major < 10) 4972 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4973 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4974 ValRange); 4975 } else if (ID == ".amdhsa_memory_ordered") { 4976 if (IVersion.Major < 10) 4977 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4978 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4979 ValRange); 4980 } else if (ID == ".amdhsa_forward_progress") { 4981 if (IVersion.Major < 10) 4982 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4983 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4984 ValRange); 4985 } else if (ID == ".amdhsa_shared_vgpr_count") { 4986 if (IVersion.Major < 10) 4987 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4988 SharedVGPRCount = Val; 4989 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4990 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4991 ValRange); 4992 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4993 PARSE_BITS_ENTRY( 4994 KD.compute_pgm_rsrc2, 4995 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4996 ValRange); 4997 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4998 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4999 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5000 Val, ValRange); 5001 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5002 PARSE_BITS_ENTRY( 5003 KD.compute_pgm_rsrc2, 5004 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5005 ValRange); 5006 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5007 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5008 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5009 Val, ValRange); 5010 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5012 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5013 Val, ValRange); 5014 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5015 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5016 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5017 Val, ValRange); 5018 } else if (ID == ".amdhsa_exception_int_div_zero") { 5019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5020 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5021 Val, ValRange); 5022 } else { 5023 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5024 } 5025 5026 #undef PARSE_BITS_ENTRY 5027 } 5028 5029 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5030 return TokError(".amdhsa_next_free_vgpr directive is required"); 5031 5032 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5033 return TokError(".amdhsa_next_free_sgpr directive is required"); 5034 5035 unsigned VGPRBlocks; 5036 unsigned SGPRBlocks; 5037 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5038 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5039 EnableWavefrontSize32, NextFreeVGPR, 5040 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5041 SGPRBlocks)) 5042 return true; 5043 5044 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5045 VGPRBlocks)) 5046 return OutOfRangeError(VGPRRange); 5047 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5048 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5049 5050 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5051 SGPRBlocks)) 5052 return OutOfRangeError(SGPRRange); 5053 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5054 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5055 SGPRBlocks); 5056 5057 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5058 return TokError("amdgpu_user_sgpr_count smaller than implied by " 5059 "enabled user SGPRs"); 5060 5061 unsigned UserSGPRCount = 5062 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5063 5064 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5065 return TokError("too many user SGPRs enabled"); 5066 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5067 UserSGPRCount); 5068 5069 if (isGFX90A()) { 5070 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5071 return TokError(".amdhsa_accum_offset directive is required"); 5072 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5073 return TokError("accum_offset should be in range [4..256] in " 5074 "increments of 4"); 5075 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5076 return TokError("accum_offset exceeds total VGPR allocation"); 5077 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5078 (AccumOffset / 4 - 1)); 5079 } 5080 5081 if (IVersion.Major == 10) { 5082 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5083 if (SharedVGPRCount && EnableWavefrontSize32) { 5084 return TokError("shared_vgpr_count directive not valid on " 5085 "wavefront size 32"); 5086 } 5087 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5088 return TokError("shared_vgpr_count*2 + " 5089 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5090 "exceed 63\n"); 5091 } 5092 } 5093 5094 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5095 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5096 ReserveFlatScr); 5097 return false; 5098 } 5099 5100 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5101 uint32_t Major; 5102 uint32_t Minor; 5103 5104 if (ParseDirectiveMajorMinor(Major, Minor)) 5105 return true; 5106 5107 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5108 return false; 5109 } 5110 5111 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5112 uint32_t Major; 5113 uint32_t Minor; 5114 uint32_t Stepping; 5115 StringRef VendorName; 5116 StringRef ArchName; 5117 5118 // If this directive has no
arguments, then use the ISA version for the 5119 // targeted GPU. 5120 if (isToken(AsmToken::EndOfStatement)) { 5121 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5122 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5123 ISA.Stepping, 5124 "AMD", "AMDGPU"); 5125 return false; 5126 } 5127 5128 if (ParseDirectiveMajorMinor(Major, Minor)) 5129 return true; 5130 5131 if (!trySkipToken(AsmToken::Comma)) 5132 return TokError("stepping version number required, comma expected"); 5133 5134 if (ParseAsAbsoluteExpression(Stepping)) 5135 return TokError("invalid stepping version"); 5136 5137 if (!trySkipToken(AsmToken::Comma)) 5138 return TokError("vendor name required, comma expected"); 5139 5140 if (!parseString(VendorName, "invalid vendor name")) 5141 return true; 5142 5143 if (!trySkipToken(AsmToken::Comma)) 5144 return TokError("arch name required, comma expected"); 5145 5146 if (!parseString(ArchName, "invalid arch name")) 5147 return true; 5148 5149 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5150 VendorName, ArchName); 5151 return false; 5152 } 5153 5154 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5155 amd_kernel_code_t &Header) { 5156 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5157 // assembly for backwards compatibility. 5158 if (ID == "max_scratch_backing_memory_byte_size") { 5159 Parser.eatToEndOfStatement(); 5160 return false; 5161 } 5162 5163 SmallString<40> ErrStr; 5164 raw_svector_ostream Err(ErrStr); 5165 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5166 return TokError(Err.str()); 5167 } 5168 Lex(); 5169 5170 if (ID == "enable_wavefront_size32") { 5171 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5172 if (!isGFX10Plus()) 5173 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5174 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5175 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5176 } else { 5177 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5178 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5179 } 5180 } 5181 5182 if (ID == "wavefront_size") { 5183 if (Header.wavefront_size == 5) { 5184 if (!isGFX10Plus()) 5185 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5186 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5187 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5188 } else if (Header.wavefront_size == 6) { 5189 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5190 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5191 } 5192 } 5193 5194 if (ID == "enable_wgp_mode") { 5195 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5196 !isGFX10Plus()) 5197 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5198 } 5199 5200 if (ID == "enable_mem_ordered") { 5201 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5202 !isGFX10Plus()) 5203 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5204 } 5205 5206 if (ID == "enable_fwd_progress") { 5207 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5208 !isGFX10Plus()) 5209 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5210 } 5211 5212 return false; 5213 } 5214 5215 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5216 amd_kernel_code_t Header; 5217 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5218 5219 while (true) { 
5220 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5221 // will set the current token to EndOfStatement. 5222 while(trySkipToken(AsmToken::EndOfStatement)); 5223 5224 StringRef ID; 5225 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5226 return true; 5227 5228 if (ID == ".end_amd_kernel_code_t") 5229 break; 5230 5231 if (ParseAMDKernelCodeTValue(ID, Header)) 5232 return true; 5233 } 5234 5235 getTargetStreamer().EmitAMDKernelCodeT(Header); 5236 5237 return false; 5238 } 5239 5240 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5241 StringRef KernelName; 5242 if (!parseId(KernelName, "expected symbol name")) 5243 return true; 5244 5245 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5246 ELF::STT_AMDGPU_HSA_KERNEL); 5247 5248 KernelScope.initialize(getContext()); 5249 return false; 5250 } 5251 5252 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5253 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5254 return Error(getLoc(), 5255 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5256 "architectures"); 5257 } 5258 5259 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5260 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5261 return Error(getParser().getTok().getLoc(), "target id must match options"); 5262 5263 getTargetStreamer().EmitISAVersion(); 5264 Lex(); 5265 5266 return false; 5267 } 5268 5269 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5270 const char *AssemblerDirectiveBegin; 5271 const char *AssemblerDirectiveEnd; 5272 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5273 isHsaAbiVersion3AndAbove(&getSTI()) 5274 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5275 HSAMD::V3::AssemblerDirectiveEnd) 5276 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5277 HSAMD::AssemblerDirectiveEnd); 5278 5279 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5280 return Error(getLoc(), 5281 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5282 "not available on non-amdhsa OSes")).str()); 5283 } 5284 5285 std::string HSAMetadataString; 5286 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5287 HSAMetadataString)) 5288 return true; 5289 5290 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5291 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5292 return Error(getLoc(), "invalid HSA metadata"); 5293 } else { 5294 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5295 return Error(getLoc(), "invalid HSA metadata"); 5296 } 5297 5298 return false; 5299 } 5300 5301 /// Common code to parse out a block of text (typically YAML) between start and 5302 /// end directives. 
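/// Used by the HSA metadata directive above and the MsgPack PAL metadata
/// directive below; the collected text is returned with spaces preserved and
/// statements joined by the target's separator string, for the caller to
/// validate.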
5303 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5304 const char *AssemblerDirectiveEnd, 5305 std::string &CollectString) { 5306 5307 raw_string_ostream CollectStream(CollectString); 5308 5309 getLexer().setSkipSpace(false); 5310 5311 bool FoundEnd = false; 5312 while (!isToken(AsmToken::Eof)) { 5313 while (isToken(AsmToken::Space)) { 5314 CollectStream << getTokenStr(); 5315 Lex(); 5316 } 5317 5318 if (trySkipId(AssemblerDirectiveEnd)) { 5319 FoundEnd = true; 5320 break; 5321 } 5322 5323 CollectStream << Parser.parseStringToEndOfStatement() 5324 << getContext().getAsmInfo()->getSeparatorString(); 5325 5326 Parser.eatToEndOfStatement(); 5327 } 5328 5329 getLexer().setSkipSpace(true); 5330 5331 if (isToken(AsmToken::Eof) && !FoundEnd) { 5332 return TokError(Twine("expected directive ") + 5333 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5334 } 5335 5336 CollectStream.flush(); 5337 return false; 5338 } 5339 5340 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5341 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5342 std::string String; 5343 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5344 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5345 return true; 5346 5347 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5348 if (!PALMetadata->setFromString(String)) 5349 return Error(getLoc(), "invalid PAL metadata"); 5350 return false; 5351 } 5352 5353 /// Parse the assembler directive for old linear-format PAL metadata. 5354 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5355 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5356 return Error(getLoc(), 5357 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5358 "not available on non-amdpal OSes")).str()); 5359 } 5360 5361 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5362 PALMetadata->setLegacy(); 5363 for (;;) { 5364 uint32_t Key, Value; 5365 if (ParseAsAbsoluteExpression(Key)) { 5366 return TokError(Twine("invalid value in ") + 5367 Twine(PALMD::AssemblerDirective)); 5368 } 5369 if (!trySkipToken(AsmToken::Comma)) { 5370 return TokError(Twine("expected an even number of values in ") + 5371 Twine(PALMD::AssemblerDirective)); 5372 } 5373 if (ParseAsAbsoluteExpression(Value)) { 5374 return TokError(Twine("invalid value in ") + 5375 Twine(PALMD::AssemblerDirective)); 5376 } 5377 PALMetadata->setRegister(Key, Value); 5378 if (!trySkipToken(AsmToken::Comma)) 5379 break; 5380 } 5381 return false; 5382 } 5383 5384 /// ParseDirectiveAMDGPULDS 5385 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5386 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5387 if (getParser().checkForValidSection()) 5388 return true; 5389 5390 StringRef Name; 5391 SMLoc NameLoc = getLoc(); 5392 if (getParser().parseIdentifier(Name)) 5393 return TokError("expected identifier in directive"); 5394 5395 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5396 if (parseToken(AsmToken::Comma, "expected ','")) 5397 return true; 5398 5399 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5400 5401 int64_t Size; 5402 SMLoc SizeLoc = getLoc(); 5403 if (getParser().parseAbsoluteExpression(Size)) 5404 return true; 5405 if (Size < 0) 5406 return Error(SizeLoc, "size must be non-negative"); 5407 if (Size > LocalMemorySize) 5408 return Error(SizeLoc, "size is too large"); 5409 5410 int64_t Alignment = 4; 5411 if (trySkipToken(AsmToken::Comma)) { 5412 SMLoc AlignLoc = getLoc(); 5413 if 
(getParser().parseAbsoluteExpression(Alignment)) 5414 return true; 5415 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5416 return Error(AlignLoc, "alignment must be a power of two"); 5417 5418 // Alignment larger than the size of LDS is possible in theory, as long 5419 // as the linker manages to place to symbol at address 0, but we do want 5420 // to make sure the alignment fits nicely into a 32-bit integer. 5421 if (Alignment >= 1u << 31) 5422 return Error(AlignLoc, "alignment is too large"); 5423 } 5424 5425 if (parseToken(AsmToken::EndOfStatement, 5426 "unexpected token in '.amdgpu_lds' directive")) 5427 return true; 5428 5429 Symbol->redefineIfPossible(); 5430 if (!Symbol->isUndefined()) 5431 return Error(NameLoc, "invalid symbol redefinition"); 5432 5433 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5434 return false; 5435 } 5436 5437 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5438 StringRef IDVal = DirectiveID.getString(); 5439 5440 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5441 if (IDVal == ".amdhsa_kernel") 5442 return ParseDirectiveAMDHSAKernel(); 5443 5444 // TODO: Restructure/combine with PAL metadata directive. 5445 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5446 return ParseDirectiveHSAMetadata(); 5447 } else { 5448 if (IDVal == ".hsa_code_object_version") 5449 return ParseDirectiveHSACodeObjectVersion(); 5450 5451 if (IDVal == ".hsa_code_object_isa") 5452 return ParseDirectiveHSACodeObjectISA(); 5453 5454 if (IDVal == ".amd_kernel_code_t") 5455 return ParseDirectiveAMDKernelCodeT(); 5456 5457 if (IDVal == ".amdgpu_hsa_kernel") 5458 return ParseDirectiveAMDGPUHsaKernel(); 5459 5460 if (IDVal == ".amd_amdgpu_isa") 5461 return ParseDirectiveISAVersion(); 5462 5463 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5464 return ParseDirectiveHSAMetadata(); 5465 } 5466 5467 if (IDVal == ".amdgcn_target") 5468 return ParseDirectiveAMDGCNTarget(); 5469 5470 if (IDVal == ".amdgpu_lds") 5471 return ParseDirectiveAMDGPULDS(); 5472 5473 if (IDVal == PALMD::AssemblerDirectiveBegin) 5474 return ParseDirectivePALMetadataBegin(); 5475 5476 if (IDVal == PALMD::AssemblerDirective) 5477 return ParseDirectivePALMetadata(); 5478 5479 return true; 5480 } 5481 5482 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5483 unsigned RegNo) { 5484 5485 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5486 return isGFX9Plus(); 5487 5488 // GFX10 has 2 more SGPRs 104 and 105. 5489 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5490 return hasSGPR104_SGPR105(); 5491 5492 switch (RegNo) { 5493 case AMDGPU::SRC_SHARED_BASE: 5494 case AMDGPU::SRC_SHARED_LIMIT: 5495 case AMDGPU::SRC_PRIVATE_BASE: 5496 case AMDGPU::SRC_PRIVATE_LIMIT: 5497 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5498 return isGFX9Plus(); 5499 case AMDGPU::TBA: 5500 case AMDGPU::TBA_LO: 5501 case AMDGPU::TBA_HI: 5502 case AMDGPU::TMA: 5503 case AMDGPU::TMA_LO: 5504 case AMDGPU::TMA_HI: 5505 return !isGFX9Plus(); 5506 case AMDGPU::XNACK_MASK: 5507 case AMDGPU::XNACK_MASK_LO: 5508 case AMDGPU::XNACK_MASK_HI: 5509 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5510 case AMDGPU::SGPR_NULL: 5511 return isGFX10Plus(); 5512 default: 5513 break; 5514 } 5515 5516 if (isCI()) 5517 return true; 5518 5519 if (isSI() || isGFX10Plus()) { 5520 // No flat_scr on SI. 5521 // On GFX10 flat scratch is not a valid register operand and can only be 5522 // accessed with s_setreg/s_getreg. 
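    // For example, an operand such as "flat_scratch_lo" should be rejected
    // here when targeting GFX10 (illustrative; the caller emits the actual
    // diagnostic).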
5523 switch (RegNo) { 5524 case AMDGPU::FLAT_SCR: 5525 case AMDGPU::FLAT_SCR_LO: 5526 case AMDGPU::FLAT_SCR_HI: 5527 return false; 5528 default: 5529 return true; 5530 } 5531 } 5532 5533 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5534 // SI/CI have. 5535 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5536 return hasSGPR102_SGPR103(); 5537 5538 return true; 5539 } 5540 5541 OperandMatchResultTy 5542 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5543 OperandMode Mode) { 5544 // Try to parse with a custom parser 5545 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5546 5547 // If we successfully parsed the operand or if there was an error parsing, 5548 // we are done. 5549 // 5550 // If we are parsing after we reach EndOfStatement then this means we 5551 // are appending default values to the Operands list. This is only done 5552 // by a custom parser, so we shouldn't continue on to the generic parsing. 5553 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5554 isToken(AsmToken::EndOfStatement)) 5555 return ResTy; 5556 5557 SMLoc RBraceLoc; 5558 SMLoc LBraceLoc = getLoc(); 5559 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5560 unsigned Prefix = Operands.size(); 5561 5562 for (;;) { 5563 auto Loc = getLoc(); 5564 ResTy = parseReg(Operands); 5565 if (ResTy == MatchOperand_NoMatch) 5566 Error(Loc, "expected a register"); 5567 if (ResTy != MatchOperand_Success) 5568 return MatchOperand_ParseFail; 5569 5570 RBraceLoc = getLoc(); 5571 if (trySkipToken(AsmToken::RBrac)) 5572 break; 5573 5574 if (!skipToken(AsmToken::Comma, 5575 "expected a comma or a closing square bracket")) { 5576 return MatchOperand_ParseFail; 5577 } 5578 } 5579 5580 if (Operands.size() - Prefix > 1) { 5581 Operands.insert(Operands.begin() + Prefix, 5582 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5583 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5584 } 5585 5586 return MatchOperand_Success; 5587 } 5588 5589 return parseRegOrImm(Operands); 5590 } 5591 5592 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5593 // Clear any forced encodings from the previous instruction.
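  // For example, "v_add_f32_e64" forces the 64-bit encoding and is matched
  // under the base name "v_add_f32"; the _e32, _dpp and _sdwa suffixes below
  // behave the same way for their respective encodings.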
5594 setForcedEncodingSize(0); 5595 setForcedDPP(false); 5596 setForcedSDWA(false); 5597 5598 if (Name.endswith("_e64")) { 5599 setForcedEncodingSize(64); 5600 return Name.substr(0, Name.size() - 4); 5601 } else if (Name.endswith("_e32")) { 5602 setForcedEncodingSize(32); 5603 return Name.substr(0, Name.size() - 4); 5604 } else if (Name.endswith("_dpp")) { 5605 setForcedDPP(true); 5606 return Name.substr(0, Name.size() - 4); 5607 } else if (Name.endswith("_sdwa")) { 5608 setForcedSDWA(true); 5609 return Name.substr(0, Name.size() - 5); 5610 } 5611 return Name; 5612 } 5613 5614 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5615 StringRef Name, 5616 SMLoc NameLoc, OperandVector &Operands) { 5617 // Add the instruction mnemonic 5618 Name = parseMnemonicSuffix(Name); 5619 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5620 5621 bool IsMIMG = Name.startswith("image_"); 5622 5623 while (!trySkipToken(AsmToken::EndOfStatement)) { 5624 OperandMode Mode = OperandMode_Default; 5625 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5626 Mode = OperandMode_NSA; 5627 CPolSeen = 0; 5628 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5629 5630 if (Res != MatchOperand_Success) { 5631 checkUnsupportedInstruction(Name, NameLoc); 5632 if (!Parser.hasPendingError()) { 5633 // FIXME: use real operand location rather than the current location. 5634 StringRef Msg = 5635 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5636 "not a valid operand."; 5637 Error(getLoc(), Msg); 5638 } 5639 while (!trySkipToken(AsmToken::EndOfStatement)) { 5640 lex(); 5641 } 5642 return true; 5643 } 5644 5645 // Eat the comma or space if there is one. 5646 trySkipToken(AsmToken::Comma); 5647 } 5648 5649 return false; 5650 } 5651 5652 //===----------------------------------------------------------------------===// 5653 // Utility functions 5654 //===----------------------------------------------------------------------===// 5655 5656 OperandMatchResultTy 5657 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5658 5659 if (!trySkipId(Prefix, AsmToken::Colon)) 5660 return MatchOperand_NoMatch; 5661 5662 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5663 } 5664 5665 OperandMatchResultTy 5666 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5667 AMDGPUOperand::ImmTy ImmTy, 5668 bool (*ConvertResult)(int64_t&)) { 5669 SMLoc S = getLoc(); 5670 int64_t Value = 0; 5671 5672 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5673 if (Res != MatchOperand_Success) 5674 return Res; 5675 5676 if (ConvertResult && !ConvertResult(Value)) { 5677 Error(S, "invalid " + StringRef(Prefix) + " value."); 5678 } 5679 5680 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5681 return MatchOperand_Success; 5682 } 5683 5684 OperandMatchResultTy 5685 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5686 OperandVector &Operands, 5687 AMDGPUOperand::ImmTy ImmTy, 5688 bool (*ConvertResult)(int64_t&)) { 5689 SMLoc S = getLoc(); 5690 if (!trySkipId(Prefix, AsmToken::Colon)) 5691 return MatchOperand_NoMatch; 5692 5693 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5694 return MatchOperand_ParseFail; 5695 5696 unsigned Val = 0; 5697 const unsigned MaxSize = 4; 5698 5699 // FIXME: How to verify the number of elements matches the number of src 5700 // operands? 
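  // For example, "op_sel:[0,1,1,0]" is folded into Val = 0b0110: array
  // element I sets bit I when it is 1, and at most MaxSize (4) elements are
  // accepted.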
5701 for (int I = 0; ; ++I) { 5702 int64_t Op; 5703 SMLoc Loc = getLoc(); 5704 if (!parseExpr(Op)) 5705 return MatchOperand_ParseFail; 5706 5707 if (Op != 0 && Op != 1) { 5708 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5709 return MatchOperand_ParseFail; 5710 } 5711 5712 Val |= (Op << I); 5713 5714 if (trySkipToken(AsmToken::RBrac)) 5715 break; 5716 5717 if (I + 1 == MaxSize) { 5718 Error(getLoc(), "expected a closing square bracket"); 5719 return MatchOperand_ParseFail; 5720 } 5721 5722 if (!skipToken(AsmToken::Comma, "expected a comma")) 5723 return MatchOperand_ParseFail; 5724 } 5725 5726 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5727 return MatchOperand_Success; 5728 } 5729 5730 OperandMatchResultTy 5731 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5732 AMDGPUOperand::ImmTy ImmTy) { 5733 int64_t Bit; 5734 SMLoc S = getLoc(); 5735 5736 if (trySkipId(Name)) { 5737 Bit = 1; 5738 } else if (trySkipId("no", Name)) { 5739 Bit = 0; 5740 } else { 5741 return MatchOperand_NoMatch; 5742 } 5743 5744 if (Name == "r128" && !hasMIMG_R128()) { 5745 Error(S, "r128 modifier is not supported on this GPU"); 5746 return MatchOperand_ParseFail; 5747 } 5748 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5749 Error(S, "a16 modifier is not supported on this GPU"); 5750 return MatchOperand_ParseFail; 5751 } 5752 5753 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5754 ImmTy = AMDGPUOperand::ImmTyR128A16; 5755 5756 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5757 return MatchOperand_Success; 5758 } 5759 5760 OperandMatchResultTy 5761 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5762 unsigned CPolOn = 0; 5763 unsigned CPolOff = 0; 5764 SMLoc S = getLoc(); 5765 5766 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5767 if (isGFX940() && !Mnemo.startswith("s_")) { 5768 if (trySkipId("sc0")) 5769 CPolOn = AMDGPU::CPol::SC0; 5770 else if (trySkipId("nosc0")) 5771 CPolOff = AMDGPU::CPol::SC0; 5772 else if (trySkipId("nt")) 5773 CPolOn = AMDGPU::CPol::NT; 5774 else if (trySkipId("nont")) 5775 CPolOff = AMDGPU::CPol::NT; 5776 else if (trySkipId("sc1")) 5777 CPolOn = AMDGPU::CPol::SC1; 5778 else if (trySkipId("nosc1")) 5779 CPolOff = AMDGPU::CPol::SC1; 5780 else 5781 return MatchOperand_NoMatch; 5782 } 5783 else if (trySkipId("glc")) 5784 CPolOn = AMDGPU::CPol::GLC; 5785 else if (trySkipId("noglc")) 5786 CPolOff = AMDGPU::CPol::GLC; 5787 else if (trySkipId("slc")) 5788 CPolOn = AMDGPU::CPol::SLC; 5789 else if (trySkipId("noslc")) 5790 CPolOff = AMDGPU::CPol::SLC; 5791 else if (trySkipId("dlc")) 5792 CPolOn = AMDGPU::CPol::DLC; 5793 else if (trySkipId("nodlc")) 5794 CPolOff = AMDGPU::CPol::DLC; 5795 else if (trySkipId("scc")) 5796 CPolOn = AMDGPU::CPol::SCC; 5797 else if (trySkipId("noscc")) 5798 CPolOff = AMDGPU::CPol::SCC; 5799 else 5800 return MatchOperand_NoMatch; 5801 5802 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5803 Error(S, "dlc modifier is not supported on this GPU"); 5804 return MatchOperand_ParseFail; 5805 } 5806 5807 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5808 Error(S, "scc modifier is not supported on this GPU"); 5809 return MatchOperand_ParseFail; 5810 } 5811 5812 if (CPolSeen & (CPolOn | CPolOff)) { 5813 Error(S, "duplicate cache policy modifier"); 5814 return MatchOperand_ParseFail; 5815 } 5816 5817 CPolSeen |= (CPolOn | CPolOff); 5818 5819 for (unsigned I = 1; I != Operands.size(); ++I) { 5820 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5821 if (Op.isCPol()) { 5822 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5823 return MatchOperand_Success; 5824 } 5825 } 5826 5827 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5828 AMDGPUOperand::ImmTyCPol)); 5829 5830 return MatchOperand_Success; 5831 } 5832 5833 static void addOptionalImmOperand( 5834 MCInst& Inst, const OperandVector& Operands, 5835 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5836 AMDGPUOperand::ImmTy ImmT, 5837 int64_t Default = 0) { 5838 auto i = OptionalIdx.find(ImmT); 5839 if (i != OptionalIdx.end()) { 5840 unsigned Idx = i->second; 5841 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5842 } else { 5843 Inst.addOperand(MCOperand::createImm(Default)); 5844 } 5845 } 5846 5847 OperandMatchResultTy 5848 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5849 StringRef &Value, 5850 SMLoc &StringLoc) { 5851 if (!trySkipId(Prefix, AsmToken::Colon)) 5852 return MatchOperand_NoMatch; 5853 5854 StringLoc = getLoc(); 5855 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5856 : MatchOperand_ParseFail; 5857 } 5858 5859 //===----------------------------------------------------------------------===// 5860 // MTBUF format 5861 //===----------------------------------------------------------------------===// 5862 5863 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5864 int64_t MaxVal, 5865 int64_t &Fmt) { 5866 int64_t Val; 5867 SMLoc Loc = getLoc(); 5868 5869 auto Res = parseIntWithPrefix(Pref, Val); 5870 if (Res == MatchOperand_ParseFail) 5871 return false; 5872 if (Res == MatchOperand_NoMatch) 5873 return true; 5874 5875 if (Val < 0 || Val > MaxVal) { 5876 Error(Loc, Twine("out of range ", StringRef(Pref))); 5877 return false; 5878 } 5879 5880 Fmt = Val; 5881 return true; 5882 } 5883 5884 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5885 // values to live in a joint format operand in the MCInst encoding. 5886 OperandMatchResultTy 5887 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5888 using namespace llvm::AMDGPU::MTBUFFormat; 5889 5890 int64_t Dfmt = DFMT_UNDEF; 5891 int64_t Nfmt = NFMT_UNDEF; 5892 5893 // dfmt and nfmt can appear in either order, and each is optional. 5894 for (int I = 0; I < 2; ++I) { 5895 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5896 return MatchOperand_ParseFail; 5897 5898 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5899 return MatchOperand_ParseFail; 5900 } 5901 // Skip optional comma between dfmt/nfmt 5902 // but guard against 2 commas following each other. 5903 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5904 !peekToken().is(AsmToken::Comma)) { 5905 trySkipToken(AsmToken::Comma); 5906 } 5907 } 5908 5909 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5910 return MatchOperand_NoMatch; 5911 5912 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5913 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5914 5915 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5916 return MatchOperand_Success; 5917 } 5918 5919 OperandMatchResultTy 5920 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5921 using namespace llvm::AMDGPU::MTBUFFormat; 5922 5923 int64_t Fmt = UFMT_UNDEF; 5924 5925 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5926 return MatchOperand_ParseFail; 5927 5928 if (Fmt == UFMT_UNDEF) 5929 return MatchOperand_NoMatch; 5930 5931 Format = Fmt; 5932 return MatchOperand_Success; 5933 } 5934 5935 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5936 int64_t &Nfmt, 5937 StringRef FormatStr, 5938 SMLoc Loc) { 5939 using namespace llvm::AMDGPU::MTBUFFormat; 5940 int64_t Format; 5941 5942 Format = getDfmt(FormatStr); 5943 if (Format != DFMT_UNDEF) { 5944 Dfmt = Format; 5945 return true; 5946 } 5947 5948 Format = getNfmt(FormatStr, getSTI()); 5949 if (Format != NFMT_UNDEF) { 5950 Nfmt = Format; 5951 return true; 5952 } 5953 5954 Error(Loc, "unsupported format"); 5955 return false; 5956 } 5957 5958 OperandMatchResultTy 5959 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5960 SMLoc FormatLoc, 5961 int64_t &Format) { 5962 using namespace llvm::AMDGPU::MTBUFFormat; 5963 5964 int64_t Dfmt = DFMT_UNDEF; 5965 int64_t Nfmt = NFMT_UNDEF; 5966 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5967 return MatchOperand_ParseFail; 5968 5969 if (trySkipToken(AsmToken::Comma)) { 5970 StringRef Str; 5971 SMLoc Loc = getLoc(); 5972 if (!parseId(Str, "expected a format string") || 5973 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5974 return MatchOperand_ParseFail; 5975 } 5976 if (Dfmt == DFMT_UNDEF) { 5977 Error(Loc, "duplicate numeric format"); 5978 return MatchOperand_ParseFail; 5979 } else if (Nfmt == NFMT_UNDEF) { 5980 Error(Loc, "duplicate data format"); 5981 return MatchOperand_ParseFail; 5982 } 5983 } 5984 5985 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5986 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5987 5988 if (isGFX10Plus()) { 5989 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5990 if (Ufmt == UFMT_UNDEF) { 5991 Error(FormatLoc, "unsupported format"); 5992 return MatchOperand_ParseFail; 5993 } 5994 Format = Ufmt; 5995 } else { 5996 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5997 } 5998 5999 return MatchOperand_Success; 6000 } 6001 6002 OperandMatchResultTy 6003 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6004 SMLoc Loc, 6005 int64_t &Format) { 6006 using namespace llvm::AMDGPU::MTBUFFormat; 6007 6008 auto Id = getUnifiedFormat(FormatStr); 6009 if (Id == UFMT_UNDEF) 6010 return MatchOperand_NoMatch; 6011 6012 if (!isGFX10Plus()) { 6013 Error(Loc, "unified format is not supported on this GPU"); 6014 return MatchOperand_ParseFail; 6015 } 6016 6017 Format = Id; 6018 return MatchOperand_Success; 6019 } 6020 6021 OperandMatchResultTy 6022 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6023 using namespace llvm::AMDGPU::MTBUFFormat; 6024 SMLoc Loc = getLoc(); 6025 6026 if (!parseExpr(Format)) 6027 return MatchOperand_ParseFail; 6028 if (!isValidFormatEncoding(Format, getSTI())) { 6029 Error(Loc, "out of range format"); 6030 return MatchOperand_ParseFail; 6031 } 6032 6033 return MatchOperand_Success; 6034 } 6035 6036 OperandMatchResultTy 6037 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6038 using namespace llvm::AMDGPU::MTBUFFormat; 6039 6040 if (!trySkipId("format", AsmToken::Colon)) 6041 return MatchOperand_NoMatch; 6042 6043 if (trySkipToken(AsmToken::LBrac)) { 6044 StringRef FormatStr; 6045 SMLoc Loc = getLoc(); 6046 if (!parseId(FormatStr, "expected a format string")) 6047 return MatchOperand_ParseFail; 6048 6049 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6050 if (Res == MatchOperand_NoMatch) 6051 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6052 if (Res != MatchOperand_Success) 6053 return Res; 6054 6055 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6056 return MatchOperand_ParseFail; 6057 6058 return MatchOperand_Success; 6059 } 6060 6061 return parseNumericFormat(Format); 6062 } 6063 6064 OperandMatchResultTy 6065 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6066 using namespace llvm::AMDGPU::MTBUFFormat; 6067 6068 int64_t Format = getDefaultFormatEncoding(getSTI()); 6069 OperandMatchResultTy Res; 6070 SMLoc Loc = getLoc(); 6071 6072 // Parse legacy format syntax. 6073 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6074 if (Res == MatchOperand_ParseFail) 6075 return Res; 6076 6077 bool FormatFound = (Res == MatchOperand_Success); 6078 6079 Operands.push_back( 6080 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6081 6082 if (FormatFound) 6083 trySkipToken(AsmToken::Comma); 6084 6085 if (isToken(AsmToken::EndOfStatement)) { 6086 // We are expecting an soffset operand, 6087 // but let matcher handle the error. 6088 return MatchOperand_Success; 6089 } 6090 6091 // Parse soffset. 
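  // The format modifier may also appear after soffset, e.g. a trailing
  // "format:[BUF_FMT_32_FLOAT]" (illustrative name), so
  // parseSymbolicOrNumericFormat is retried below when no format has been
  // seen yet.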
6092 Res = parseRegOrImm(Operands); 6093 if (Res != MatchOperand_Success) 6094 return Res; 6095 6096 trySkipToken(AsmToken::Comma); 6097 6098 if (!FormatFound) { 6099 Res = parseSymbolicOrNumericFormat(Format); 6100 if (Res == MatchOperand_ParseFail) 6101 return Res; 6102 if (Res == MatchOperand_Success) { 6103 auto Size = Operands.size(); 6104 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6105 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6106 Op.setImm(Format); 6107 } 6108 return MatchOperand_Success; 6109 } 6110 6111 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6112 Error(getLoc(), "duplicate format"); 6113 return MatchOperand_ParseFail; 6114 } 6115 return MatchOperand_Success; 6116 } 6117 6118 //===----------------------------------------------------------------------===// 6119 // ds 6120 //===----------------------------------------------------------------------===// 6121 6122 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6123 const OperandVector &Operands) { 6124 OptionalImmIndexMap OptionalIdx; 6125 6126 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6127 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6128 6129 // Add the register arguments 6130 if (Op.isReg()) { 6131 Op.addRegOperands(Inst, 1); 6132 continue; 6133 } 6134 6135 // Handle optional arguments 6136 OptionalIdx[Op.getImmTy()] = i; 6137 } 6138 6139 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6140 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6141 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6142 6143 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6144 } 6145 6146 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6147 bool IsGdsHardcoded) { 6148 OptionalImmIndexMap OptionalIdx; 6149 6150 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6151 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6152 6153 // Add the register arguments 6154 if (Op.isReg()) { 6155 Op.addRegOperands(Inst, 1); 6156 continue; 6157 } 6158 6159 if (Op.isToken() && Op.getToken() == "gds") { 6160 IsGdsHardcoded = true; 6161 continue; 6162 } 6163 6164 // Handle optional arguments 6165 OptionalIdx[Op.getImmTy()] = i; 6166 } 6167 6168 AMDGPUOperand::ImmTy OffsetType = 6169 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6170 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6171 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6172 AMDGPUOperand::ImmTyOffset; 6173 6174 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6175 6176 if (!IsGdsHardcoded) { 6177 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6178 } 6179 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6180 } 6181 6182 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6183 OptionalImmIndexMap OptionalIdx; 6184 6185 unsigned OperandIdx[4]; 6186 unsigned EnMask = 0; 6187 int SrcIdx = 0; 6188 6189 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6190 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6191 6192 // Add the register arguments 6193 if (Op.isReg()) { 6194 assert(SrcIdx < 4); 6195 OperandIdx[SrcIdx] = Inst.size(); 6196 Op.addRegOperands(Inst, 1); 6197 ++SrcIdx; 6198 continue; 6199 } 6200 6201 if (Op.isOff()) { 6202 assert(SrcIdx < 4); 6203 OperandIdx[SrcIdx] = Inst.size(); 6204 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6205 ++SrcIdx; 6206 continue; 6207 } 6208 6209 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6210 Op.addImmOperands(Inst, 1); 6211 continue; 6212 } 6213 6214 if (Op.isToken() && Op.getToken() == "done") 6215 continue; 6216 6217 // Handle optional arguments 6218 OptionalIdx[Op.getImmTy()] = i; 6219 } 6220 6221 assert(SrcIdx == 4); 6222 6223 bool Compr = false; 6224 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6225 Compr = true; 6226 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6227 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6228 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6229 } 6230 6231 for (auto i = 0; i < SrcIdx; ++i) { 6232 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6233 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6234 } 6235 } 6236 6237 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6238 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6239 6240 Inst.addOperand(MCOperand::createImm(EnMask)); 6241 } 6242 6243 //===----------------------------------------------------------------------===// 6244 // s_waitcnt 6245 //===----------------------------------------------------------------------===// 6246 6247 static bool 6248 encodeCnt( 6249 const AMDGPU::IsaVersion ISA, 6250 int64_t &IntVal, 6251 int64_t CntVal, 6252 bool Saturate, 6253 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6254 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6255 { 6256 bool Failed = false; 6257 6258 IntVal = encode(ISA, IntVal, CntVal); 6259 if (CntVal != decode(ISA, IntVal)) { 6260 if (Saturate) { 6261 IntVal = encode(ISA, IntVal, -1); 6262 } else { 6263 Failed = true; 6264 } 6265 } 6266 return Failed; 6267 } 6268 6269 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6270 6271 SMLoc CntLoc = getLoc(); 6272 StringRef CntName = getTokenStr(); 6273 6274 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6275 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6276 return false; 6277 6278 int64_t CntVal; 6279 SMLoc ValLoc = getLoc(); 6280 if (!parseExpr(CntVal)) 6281 return false; 6282 6283 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6284 6285 bool Failed = true; 6286 bool Sat = CntName.endswith("_sat"); 6287 6288 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6289 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6290 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6291 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6292 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6293 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6294 } else { 6295 Error(CntLoc, "invalid counter name " + CntName); 6296 return false; 6297 } 6298 6299 if (Failed) { 6300 Error(ValLoc, "too large value for " + CntName); 6301 return false; 6302 } 6303 6304 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6305 return false; 6306 6307 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6308 if (isToken(AsmToken::EndOfStatement)) { 6309 Error(getLoc(), "expected a counter name"); 6310 return false; 6311 } 6312 } 6313 6314 return true; 6315 } 6316 6317 OperandMatchResultTy 6318 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6319 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6320 int64_t Waitcnt = getWaitcntBitMask(ISA); 6321 SMLoc S = getLoc(); 6322 6323 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6324 while (!isToken(AsmToken::EndOfStatement)) { 6325 if (!parseCnt(Waitcnt)) 6326 return MatchOperand_ParseFail; 6327 } 6328 } else { 6329 if (!parseExpr(Waitcnt)) 6330 return MatchOperand_ParseFail; 6331 } 6332 6333 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6334 return MatchOperand_Success; 6335 } 6336 6337 bool 6338 AMDGPUOperand::isSWaitCnt() const { 6339 return isImm(); 6340 } 6341 6342 //===----------------------------------------------------------------------===// 6343 // DepCtr 6344 //===----------------------------------------------------------------------===// 6345 6346 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6347 StringRef DepCtrName) { 6348 switch 
(ErrorId) { 6349 case OPR_ID_UNKNOWN: 6350 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6351 return; 6352 case OPR_ID_UNSUPPORTED: 6353 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6354 return; 6355 case OPR_ID_DUPLICATE: 6356 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6357 return; 6358 case OPR_VAL_INVALID: 6359 Error(Loc, Twine("invalid value for ", DepCtrName)); 6360 return; 6361 default: 6362 assert(false); 6363 } 6364 } 6365 6366 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6367 6368 using namespace llvm::AMDGPU::DepCtr; 6369 6370 SMLoc DepCtrLoc = getLoc(); 6371 StringRef DepCtrName = getTokenStr(); 6372 6373 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6374 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6375 return false; 6376 6377 int64_t ExprVal; 6378 if (!parseExpr(ExprVal)) 6379 return false; 6380 6381 unsigned PrevOprMask = UsedOprMask; 6382 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6383 6384 if (CntVal < 0) { 6385 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6386 return false; 6387 } 6388 6389 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6390 return false; 6391 6392 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6393 if (isToken(AsmToken::EndOfStatement)) { 6394 Error(getLoc(), "expected a counter name"); 6395 return false; 6396 } 6397 } 6398 6399 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6400 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6401 return true; 6402 } 6403 6404 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6405 using namespace llvm::AMDGPU::DepCtr; 6406 6407 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6408 SMLoc Loc = getLoc(); 6409 6410 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6411 unsigned UsedOprMask = 0; 6412 while (!isToken(AsmToken::EndOfStatement)) { 6413 if (!parseDepCtr(DepCtr, UsedOprMask)) 6414 return MatchOperand_ParseFail; 6415 } 6416 } else { 6417 if (!parseExpr(DepCtr)) 6418 return MatchOperand_ParseFail; 6419 } 6420 6421 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6422 return MatchOperand_Success; 6423 } 6424 6425 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6426 6427 //===----------------------------------------------------------------------===// 6428 // hwreg 6429 //===----------------------------------------------------------------------===// 6430 6431 bool 6432 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6433 OperandInfoTy &Offset, 6434 OperandInfoTy &Width) { 6435 using namespace llvm::AMDGPU::Hwreg; 6436 6437 // The register may be specified by name or using a numeric code 6438 HwReg.Loc = getLoc(); 6439 if (isToken(AsmToken::Identifier) && 6440 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6441 HwReg.IsSymbolic = true; 6442 lex(); // skip register name 6443 } else if (!parseExpr(HwReg.Id, "a register name")) { 6444 return false; 6445 } 6446 6447 if (trySkipToken(AsmToken::RParen)) 6448 return true; 6449 6450 // parse optional params 6451 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6452 return false; 6453 6454 Offset.Loc = getLoc(); 6455 if (!parseExpr(Offset.Id)) 6456 return false; 6457 6458 if (!skipToken(AsmToken::Comma, "expected a comma")) 6459 return false; 6460 6461 Width.Loc = getLoc(); 6462 return parseExpr(Width.Id) && 6463 skipToken(AsmToken::RParen, "expected a closing 
parenthesis"); 6464 } 6465 6466 bool 6467 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6468 const OperandInfoTy &Offset, 6469 const OperandInfoTy &Width) { 6470 6471 using namespace llvm::AMDGPU::Hwreg; 6472 6473 if (HwReg.IsSymbolic) { 6474 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6475 Error(HwReg.Loc, 6476 "specified hardware register is not supported on this GPU"); 6477 return false; 6478 } 6479 } else { 6480 if (!isValidHwreg(HwReg.Id)) { 6481 Error(HwReg.Loc, 6482 "invalid code of hardware register: only 6-bit values are legal"); 6483 return false; 6484 } 6485 } 6486 if (!isValidHwregOffset(Offset.Id)) { 6487 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6488 return false; 6489 } 6490 if (!isValidHwregWidth(Width.Id)) { 6491 Error(Width.Loc, 6492 "invalid bitfield width: only values from 1 to 32 are legal"); 6493 return false; 6494 } 6495 return true; 6496 } 6497 6498 OperandMatchResultTy 6499 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6500 using namespace llvm::AMDGPU::Hwreg; 6501 6502 int64_t ImmVal = 0; 6503 SMLoc Loc = getLoc(); 6504 6505 if (trySkipId("hwreg", AsmToken::LParen)) { 6506 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6507 OperandInfoTy Offset(OFFSET_DEFAULT_); 6508 OperandInfoTy Width(WIDTH_DEFAULT_); 6509 if (parseHwregBody(HwReg, Offset, Width) && 6510 validateHwreg(HwReg, Offset, Width)) { 6511 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6512 } else { 6513 return MatchOperand_ParseFail; 6514 } 6515 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6516 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6517 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6518 return MatchOperand_ParseFail; 6519 } 6520 } else { 6521 return MatchOperand_ParseFail; 6522 } 6523 6524 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6525 return MatchOperand_Success; 6526 } 6527 6528 bool AMDGPUOperand::isHwreg() const { 6529 return isImmTy(ImmTyHwreg); 6530 } 6531 6532 //===----------------------------------------------------------------------===// 6533 // sendmsg 6534 //===----------------------------------------------------------------------===// 6535 6536 bool 6537 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6538 OperandInfoTy &Op, 6539 OperandInfoTy &Stream) { 6540 using namespace llvm::AMDGPU::SendMsg; 6541 6542 Msg.Loc = getLoc(); 6543 if (isToken(AsmToken::Identifier) && 6544 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6545 Msg.IsSymbolic = true; 6546 lex(); // skip message name 6547 } else if (!parseExpr(Msg.Id, "a message name")) { 6548 return false; 6549 } 6550 6551 if (trySkipToken(AsmToken::Comma)) { 6552 Op.IsDefined = true; 6553 Op.Loc = getLoc(); 6554 if (isToken(AsmToken::Identifier) && 6555 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6556 lex(); // skip operation name 6557 } else if (!parseExpr(Op.Id, "an operation name")) { 6558 return false; 6559 } 6560 6561 if (trySkipToken(AsmToken::Comma)) { 6562 Stream.IsDefined = true; 6563 Stream.Loc = getLoc(); 6564 if (!parseExpr(Stream.Id)) 6565 return false; 6566 } 6567 } 6568 6569 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6570 } 6571 6572 bool 6573 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6574 const OperandInfoTy &Op, 6575 const OperandInfoTy &Stream) { 6576 using namespace llvm::AMDGPU::SendMsg; 6577 6578 // Validation strictness depends on whether message is specified 6579 // in a symbolic or in a numeric form. 
In the latter case 6580 // only encoding possibility is checked. 6581 bool Strict = Msg.IsSymbolic; 6582 6583 if (Strict) { 6584 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6585 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6586 return false; 6587 } 6588 } else { 6589 if (!isValidMsgId(Msg.Id)) { 6590 Error(Msg.Loc, "invalid message id"); 6591 return false; 6592 } 6593 } 6594 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6595 if (Op.IsDefined) { 6596 Error(Op.Loc, "message does not support operations"); 6597 } else { 6598 Error(Msg.Loc, "missing message operation"); 6599 } 6600 return false; 6601 } 6602 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6603 Error(Op.Loc, "invalid operation id"); 6604 return false; 6605 } 6606 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6607 Error(Stream.Loc, "message operation does not support streams"); 6608 return false; 6609 } 6610 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6611 Error(Stream.Loc, "invalid message stream id"); 6612 return false; 6613 } 6614 return true; 6615 } 6616 6617 OperandMatchResultTy 6618 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6619 using namespace llvm::AMDGPU::SendMsg; 6620 6621 int64_t ImmVal = 0; 6622 SMLoc Loc = getLoc(); 6623 6624 if (trySkipId("sendmsg", AsmToken::LParen)) { 6625 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6626 OperandInfoTy Op(OP_NONE_); 6627 OperandInfoTy Stream(STREAM_ID_NONE_); 6628 if (parseSendMsgBody(Msg, Op, Stream) && 6629 validateSendMsg(Msg, Op, Stream)) { 6630 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6631 } else { 6632 return MatchOperand_ParseFail; 6633 } 6634 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6635 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6636 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6637 return MatchOperand_ParseFail; 6638 } 6639 } else { 6640 return MatchOperand_ParseFail; 6641 } 6642 6643 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6644 return MatchOperand_Success; 6645 } 6646 6647 bool AMDGPUOperand::isSendMsg() const { 6648 return isImmTy(ImmTySendMsg); 6649 } 6650 6651 //===----------------------------------------------------------------------===// 6652 // v_interp 6653 //===----------------------------------------------------------------------===// 6654 6655 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6656 StringRef Str; 6657 SMLoc S = getLoc(); 6658 6659 if (!parseId(Str)) 6660 return MatchOperand_NoMatch; 6661 6662 int Slot = StringSwitch<int>(Str) 6663 .Case("p10", 0) 6664 .Case("p20", 1) 6665 .Case("p0", 2) 6666 .Default(-1); 6667 6668 if (Slot == -1) { 6669 Error(S, "invalid interpolation slot"); 6670 return MatchOperand_ParseFail; 6671 } 6672 6673 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6674 AMDGPUOperand::ImmTyInterpSlot)); 6675 return MatchOperand_Success; 6676 } 6677 6678 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6679 StringRef Str; 6680 SMLoc S = getLoc(); 6681 6682 if (!parseId(Str)) 6683 return MatchOperand_NoMatch; 6684 6685 if (!Str.startswith("attr")) { 6686 Error(S, "invalid interpolation attribute"); 6687 return MatchOperand_ParseFail; 6688 } 6689 6690 StringRef Chan = Str.take_back(2); 6691 int AttrChan = StringSwitch<int>(Chan) 6692 .Case(".x", 0) 6693 .Case(".y", 1) 6694 .Case(".z", 2) 6695 .Case(".w", 3) 6696 .Default(-1); 6697 if (AttrChan == -1) { 6698 Error(S, "invalid or missing 
interpolation attribute channel"); 6699 return MatchOperand_ParseFail; 6700 } 6701 6702 Str = Str.drop_back(2).drop_front(4); 6703 6704 uint8_t Attr; 6705 if (Str.getAsInteger(10, Attr)) { 6706 Error(S, "invalid or missing interpolation attribute number"); 6707 return MatchOperand_ParseFail; 6708 } 6709 6710 if (Attr > 63) { 6711 Error(S, "out of bounds interpolation attribute number"); 6712 return MatchOperand_ParseFail; 6713 } 6714 6715 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6716 6717 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6718 AMDGPUOperand::ImmTyInterpAttr)); 6719 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6720 AMDGPUOperand::ImmTyAttrChan)); 6721 return MatchOperand_Success; 6722 } 6723 6724 //===----------------------------------------------------------------------===// 6725 // exp 6726 //===----------------------------------------------------------------------===// 6727 6728 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6729 using namespace llvm::AMDGPU::Exp; 6730 6731 StringRef Str; 6732 SMLoc S = getLoc(); 6733 6734 if (!parseId(Str)) 6735 return MatchOperand_NoMatch; 6736 6737 unsigned Id = getTgtId(Str); 6738 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6739 Error(S, (Id == ET_INVALID) ? 6740 "invalid exp target" : 6741 "exp target is not supported on this GPU"); 6742 return MatchOperand_ParseFail; 6743 } 6744 6745 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6746 AMDGPUOperand::ImmTyExpTgt)); 6747 return MatchOperand_Success; 6748 } 6749 6750 //===----------------------------------------------------------------------===// 6751 // parser helpers 6752 //===----------------------------------------------------------------------===// 6753 6754 bool 6755 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6756 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6757 } 6758 6759 bool 6760 AMDGPUAsmParser::isId(const StringRef Id) const { 6761 return isId(getToken(), Id); 6762 } 6763 6764 bool 6765 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6766 return getTokenKind() == Kind; 6767 } 6768 6769 bool 6770 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6771 if (isId(Id)) { 6772 lex(); 6773 return true; 6774 } 6775 return false; 6776 } 6777 6778 bool 6779 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6780 if (isToken(AsmToken::Identifier)) { 6781 StringRef Tok = getTokenStr(); 6782 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6783 lex(); 6784 return true; 6785 } 6786 } 6787 return false; 6788 } 6789 6790 bool 6791 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6792 if (isId(Id) && peekToken().is(Kind)) { 6793 lex(); 6794 lex(); 6795 return true; 6796 } 6797 return false; 6798 } 6799 6800 bool 6801 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6802 if (isToken(Kind)) { 6803 lex(); 6804 return true; 6805 } 6806 return false; 6807 } 6808 6809 bool 6810 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6811 const StringRef ErrMsg) { 6812 if (!trySkipToken(Kind)) { 6813 Error(getLoc(), ErrMsg); 6814 return false; 6815 } 6816 return true; 6817 } 6818 6819 bool 6820 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6821 SMLoc S = getLoc(); 6822 6823 const MCExpr *Expr; 6824 if (Parser.parseExpression(Expr)) 6825 return false; 6826 6827 if (Expr->evaluateAsAbsolute(Imm)) 6828 return true; 6829 6830 if 
(Expected.empty()) { 6831 Error(S, "expected absolute expression"); 6832 } else { 6833 Error(S, Twine("expected ", Expected) + 6834 Twine(" or an absolute expression")); 6835 } 6836 return false; 6837 } 6838 6839 bool 6840 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6841 SMLoc S = getLoc(); 6842 6843 const MCExpr *Expr; 6844 if (Parser.parseExpression(Expr)) 6845 return false; 6846 6847 int64_t IntVal; 6848 if (Expr->evaluateAsAbsolute(IntVal)) { 6849 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6850 } else { 6851 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6852 } 6853 return true; 6854 } 6855 6856 bool 6857 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6858 if (isToken(AsmToken::String)) { 6859 Val = getToken().getStringContents(); 6860 lex(); 6861 return true; 6862 } else { 6863 Error(getLoc(), ErrMsg); 6864 return false; 6865 } 6866 } 6867 6868 bool 6869 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6870 if (isToken(AsmToken::Identifier)) { 6871 Val = getTokenStr(); 6872 lex(); 6873 return true; 6874 } else { 6875 if (!ErrMsg.empty()) 6876 Error(getLoc(), ErrMsg); 6877 return false; 6878 } 6879 } 6880 6881 AsmToken 6882 AMDGPUAsmParser::getToken() const { 6883 return Parser.getTok(); 6884 } 6885 6886 AsmToken 6887 AMDGPUAsmParser::peekToken() { 6888 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6889 } 6890 6891 void 6892 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6893 auto TokCount = getLexer().peekTokens(Tokens); 6894 6895 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6896 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6897 } 6898 6899 AsmToken::TokenKind 6900 AMDGPUAsmParser::getTokenKind() const { 6901 return getLexer().getKind(); 6902 } 6903 6904 SMLoc 6905 AMDGPUAsmParser::getLoc() const { 6906 return getToken().getLoc(); 6907 } 6908 6909 StringRef 6910 AMDGPUAsmParser::getTokenStr() const { 6911 return getToken().getString(); 6912 } 6913 6914 void 6915 AMDGPUAsmParser::lex() { 6916 Parser.Lex(); 6917 } 6918 6919 SMLoc 6920 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6921 const OperandVector &Operands) const { 6922 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6923 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6924 if (Test(Op)) 6925 return Op.getStartLoc(); 6926 } 6927 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6928 } 6929 6930 SMLoc 6931 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6932 const OperandVector &Operands) const { 6933 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6934 return getOperandLoc(Test, Operands); 6935 } 6936 6937 SMLoc 6938 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6939 const OperandVector &Operands) const { 6940 auto Test = [=](const AMDGPUOperand& Op) { 6941 return Op.isRegKind() && Op.getReg() == Reg; 6942 }; 6943 return getOperandLoc(Test, Operands); 6944 } 6945 6946 SMLoc 6947 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6948 auto Test = [](const AMDGPUOperand& Op) { 6949 return Op.IsImmKindLiteral() || Op.isExpr(); 6950 }; 6951 return getOperandLoc(Test, Operands); 6952 } 6953 6954 SMLoc 6955 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6956 auto Test = [](const AMDGPUOperand& Op) { 6957 return Op.isImmKindConst(); 6958 }; 6959 return getOperandLoc(Test, Operands); 6960 } 6961 6962 //===----------------------------------------------------------------------===// 6963 // 
swizzle 6964 //===----------------------------------------------------------------------===// 6965 6966 LLVM_READNONE 6967 static unsigned 6968 encodeBitmaskPerm(const unsigned AndMask, 6969 const unsigned OrMask, 6970 const unsigned XorMask) { 6971 using namespace llvm::AMDGPU::Swizzle; 6972 6973 return BITMASK_PERM_ENC | 6974 (AndMask << BITMASK_AND_SHIFT) | 6975 (OrMask << BITMASK_OR_SHIFT) | 6976 (XorMask << BITMASK_XOR_SHIFT); 6977 } 6978 6979 bool 6980 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6981 const unsigned MinVal, 6982 const unsigned MaxVal, 6983 const StringRef ErrMsg, 6984 SMLoc &Loc) { 6985 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6986 return false; 6987 } 6988 Loc = getLoc(); 6989 if (!parseExpr(Op)) { 6990 return false; 6991 } 6992 if (Op < MinVal || Op > MaxVal) { 6993 Error(Loc, ErrMsg); 6994 return false; 6995 } 6996 6997 return true; 6998 } 6999 7000 bool 7001 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7002 const unsigned MinVal, 7003 const unsigned MaxVal, 7004 const StringRef ErrMsg) { 7005 SMLoc Loc; 7006 for (unsigned i = 0; i < OpNum; ++i) { 7007 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7008 return false; 7009 } 7010 7011 return true; 7012 } 7013 7014 bool 7015 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7016 using namespace llvm::AMDGPU::Swizzle; 7017 7018 int64_t Lane[LANE_NUM]; 7019 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7020 "expected a 2-bit lane id")) { 7021 Imm = QUAD_PERM_ENC; 7022 for (unsigned I = 0; I < LANE_NUM; ++I) { 7023 Imm |= Lane[I] << (LANE_SHIFT * I); 7024 } 7025 return true; 7026 } 7027 return false; 7028 } 7029 7030 bool 7031 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7032 using namespace llvm::AMDGPU::Swizzle; 7033 7034 SMLoc Loc; 7035 int64_t GroupSize; 7036 int64_t LaneIdx; 7037 7038 if (!parseSwizzleOperand(GroupSize, 7039 2, 32, 7040 "group size must be in the interval [2,32]", 7041 Loc)) { 7042 return false; 7043 } 7044 if (!isPowerOf2_64(GroupSize)) { 7045 Error(Loc, "group size must be a power of two"); 7046 return false; 7047 } 7048 if (parseSwizzleOperand(LaneIdx, 7049 0, GroupSize - 1, 7050 "lane id must be in the interval [0,group size - 1]", 7051 Loc)) { 7052 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7053 return true; 7054 } 7055 return false; 7056 } 7057 7058 bool 7059 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7060 using namespace llvm::AMDGPU::Swizzle; 7061 7062 SMLoc Loc; 7063 int64_t GroupSize; 7064 7065 if (!parseSwizzleOperand(GroupSize, 7066 2, 32, 7067 "group size must be in the interval [2,32]", 7068 Loc)) { 7069 return false; 7070 } 7071 if (!isPowerOf2_64(GroupSize)) { 7072 Error(Loc, "group size must be a power of two"); 7073 return false; 7074 } 7075 7076 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7077 return true; 7078 } 7079 7080 bool 7081 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7082 using namespace llvm::AMDGPU::Swizzle; 7083 7084 SMLoc Loc; 7085 int64_t GroupSize; 7086 7087 if (!parseSwizzleOperand(GroupSize, 7088 1, 16, 7089 "group size must be in the interval [1,16]", 7090 Loc)) { 7091 return false; 7092 } 7093 if (!isPowerOf2_64(GroupSize)) { 7094 Error(Loc, "group size must be a power of two"); 7095 return false; 7096 } 7097 7098 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7099 return true; 7100 } 7101 7102 bool 7103 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7104 using namespace llvm::AMDGPU::Swizzle; 7105 7106 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7107 return false; 7108 } 7109 7110 StringRef Ctl; 7111 SMLoc StrLoc = getLoc(); 7112 if (!parseString(Ctl)) { 7113 return false; 7114 } 7115 if (Ctl.size() != BITMASK_WIDTH) { 7116 Error(StrLoc, "expected a 5-character mask"); 7117 return false; 7118 } 7119 7120 unsigned AndMask = 0; 7121 unsigned OrMask = 0; 7122 unsigned XorMask = 0; 7123 7124 for (size_t i = 0; i < Ctl.size(); ++i) { 7125 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7126 switch(Ctl[i]) { 7127 default: 7128 Error(StrLoc, "invalid mask"); 7129 return false; 7130 case '0': 7131 break; 7132 case '1': 7133 OrMask |= Mask; 7134 break; 7135 case 'p': 7136 AndMask |= Mask; 7137 break; 7138 case 'i': 7139 AndMask |= Mask; 7140 XorMask |= Mask; 7141 break; 7142 } 7143 } 7144 7145 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7146 return true; 7147 } 7148 7149 bool 7150 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7151 7152 SMLoc OffsetLoc = getLoc(); 7153 7154 if (!parseExpr(Imm, "a swizzle macro")) { 7155 return false; 7156 } 7157 if (!isUInt<16>(Imm)) { 7158 Error(OffsetLoc, "expected a 16-bit offset"); 7159 return false; 7160 } 7161 return true; 7162 } 7163 7164 bool 7165 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7166 using namespace llvm::AMDGPU::Swizzle; 7167 7168 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7169 7170 SMLoc ModeLoc = getLoc(); 7171 bool Ok = false; 7172 7173 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7174 Ok = parseSwizzleQuadPerm(Imm); 7175 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7176 Ok = parseSwizzleBitmaskPerm(Imm); 7177 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7178 Ok = parseSwizzleBroadcast(Imm); 7179 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7180 Ok = parseSwizzleSwap(Imm); 7181 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7182 Ok = parseSwizzleReverse(Imm); 7183 } else { 7184 Error(ModeLoc, "expected a swizzle mode"); 7185 } 7186 7187 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7188 } 7189 7190 return false; 7191 } 7192 7193 OperandMatchResultTy 7194 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7195 SMLoc S = getLoc(); 7196 int64_t Imm = 0; 7197 7198 if (trySkipId("offset")) { 7199 7200 bool Ok = false; 7201 if (skipToken(AsmToken::Colon, "expected a colon")) { 7202 if (trySkipId("swizzle")) { 7203 Ok = parseSwizzleMacro(Imm); 7204 } else { 7205 Ok = parseSwizzleOffset(Imm); 7206 } 7207 } 7208 7209 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7210 7211 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7212 } else { 7213 // Swizzle "offset" operand is optional. 7214 // If it is omitted, try parsing other optional operands. 
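    // When present, the operand looks like, e.g.,
    //   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)   or   offset:0xFFFF
    // (illustrative; see parseSwizzleMacro above for the remaining modes).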
7215 return parseOptionalOpr(Operands); 7216 } 7217 } 7218 7219 bool 7220 AMDGPUOperand::isSwizzle() const { 7221 return isImmTy(ImmTySwizzle); 7222 } 7223 7224 //===----------------------------------------------------------------------===// 7225 // VGPR Index Mode 7226 //===----------------------------------------------------------------------===// 7227 7228 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7229 7230 using namespace llvm::AMDGPU::VGPRIndexMode; 7231 7232 if (trySkipToken(AsmToken::RParen)) { 7233 return OFF; 7234 } 7235 7236 int64_t Imm = 0; 7237 7238 while (true) { 7239 unsigned Mode = 0; 7240 SMLoc S = getLoc(); 7241 7242 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7243 if (trySkipId(IdSymbolic[ModeId])) { 7244 Mode = 1 << ModeId; 7245 break; 7246 } 7247 } 7248 7249 if (Mode == 0) { 7250 Error(S, (Imm == 0)? 7251 "expected a VGPR index mode or a closing parenthesis" : 7252 "expected a VGPR index mode"); 7253 return UNDEF; 7254 } 7255 7256 if (Imm & Mode) { 7257 Error(S, "duplicate VGPR index mode"); 7258 return UNDEF; 7259 } 7260 Imm |= Mode; 7261 7262 if (trySkipToken(AsmToken::RParen)) 7263 break; 7264 if (!skipToken(AsmToken::Comma, 7265 "expected a comma or a closing parenthesis")) 7266 return UNDEF; 7267 } 7268 7269 return Imm; 7270 } 7271 7272 OperandMatchResultTy 7273 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7274 7275 using namespace llvm::AMDGPU::VGPRIndexMode; 7276 7277 int64_t Imm = 0; 7278 SMLoc S = getLoc(); 7279 7280 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7281 Imm = parseGPRIdxMacro(); 7282 if (Imm == UNDEF) 7283 return MatchOperand_ParseFail; 7284 } else { 7285 if (getParser().parseAbsoluteExpression(Imm)) 7286 return MatchOperand_ParseFail; 7287 if (Imm < 0 || !isUInt<4>(Imm)) { 7288 Error(S, "invalid immediate: only 4-bit values are legal"); 7289 return MatchOperand_ParseFail; 7290 } 7291 } 7292 7293 Operands.push_back( 7294 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7295 return MatchOperand_Success; 7296 } 7297 7298 bool AMDGPUOperand::isGPRIdxMode() const { 7299 return isImmTy(ImmTyGprIdxMode); 7300 } 7301 7302 //===----------------------------------------------------------------------===// 7303 // sopp branch targets 7304 //===----------------------------------------------------------------------===// 7305 7306 OperandMatchResultTy 7307 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7308 7309 // Make sure we are not parsing something 7310 // that looks like a label or an expression but is not. 7311 // This will improve error messages. 7312 if (isRegister() || isModifier()) 7313 return MatchOperand_NoMatch; 7314 7315 if (!parseExpr(Operands)) 7316 return MatchOperand_ParseFail; 7317 7318 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7319 assert(Opr.isImm() || Opr.isExpr()); 7320 SMLoc Loc = Opr.getStartLoc(); 7321 7322 // Currently we do not support arbitrary expressions as branch targets. 7323 // Only labels and absolute expressions are accepted. 
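  // For illustration: a plain label ('s_branch loop_end') or an absolute
  // expression ('s_branch 3') is accepted here, while something like
  // 'loop_end+4' is neither a plain symbol reference nor a constant and is
  // rejected below.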
7324 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7325 Error(Loc, "expected an absolute expression or a label"); 7326 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7327 Error(Loc, "expected a 16-bit signed jump offset"); 7328 } 7329 7330 return MatchOperand_Success; 7331 } 7332 7333 //===----------------------------------------------------------------------===// 7334 // Boolean holding registers 7335 //===----------------------------------------------------------------------===// 7336 7337 OperandMatchResultTy 7338 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7339 return parseReg(Operands); 7340 } 7341 7342 //===----------------------------------------------------------------------===// 7343 // mubuf 7344 //===----------------------------------------------------------------------===// 7345 7346 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7347 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7348 } 7349 7350 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7351 const OperandVector &Operands, 7352 bool IsAtomic, 7353 bool IsLds) { 7354 bool IsLdsOpcode = IsLds; 7355 bool HasLdsModifier = false; 7356 OptionalImmIndexMap OptionalIdx; 7357 unsigned FirstOperandIdx = 1; 7358 bool IsAtomicReturn = false; 7359 7360 if (IsAtomic) { 7361 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7363 if (!Op.isCPol()) 7364 continue; 7365 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7366 break; 7367 } 7368 7369 if (!IsAtomicReturn) { 7370 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7371 if (NewOpc != -1) 7372 Inst.setOpcode(NewOpc); 7373 } 7374 7375 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7376 SIInstrFlags::IsAtomicRet; 7377 } 7378 7379 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7380 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7381 7382 // Add the register arguments 7383 if (Op.isReg()) { 7384 Op.addRegOperands(Inst, 1); 7385 // Insert a tied src for atomic return dst. 7386 // This cannot be postponed as subsequent calls to 7387 // addImmOperands rely on correct number of MC operands. 7388 if (IsAtomicReturn && i == FirstOperandIdx) 7389 Op.addRegOperands(Inst, 1); 7390 continue; 7391 } 7392 7393 // Handle the case where soffset is an immediate 7394 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7395 Op.addImmOperands(Inst, 1); 7396 continue; 7397 } 7398 7399 HasLdsModifier |= Op.isLDS(); 7400 7401 // Handle tokens like 'offen' which are sometimes hard-coded into the 7402 // asm string. There are no MCInst operands for these. 7403 if (Op.isToken()) { 7404 continue; 7405 } 7406 assert(Op.isImm()); 7407 7408 // Handle optional arguments 7409 OptionalIdx[Op.getImmTy()] = i; 7410 } 7411 7412 // This is a workaround for an llvm quirk which may result in an 7413 // incorrect instruction selection. Lds and non-lds versions of 7414 // MUBUF instructions are identical except that lds versions 7415 // have mandatory 'lds' modifier. However this modifier follows 7416 // optional modifiers and llvm asm matcher regards this 'lds' 7417 // modifier as an optional one. As a result, an lds version 7418 // of opcode may be selected even if it has no 'lds' modifier. 7419 if (IsLdsOpcode && !HasLdsModifier) { 7420 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7421 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7422 Inst.setOpcode(NoLdsOpcode); 7423 IsLdsOpcode = false; 7424 } 7425 } 7426 7427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7429 7430 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7431 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7432 } 7433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7434 } 7435 7436 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7437 OptionalImmIndexMap OptionalIdx; 7438 7439 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7440 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7441 7442 // Add the register arguments 7443 if (Op.isReg()) { 7444 Op.addRegOperands(Inst, 1); 7445 continue; 7446 } 7447 7448 // Handle the case where soffset is an immediate 7449 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7450 Op.addImmOperands(Inst, 1); 7451 continue; 7452 } 7453 7454 // Handle tokens like 'offen' which are sometimes hard-coded into the 7455 // asm string. There are no MCInst operands for these. 7456 if (Op.isToken()) { 7457 continue; 7458 } 7459 assert(Op.isImm()); 7460 7461 // Handle optional arguments 7462 OptionalIdx[Op.getImmTy()] = i; 7463 } 7464 7465 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7466 AMDGPUOperand::ImmTyOffset); 7467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7468 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7469 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7470 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7471 } 7472 7473 //===----------------------------------------------------------------------===// 7474 // mimg 7475 //===----------------------------------------------------------------------===// 7476 7477 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7478 bool IsAtomic) { 7479 unsigned I = 1; 7480 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7481 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7482 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7483 } 7484 7485 if (IsAtomic) { 7486 // Add src, same as dst 7487 assert(Desc.getNumDefs() == 1); 7488 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7489 } 7490 7491 OptionalImmIndexMap OptionalIdx; 7492 7493 for (unsigned E = Operands.size(); I != E; ++I) { 7494 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7495 7496 // Add the register arguments 7497 if (Op.isReg()) { 7498 Op.addRegOperands(Inst, 1); 7499 } else if (Op.isImmModifier()) { 7500 OptionalIdx[Op.getImmTy()] = I; 7501 } else if (!Op.isToken()) { 7502 llvm_unreachable("unexpected operand type"); 7503 } 7504 } 7505 7506 bool IsGFX10Plus = isGFX10Plus(); 7507 7508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7509 if (IsGFX10Plus) 7510 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7511 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7512 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7513 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7514 if (IsGFX10Plus) 7515 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7516 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7517 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7519 if (!IsGFX10Plus) 7520 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7522 } 7523 7524 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7525 cvtMIMG(Inst, Operands, true); 7526 } 7527 7528 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7529 OptionalImmIndexMap OptionalIdx; 7530 bool IsAtomicReturn = false; 7531 7532 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7533 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7534 if (!Op.isCPol()) 7535 continue; 7536 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7537 break; 7538 } 7539 7540 if (!IsAtomicReturn) { 7541 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7542 if (NewOpc != -1) 7543 Inst.setOpcode(NewOpc); 7544 } 7545 7546 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7547 SIInstrFlags::IsAtomicRet; 7548 7549 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7550 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7551 7552 // Add the register arguments 7553 if (Op.isReg()) { 7554 Op.addRegOperands(Inst, 1); 7555 if (IsAtomicReturn && i == 1) 7556 Op.addRegOperands(Inst, 1); 7557 continue; 7558 } 7559 7560 // Handle the case where soffset is an immediate 7561 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7562 Op.addImmOperands(Inst, 1); 7563 continue; 7564 } 7565 7566 // Handle tokens like 'offen' which are sometimes hard-coded into the 7567 // asm string. There are no MCInst operands for these. 7568 if (Op.isToken()) { 7569 continue; 7570 } 7571 assert(Op.isImm()); 7572 7573 // Handle optional arguments 7574 OptionalIdx[Op.getImmTy()] = i; 7575 } 7576 7577 if ((int)Inst.getNumOperands() <= 7578 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7580 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7581 } 7582 7583 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7584 const OperandVector &Operands) { 7585 for (unsigned I = 1; I < Operands.size(); ++I) { 7586 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7587 if (Operand.isReg()) 7588 Operand.addRegOperands(Inst, 1); 7589 } 7590 7591 Inst.addOperand(MCOperand::createImm(1)); // a16 7592 } 7593 7594 //===----------------------------------------------------------------------===// 7595 // smrd 7596 //===----------------------------------------------------------------------===// 7597 7598 bool AMDGPUOperand::isSMRDOffset8() const { 7599 return isImm() && isUInt<8>(getImm()); 7600 } 7601 7602 bool AMDGPUOperand::isSMEMOffset() const { 7603 return isImm(); // Offset range is checked later by validator. 7604 } 7605 7606 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7607 // 32-bit literals are only supported on CI and we only want to use them 7608 // when the offset is > 8-bits. 
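  // For illustration: an offset such as 0x1ff does not fit in 8 bits and thus
  // needs the literal form, while 0xff still fits the 8-bit encoding handled
  // by isSMRDOffset8() above.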
7609 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7610 } 7611 7612 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7613 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7614 } 7615 7616 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7617 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7618 } 7619 7620 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7621 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7622 } 7623 7624 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7625 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7626 } 7627 7628 //===----------------------------------------------------------------------===// 7629 // vop3 7630 //===----------------------------------------------------------------------===// 7631 7632 static bool ConvertOmodMul(int64_t &Mul) { 7633 if (Mul != 1 && Mul != 2 && Mul != 4) 7634 return false; 7635 7636 Mul >>= 1; 7637 return true; 7638 } 7639 7640 static bool ConvertOmodDiv(int64_t &Div) { 7641 if (Div == 1) { 7642 Div = 0; 7643 return true; 7644 } 7645 7646 if (Div == 2) { 7647 Div = 3; 7648 return true; 7649 } 7650 7651 return false; 7652 } 7653 7654 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7655 // This is intentional and ensures compatibility with sp3. 7656 // See bug 35397 for details. 7657 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7658 if (BoundCtrl == 0 || BoundCtrl == 1) { 7659 BoundCtrl = 1; 7660 return true; 7661 } 7662 return false; 7663 } 7664 7665 // Note: the order in this table matches the order of operands in AsmString. 7666 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7667 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7668 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7669 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7670 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7671 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7672 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7673 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7674 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7675 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7676 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7677 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7678 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7679 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7680 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7681 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7682 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7683 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7684 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7685 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7686 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7687 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7688 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7689 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7690 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7691 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7692 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7693 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7694 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7695 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7696 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3AndAbove(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
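  // For illustration: a returning flat/global atomic (e.g. something like
  // 'global_atomic_add v0, v[1:2], v2, off glc') ends with a hardcoded 'glc';
  // the lookahead below keeps consuming optional operands so the autogenerated
  // parser never has to deal with that mandatory token.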
7736 7737 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7738 if (res != MatchOperand_Success || 7739 isToken(AsmToken::EndOfStatement)) 7740 break; 7741 7742 trySkipToken(AsmToken::Comma); 7743 res = parseOptionalOpr(Operands); 7744 } 7745 7746 return res; 7747 } 7748 7749 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7750 OperandMatchResultTy res; 7751 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7752 // try to parse any optional operand here 7753 if (Op.IsBit) { 7754 res = parseNamedBit(Op.Name, Operands, Op.Type); 7755 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7756 res = parseOModOperand(Operands); 7757 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7758 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7759 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7760 res = parseSDWASel(Operands, Op.Name, Op.Type); 7761 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7762 res = parseSDWADstUnused(Operands); 7763 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7764 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7765 Op.Type == AMDGPUOperand::ImmTyNegLo || 7766 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7767 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7768 Op.ConvertResult); 7769 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7770 res = parseDim(Operands); 7771 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7772 res = parseCPol(Operands); 7773 } else { 7774 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7775 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7776 res = parseOperandArrayWithPrefix("neg", Operands, 7777 AMDGPUOperand::ImmTyBLGP, 7778 nullptr); 7779 } 7780 } 7781 if (res != MatchOperand_NoMatch) { 7782 return res; 7783 } 7784 } 7785 return MatchOperand_NoMatch; 7786 } 7787 7788 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7789 StringRef Name = getTokenStr(); 7790 if (Name == "mul") { 7791 return parseIntWithPrefix("mul", Operands, 7792 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7793 } 7794 7795 if (Name == "div") { 7796 return parseIntWithPrefix("div", Operands, 7797 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7798 } 7799 7800 return MatchOperand_NoMatch; 7801 } 7802 7803 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7804 cvtVOP3P(Inst, Operands); 7805 7806 int Opc = Inst.getOpcode(); 7807 7808 int SrcNum; 7809 const int Ops[] = { AMDGPU::OpName::src0, 7810 AMDGPU::OpName::src1, 7811 AMDGPU::OpName::src2 }; 7812 for (SrcNum = 0; 7813 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7814 ++SrcNum); 7815 assert(SrcNum > 0); 7816 7817 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7818 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7819 7820 if ((OpSel & (1 << SrcNum)) != 0) { 7821 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7822 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7823 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7824 } 7825 } 7826 7827 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7828 // 1. This operand is input modifiers 7829 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7830 // 2. This is not last operand 7831 && Desc.NumOperands > (OpNum + 1) 7832 // 3. Next operand is register class 7833 && Desc.OpInfo[OpNum + 1].RegClass != -1 7834 // 4. 
Next register is not tied to any other operand 7835 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7836 } 7837 7838 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7839 { 7840 OptionalImmIndexMap OptionalIdx; 7841 unsigned Opc = Inst.getOpcode(); 7842 7843 unsigned I = 1; 7844 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7845 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7846 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7847 } 7848 7849 for (unsigned E = Operands.size(); I != E; ++I) { 7850 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7851 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7852 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7853 } else if (Op.isInterpSlot() || 7854 Op.isInterpAttr() || 7855 Op.isAttrChan()) { 7856 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7857 } else if (Op.isImmModifier()) { 7858 OptionalIdx[Op.getImmTy()] = I; 7859 } else { 7860 llvm_unreachable("unhandled operand type"); 7861 } 7862 } 7863 7864 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7865 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7866 } 7867 7868 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7869 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7870 } 7871 7872 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7874 } 7875 } 7876 7877 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7878 OptionalImmIndexMap &OptionalIdx) { 7879 unsigned Opc = Inst.getOpcode(); 7880 7881 unsigned I = 1; 7882 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7883 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7884 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7885 } 7886 7887 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7888 // This instruction has src modifiers 7889 for (unsigned E = Operands.size(); I != E; ++I) { 7890 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7891 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7892 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7893 } else if (Op.isImmModifier()) { 7894 OptionalIdx[Op.getImmTy()] = I; 7895 } else if (Op.isRegOrImm()) { 7896 Op.addRegOrImmOperands(Inst, 1); 7897 } else { 7898 llvm_unreachable("unhandled operand type"); 7899 } 7900 } 7901 } else { 7902 // No src modifiers 7903 for (unsigned E = Operands.size(); I != E; ++I) { 7904 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7905 if (Op.isMod()) { 7906 OptionalIdx[Op.getImmTy()] = I; 7907 } else { 7908 Op.addRegOrImmOperands(Inst, 1); 7909 } 7910 } 7911 } 7912 7913 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7914 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7915 } 7916 7917 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7918 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7919 } 7920 7921 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7922 // it has src2 register operand that is tied to dst operand 7923 // we don't allow modifiers for this operand in assembler so src2_modifiers 7924 // should be 0. 
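  // For illustration: for 'v_mac_f32_e64 v0, v1, v2' only dst, src0 and src1
  // come from the parsed operand list; the zero src2_modifiers and the tied
  // src2 (a copy of the dst operand) are synthesized below.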
7925 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7926 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7927 Opc == AMDGPU::V_MAC_F32_e64_vi || 7928 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7929 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7930 Opc == AMDGPU::V_MAC_F16_e64_vi || 7931 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7932 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7933 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7934 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7935 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7936 auto it = Inst.begin(); 7937 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7938 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7939 ++it; 7940 // Copy the operand to ensure it's not invalidated when Inst grows. 7941 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7942 } 7943 } 7944 7945 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7946 OptionalImmIndexMap OptionalIdx; 7947 cvtVOP3(Inst, Operands, OptionalIdx); 7948 } 7949 7950 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7951 OptionalImmIndexMap &OptIdx) { 7952 const int Opc = Inst.getOpcode(); 7953 const MCInstrDesc &Desc = MII.get(Opc); 7954 7955 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7956 7957 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7958 assert(!IsPacked); 7959 Inst.addOperand(Inst.getOperand(0)); 7960 } 7961 7962 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7963 // instruction, and then figure out where to actually put the modifiers 7964 7965 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7966 if (OpSelIdx != -1) { 7967 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7968 } 7969 7970 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7971 if (OpSelHiIdx != -1) { 7972 int DefaultVal = IsPacked ? 
-1 : 0; 7973 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7974 DefaultVal); 7975 } 7976 7977 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7978 if (NegLoIdx != -1) { 7979 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7980 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7981 } 7982 7983 const int Ops[] = { AMDGPU::OpName::src0, 7984 AMDGPU::OpName::src1, 7985 AMDGPU::OpName::src2 }; 7986 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7987 AMDGPU::OpName::src1_modifiers, 7988 AMDGPU::OpName::src2_modifiers }; 7989 7990 unsigned OpSel = 0; 7991 unsigned OpSelHi = 0; 7992 unsigned NegLo = 0; 7993 unsigned NegHi = 0; 7994 7995 if (OpSelIdx != -1) 7996 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7997 7998 if (OpSelHiIdx != -1) 7999 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8000 8001 if (NegLoIdx != -1) { 8002 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8003 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8004 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8005 } 8006 8007 for (int J = 0; J < 3; ++J) { 8008 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8009 if (OpIdx == -1) 8010 break; 8011 8012 uint32_t ModVal = 0; 8013 8014 if ((OpSel & (1 << J)) != 0) 8015 ModVal |= SISrcMods::OP_SEL_0; 8016 8017 if ((OpSelHi & (1 << J)) != 0) 8018 ModVal |= SISrcMods::OP_SEL_1; 8019 8020 if ((NegLo & (1 << J)) != 0) 8021 ModVal |= SISrcMods::NEG; 8022 8023 if ((NegHi & (1 << J)) != 0) 8024 ModVal |= SISrcMods::NEG_HI; 8025 8026 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8027 8028 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8029 } 8030 } 8031 8032 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8033 OptionalImmIndexMap OptIdx; 8034 cvtVOP3(Inst, Operands, OptIdx); 8035 cvtVOP3P(Inst, Operands, OptIdx); 8036 } 8037 8038 //===----------------------------------------------------------------------===// 8039 // dpp 8040 //===----------------------------------------------------------------------===// 8041 8042 bool AMDGPUOperand::isDPP8() const { 8043 return isImmTy(ImmTyDPP8); 8044 } 8045 8046 bool AMDGPUOperand::isDPPCtrl() const { 8047 using namespace AMDGPU::DPP; 8048 8049 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8050 if (result) { 8051 int64_t Imm = getImm(); 8052 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8053 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8054 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8055 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8056 (Imm == DppCtrl::WAVE_SHL1) || 8057 (Imm == DppCtrl::WAVE_ROL1) || 8058 (Imm == DppCtrl::WAVE_SHR1) || 8059 (Imm == DppCtrl::WAVE_ROR1) || 8060 (Imm == DppCtrl::ROW_MIRROR) || 8061 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8062 (Imm == DppCtrl::BCAST15) || 8063 (Imm == DppCtrl::BCAST31) || 8064 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8065 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8066 } 8067 return false; 8068 } 8069 8070 //===----------------------------------------------------------------------===// 8071 // mAI 8072 //===----------------------------------------------------------------------===// 8073 8074 bool AMDGPUOperand::isBLGP() const { 8075 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8076 } 8077 8078 bool 
AMDGPUOperand::isCBSZ() const { 8079 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8080 } 8081 8082 bool AMDGPUOperand::isABID() const { 8083 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8084 } 8085 8086 bool AMDGPUOperand::isS16Imm() const { 8087 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8088 } 8089 8090 bool AMDGPUOperand::isU16Imm() const { 8091 return isImm() && isUInt<16>(getImm()); 8092 } 8093 8094 //===----------------------------------------------------------------------===// 8095 // dim 8096 //===----------------------------------------------------------------------===// 8097 8098 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8099 // We want to allow "dim:1D" etc., 8100 // but the initial 1 is tokenized as an integer. 8101 std::string Token; 8102 if (isToken(AsmToken::Integer)) { 8103 SMLoc Loc = getToken().getEndLoc(); 8104 Token = std::string(getTokenStr()); 8105 lex(); 8106 if (getLoc() != Loc) 8107 return false; 8108 } 8109 8110 StringRef Suffix; 8111 if (!parseId(Suffix)) 8112 return false; 8113 Token += Suffix; 8114 8115 StringRef DimId = Token; 8116 if (DimId.startswith("SQ_RSRC_IMG_")) 8117 DimId = DimId.drop_front(12); 8118 8119 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8120 if (!DimInfo) 8121 return false; 8122 8123 Encoding = DimInfo->Encoding; 8124 return true; 8125 } 8126 8127 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8128 if (!isGFX10Plus()) 8129 return MatchOperand_NoMatch; 8130 8131 SMLoc S = getLoc(); 8132 8133 if (!trySkipId("dim", AsmToken::Colon)) 8134 return MatchOperand_NoMatch; 8135 8136 unsigned Encoding; 8137 SMLoc Loc = getLoc(); 8138 if (!parseDimId(Encoding)) { 8139 Error(Loc, "invalid dim value"); 8140 return MatchOperand_ParseFail; 8141 } 8142 8143 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8144 AMDGPUOperand::ImmTyDim)); 8145 return MatchOperand_Success; 8146 } 8147 8148 //===----------------------------------------------------------------------===// 8149 // dpp 8150 //===----------------------------------------------------------------------===// 8151 8152 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8153 SMLoc S = getLoc(); 8154 8155 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8156 return MatchOperand_NoMatch; 8157 8158 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8159 8160 int64_t Sels[8]; 8161 8162 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8163 return MatchOperand_ParseFail; 8164 8165 for (size_t i = 0; i < 8; ++i) { 8166 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8167 return MatchOperand_ParseFail; 8168 8169 SMLoc Loc = getLoc(); 8170 if (getParser().parseAbsoluteExpression(Sels[i])) 8171 return MatchOperand_ParseFail; 8172 if (0 > Sels[i] || 7 < Sels[i]) { 8173 Error(Loc, "expected a 3-bit value"); 8174 return MatchOperand_ParseFail; 8175 } 8176 } 8177 8178 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8179 return MatchOperand_ParseFail; 8180 8181 unsigned DPP8 = 0; 8182 for (size_t i = 0; i < 8; ++i) 8183 DPP8 |= (Sels[i] << (i * 3)); 8184 8185 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8186 return MatchOperand_Success; 8187 } 8188 8189 bool 8190 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8191 const OperandVector &Operands) { 8192 if (Ctrl == "row_newbcast") 8193 return isGFX90A(); 8194 8195 if (Ctrl == "row_share" || 8196 Ctrl 
== "row_xmask") 8197 return isGFX10Plus(); 8198 8199 if (Ctrl == "wave_shl" || 8200 Ctrl == "wave_shr" || 8201 Ctrl == "wave_rol" || 8202 Ctrl == "wave_ror" || 8203 Ctrl == "row_bcast") 8204 return isVI() || isGFX9(); 8205 8206 return Ctrl == "row_mirror" || 8207 Ctrl == "row_half_mirror" || 8208 Ctrl == "quad_perm" || 8209 Ctrl == "row_shl" || 8210 Ctrl == "row_shr" || 8211 Ctrl == "row_ror"; 8212 } 8213 8214 int64_t 8215 AMDGPUAsmParser::parseDPPCtrlPerm() { 8216 // quad_perm:[%d,%d,%d,%d] 8217 8218 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8219 return -1; 8220 8221 int64_t Val = 0; 8222 for (int i = 0; i < 4; ++i) { 8223 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8224 return -1; 8225 8226 int64_t Temp; 8227 SMLoc Loc = getLoc(); 8228 if (getParser().parseAbsoluteExpression(Temp)) 8229 return -1; 8230 if (Temp < 0 || Temp > 3) { 8231 Error(Loc, "expected a 2-bit value"); 8232 return -1; 8233 } 8234 8235 Val += (Temp << i * 2); 8236 } 8237 8238 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8239 return -1; 8240 8241 return Val; 8242 } 8243 8244 int64_t 8245 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8246 using namespace AMDGPU::DPP; 8247 8248 // sel:%d 8249 8250 int64_t Val; 8251 SMLoc Loc = getLoc(); 8252 8253 if (getParser().parseAbsoluteExpression(Val)) 8254 return -1; 8255 8256 struct DppCtrlCheck { 8257 int64_t Ctrl; 8258 int Lo; 8259 int Hi; 8260 }; 8261 8262 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8263 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8264 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8265 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8266 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8267 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8268 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8269 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8270 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8271 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8272 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8273 .Default({-1, 0, 0}); 8274 8275 bool Valid; 8276 if (Check.Ctrl == -1) { 8277 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8278 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8279 } else { 8280 Valid = Check.Lo <= Val && Val <= Check.Hi; 8281 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8282 } 8283 8284 if (!Valid) { 8285 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8286 return -1; 8287 } 8288 8289 return Val; 8290 } 8291 8292 OperandMatchResultTy 8293 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8294 using namespace AMDGPU::DPP; 8295 8296 if (!isToken(AsmToken::Identifier) || 8297 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8298 return MatchOperand_NoMatch; 8299 8300 SMLoc S = getLoc(); 8301 int64_t Val = -1; 8302 StringRef Ctrl; 8303 8304 parseId(Ctrl); 8305 8306 if (Ctrl == "row_mirror") { 8307 Val = DppCtrl::ROW_MIRROR; 8308 } else if (Ctrl == "row_half_mirror") { 8309 Val = DppCtrl::ROW_HALF_MIRROR; 8310 } else { 8311 if (skipToken(AsmToken::Colon, "expected a colon")) { 8312 if (Ctrl == "quad_perm") { 8313 Val = parseDPPCtrlPerm(); 8314 } else { 8315 Val = parseDPPCtrlSel(Ctrl); 8316 } 8317 } 8318 } 8319 8320 if (Val == -1) 8321 return MatchOperand_ParseFail; 8322 8323 Operands.push_back( 8324 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8325 return MatchOperand_Success; 8326 } 8327 8328 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8329 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8330 } 8331 8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8333 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8334 } 8335 8336 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8337 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8338 } 8339 8340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8341 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8342 } 8343 8344 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8345 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8346 } 8347 8348 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8349 OptionalImmIndexMap OptionalIdx; 8350 8351 unsigned Opc = Inst.getOpcode(); 8352 bool HasModifiers = 8353 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8354 unsigned I = 1; 8355 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8356 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8357 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8358 } 8359 8360 int Fi = 0; 8361 for (unsigned E = Operands.size(); I != E; ++I) { 8362 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8363 MCOI::TIED_TO); 8364 if (TiedTo != -1) { 8365 assert((unsigned)TiedTo < Inst.getNumOperands()); 8366 // handle tied old or src2 for MAC instructions 8367 Inst.addOperand(Inst.getOperand(TiedTo)); 8368 } 8369 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8370 // Add the register arguments 8371 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8372 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8373 // Skip it. 
8374 continue; 8375 } 8376 8377 if (IsDPP8) { 8378 if (Op.isDPP8()) { 8379 Op.addImmOperands(Inst, 1); 8380 } else if (HasModifiers && 8381 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8382 Op.addRegWithFPInputModsOperands(Inst, 2); 8383 } else if (Op.isFI()) { 8384 Fi = Op.getImm(); 8385 } else if (Op.isReg()) { 8386 Op.addRegOperands(Inst, 1); 8387 } else { 8388 llvm_unreachable("Invalid operand type"); 8389 } 8390 } else { 8391 if (HasModifiers && 8392 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8393 Op.addRegWithFPInputModsOperands(Inst, 2); 8394 } else if (Op.isReg()) { 8395 Op.addRegOperands(Inst, 1); 8396 } else if (Op.isDPPCtrl()) { 8397 Op.addImmOperands(Inst, 1); 8398 } else if (Op.isImm()) { 8399 // Handle optional arguments 8400 OptionalIdx[Op.getImmTy()] = I; 8401 } else { 8402 llvm_unreachable("Invalid operand type"); 8403 } 8404 } 8405 } 8406 8407 if (IsDPP8) { 8408 using namespace llvm::AMDGPU::DPP; 8409 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8410 } else { 8411 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8412 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8413 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8414 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8415 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8416 } 8417 } 8418 } 8419 8420 //===----------------------------------------------------------------------===// 8421 // sdwa 8422 //===----------------------------------------------------------------------===// 8423 8424 OperandMatchResultTy 8425 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8426 AMDGPUOperand::ImmTy Type) { 8427 using namespace llvm::AMDGPU::SDWA; 8428 8429 SMLoc S = getLoc(); 8430 StringRef Value; 8431 OperandMatchResultTy res; 8432 8433 SMLoc StringLoc; 8434 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8435 if (res != MatchOperand_Success) { 8436 return res; 8437 } 8438 8439 int64_t Int; 8440 Int = StringSwitch<int64_t>(Value) 8441 .Case("BYTE_0", SdwaSel::BYTE_0) 8442 .Case("BYTE_1", SdwaSel::BYTE_1) 8443 .Case("BYTE_2", SdwaSel::BYTE_2) 8444 .Case("BYTE_3", SdwaSel::BYTE_3) 8445 .Case("WORD_0", SdwaSel::WORD_0) 8446 .Case("WORD_1", SdwaSel::WORD_1) 8447 .Case("DWORD", SdwaSel::DWORD) 8448 .Default(0xffffffff); 8449 8450 if (Int == 0xffffffff) { 8451 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8452 return MatchOperand_ParseFail; 8453 } 8454 8455 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8456 return MatchOperand_Success; 8457 } 8458 8459 OperandMatchResultTy 8460 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8461 using namespace llvm::AMDGPU::SDWA; 8462 8463 SMLoc S = getLoc(); 8464 StringRef Value; 8465 OperandMatchResultTy res; 8466 8467 SMLoc StringLoc; 8468 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8469 if (res != MatchOperand_Success) { 8470 return res; 8471 } 8472 8473 int64_t Int; 8474 Int = StringSwitch<int64_t>(Value) 8475 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8476 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8477 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8478 .Default(0xffffffff); 8479 8480 if (Int == 0xffffffff) { 8481 Error(StringLoc, "invalid dst_unused value"); 8482 return MatchOperand_ParseFail; 8483 } 8484 8485 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8486 return MatchOperand_Success; 8487 } 8488 8489 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8490 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8491 } 8492 8493 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8494 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8495 } 8496 8497 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8498 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8499 } 8500 8501 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8502 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8503 } 8504 8505 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8506 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8507 } 8508 8509 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8510 uint64_t BasicInstType, 8511 bool SkipDstVcc, 8512 bool SkipSrcVcc) { 8513 using namespace llvm::AMDGPU::SDWA; 8514 8515 OptionalImmIndexMap OptionalIdx; 8516 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8517 bool SkippedVcc = false; 8518 8519 unsigned I = 1; 8520 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8521 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8522 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8523 } 8524 8525 for (unsigned E = Operands.size(); I != E; ++I) { 8526 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8527 if (SkipVcc && !SkippedVcc && Op.isReg() && 8528 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8529 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8530 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8531 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8532 // Skip VCC only if we didn't skip it on previous iteration. 8533 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
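    // For illustration: Inst.getNumOperands() == 1 below means only the vdst
    // has been added so far (the 'vcc' being skipped is the written dst),
    // while == 5 means vdst plus two modifier+source pairs have been added
    // (the trailing source 'vcc').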
8534 if (BasicInstType == SIInstrFlags::VOP2 && 8535 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8536 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8537 SkippedVcc = true; 8538 continue; 8539 } else if (BasicInstType == SIInstrFlags::VOPC && 8540 Inst.getNumOperands() == 0) { 8541 SkippedVcc = true; 8542 continue; 8543 } 8544 } 8545 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8546 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8547 } else if (Op.isImm()) { 8548 // Handle optional arguments 8549 OptionalIdx[Op.getImmTy()] = I; 8550 } else { 8551 llvm_unreachable("Invalid operand type"); 8552 } 8553 SkippedVcc = false; 8554 } 8555 8556 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8557 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8558 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8559 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8560 switch (BasicInstType) { 8561 case SIInstrFlags::VOP1: 8562 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8563 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8564 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8565 } 8566 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8568 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8569 break; 8570 8571 case SIInstrFlags::VOP2: 8572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8573 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8575 } 8576 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8578 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8580 break; 8581 8582 case SIInstrFlags::VOPC: 8583 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8584 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8586 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8587 break; 8588 8589 default: 8590 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // these opcodes have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match an
    // operand as a token when isToken returns true, and when the name of the
    // expression is not a valid token, the match fails, so we need to handle it
    // here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8669 case MCK_Attr: 8670 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8671 case MCK_AttrChan: 8672 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8673 case MCK_ImmSMEMOffset: 8674 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8675 case MCK_SReg_64: 8676 case MCK_SReg_64_XEXEC: 8677 // Null is defined as a 32-bit register but 8678 // it should also be enabled with 64-bit operands. 8679 // The following code enables it for SReg_64 operands 8680 // used as source and destination. Remaining source 8681 // operands are handled in isInlinableImm. 8682 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8683 default: 8684 return Match_InvalidOperand; 8685 } 8686 } 8687 8688 //===----------------------------------------------------------------------===// 8689 // endpgm 8690 //===----------------------------------------------------------------------===// 8691 8692 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8693 SMLoc S = getLoc(); 8694 int64_t Imm = 0; 8695 8696 if (!parseExpr(Imm)) { 8697 // The operand is optional, if not present default to 0 8698 Imm = 0; 8699 } 8700 8701 if (!isUInt<16>(Imm)) { 8702 Error(S, "expected a 16-bit value"); 8703 return MatchOperand_ParseFail; 8704 } 8705 8706 Operands.push_back( 8707 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8708 return MatchOperand_Success; 8709 } 8710 8711 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8712