1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
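    // For example, a trailing 'gds' modifier may reach us as a symbol-reference
    // expression rather than a plain token; getToken() below falls back to the
    // symbol name in that situation (illustrative note, see getExpressionAsToken()).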
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 // TODO: Possibly make subtargetHasRegister const. 1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1217 bool ParseDirectiveAMDGPUHsaKernel(); 1218 1219 bool ParseDirectiveISAVersion(); 1220 bool ParseDirectiveHSAMetadata(); 1221 bool ParseDirectivePALMetadataBegin(); 1222 bool ParseDirectivePALMetadata(); 1223 bool ParseDirectiveAMDGPULDS(); 1224 1225 /// Common code to parse out a block of text (typically YAML) between start and 1226 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
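      // Illustrative example (not from the original source): with -mcpu=gfx900
      // and the HSA code-object v3+ ABI, the block below defines
      //   .amdgcn.gfx_generation_number   = 9
      //   .amdgcn.gfx_generation_minor    = 0
      //   .amdgcn.gfx_generation_stepping = 0
      // so hand-written assembly can test them, e.g. with
      //   .if .amdgcn.gfx_generation_number >= 9
      // (the gfx900 values are an assumption based on its 9.0.0 ISA version).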
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool isSI() const { 1333 return AMDGPU::isSI(getSTI()); 1334 } 1335 1336 bool isCI() const { 1337 return AMDGPU::isCI(getSTI()); 1338 } 1339 1340 bool isVI() const { 1341 return AMDGPU::isVI(getSTI()); 1342 } 1343 1344 bool isGFX9() const { 1345 return AMDGPU::isGFX9(getSTI()); 1346 } 1347 1348 bool isGFX90A() const { 1349 return AMDGPU::isGFX90A(getSTI()); 1350 } 1351 1352 bool isGFX9Plus() const { 1353 return AMDGPU::isGFX9Plus(getSTI()); 1354 } 1355 1356 bool isGFX10() const { 1357 return AMDGPU::isGFX10(getSTI()); 1358 } 1359 1360 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1361 1362 bool isGFX10_BEncoding() const { 1363 return AMDGPU::isGFX10_BEncoding(getSTI()); 1364 } 1365 1366 bool hasInv2PiInlineImm() const { 1367 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1368 } 1369 1370 bool hasFlatOffsets() const { 1371 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1372 } 1373 1374 bool hasSGPR102_SGPR103() const { 1375 return !isVI() && !isGFX9(); 1376 } 1377 1378 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1379 1380 bool hasIntClamp() const { 1381 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1382 } 1383 1384 AMDGPUTargetStreamer &getTargetStreamer() { 1385 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1386 return static_cast<AMDGPUTargetStreamer &>(TS); 1387 } 1388 1389 const MCRegisterInfo *getMRI() const { 1390 // We need this const_cast because for some reason getContext() is not const 1391 // in MCAsmParser. 
1392 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1393 } 1394 1395 const MCInstrInfo *getMII() const { 1396 return &MII; 1397 } 1398 1399 const FeatureBitset &getFeatureBits() const { 1400 return getSTI().getFeatureBits(); 1401 } 1402 1403 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1404 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1405 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1406 1407 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1408 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1409 bool isForcedDPP() const { return ForcedDPP; } 1410 bool isForcedSDWA() const { return ForcedSDWA; } 1411 ArrayRef<unsigned> getMatchedVariants() const; 1412 StringRef getMatchedVariantName() const; 1413 1414 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1415 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1416 bool RestoreOnFailure); 1417 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1418 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1419 SMLoc &EndLoc) override; 1420 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1421 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1422 unsigned Kind) override; 1423 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1424 OperandVector &Operands, MCStreamer &Out, 1425 uint64_t &ErrorInfo, 1426 bool MatchingInlineAsm) override; 1427 bool ParseDirective(AsmToken DirectiveID) override; 1428 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1429 OperandMode Mode = OperandMode_Default); 1430 StringRef parseMnemonicSuffix(StringRef Name); 1431 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1432 SMLoc NameLoc, OperandVector &Operands) override; 1433 //bool ProcessInstruction(MCInst &Inst); 1434 1435 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1436 1437 OperandMatchResultTy 1438 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1439 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1440 bool (*ConvertResult)(int64_t &) = nullptr); 1441 1442 OperandMatchResultTy 1443 parseOperandArrayWithPrefix(const char *Prefix, 1444 OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t&) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseNamedBit(StringRef Name, OperandVector &Operands, 1450 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1451 OperandMatchResultTy parseCPol(OperandVector &Operands); 1452 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1453 StringRef &Value, 1454 SMLoc &StringLoc); 1455 1456 bool isModifier(); 1457 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1458 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1459 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1460 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1461 bool parseSP3NegModifier(); 1462 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1463 OperandMatchResultTy parseReg(OperandVector &Operands); 1464 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1465 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1466 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1467 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1468 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1469 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1470 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1471 OperandMatchResultTy parseUfmt(int64_t &Format); 1472 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1473 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1474 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1475 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1476 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1477 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1478 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1479 1480 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1481 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1482 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1483 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1484 1485 bool parseCnt(int64_t &IntVal); 1486 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1487 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1488 1489 private: 1490 struct OperandInfoTy { 1491 SMLoc Loc; 1492 int64_t Id; 1493 bool IsSymbolic = false; 1494 bool IsDefined = false; 1495 1496 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1497 }; 1498 1499 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1500 bool validateSendMsg(const OperandInfoTy &Msg, 1501 const OperandInfoTy &Op, 1502 const OperandInfoTy &Stream); 1503 1504 bool parseHwregBody(OperandInfoTy &HwReg, 1505 OperandInfoTy &Offset, 1506 OperandInfoTy &Width); 1507 bool validateHwreg(const OperandInfoTy &HwReg, 1508 const OperandInfoTy &Offset, 1509 const OperandInfoTy &Width); 1510 1511 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1512 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1513 1514 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1515 const OperandVector &Operands) const; 1516 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1517 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1518 SMLoc getLitLoc(const OperandVector &Operands) const; 1519 SMLoc getConstLoc(const OperandVector &Operands) const; 1520 1521 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1522 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1523 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1524 bool validateSOPLiteral(const MCInst &Inst) const; 1525 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1526 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1527 bool validateIntClampSupported(const MCInst &Inst); 1528 bool validateMIMGAtomicDMask(const MCInst &Inst); 1529 bool validateMIMGGatherDMask(const MCInst &Inst); 1530 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1531 bool validateMIMGDataSize(const MCInst &Inst); 1532 bool validateMIMGAddrSize(const 
MCInst &Inst); 1533 bool validateMIMGD16(const MCInst &Inst); 1534 bool validateMIMGDim(const MCInst &Inst); 1535 bool validateMIMGMSAA(const MCInst &Inst); 1536 bool validateOpSel(const MCInst &Inst); 1537 bool validateVccOperand(unsigned Reg) const; 1538 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1539 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1540 bool validateAGPRLdSt(const MCInst &Inst) const; 1541 bool validateVGPRAlign(const MCInst &Inst) const; 1542 bool validateDivScale(const MCInst &Inst); 1543 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1544 const SMLoc &IDLoc); 1545 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1546 unsigned getConstantBusLimit(unsigned Opcode) const; 1547 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1548 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1549 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1550 1551 bool isSupportedMnemo(StringRef Mnemo, 1552 const FeatureBitset &FBS); 1553 bool isSupportedMnemo(StringRef Mnemo, 1554 const FeatureBitset &FBS, 1555 ArrayRef<unsigned> Variants); 1556 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1557 1558 bool isId(const StringRef Id) const; 1559 bool isId(const AsmToken &Token, const StringRef Id) const; 1560 bool isToken(const AsmToken::TokenKind Kind) const; 1561 bool trySkipId(const StringRef Id); 1562 bool trySkipId(const StringRef Pref, const StringRef Id); 1563 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1564 bool trySkipToken(const AsmToken::TokenKind Kind); 1565 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1566 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1567 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1568 1569 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1570 AsmToken::TokenKind getTokenKind() const; 1571 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1572 bool parseExpr(OperandVector &Operands); 1573 StringRef getTokenStr() const; 1574 AsmToken peekToken(); 1575 AsmToken getToken() const; 1576 SMLoc getLoc() const; 1577 void lex(); 1578 1579 public: 1580 void onBeginOfFile() override; 1581 1582 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1583 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1584 1585 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1586 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1587 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1588 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1589 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1590 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1591 1592 bool parseSwizzleOperand(int64_t &Op, 1593 const unsigned MinVal, 1594 const unsigned MaxVal, 1595 const StringRef ErrMsg, 1596 SMLoc &Loc); 1597 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1598 const unsigned MinVal, 1599 const unsigned MaxVal, 1600 const StringRef ErrMsg); 1601 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1602 bool parseSwizzleOffset(int64_t &Imm); 1603 bool parseSwizzleMacro(int64_t &Imm); 1604 bool parseSwizzleQuadPerm(int64_t &Imm); 1605 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1606 bool parseSwizzleBroadcast(int64_t &Imm); 1607 bool parseSwizzleSwap(int64_t &Imm); 1608 bool parseSwizzleReverse(int64_t &Imm); 1609 1610 
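  // Note on the swizzle parsers above (illustrative, not from the original
  // source): ds_swizzle_b32 offsets may be written either as a raw immediate or
  // with the SP3-style macros handled by parseSwizzleMacro(), e.g.
  //   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
  //   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)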
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1611 int64_t parseGPRIdxMacro(); 1612 1613 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1614 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1615 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1616 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1617 1618 AMDGPUOperand::Ptr defaultCPol() const; 1619 1620 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1621 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1622 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1623 AMDGPUOperand::Ptr defaultFlatOffset() const; 1624 1625 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1626 1627 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1628 OptionalImmIndexMap &OptionalIdx); 1629 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1630 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1631 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1632 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1633 OptionalImmIndexMap &OptionalIdx); 1634 1635 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1636 1637 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1638 bool IsAtomic = false); 1639 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1640 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1641 1642 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1643 1644 bool parseDimId(unsigned &Encoding); 1645 OperandMatchResultTy parseDim(OperandVector &Operands); 1646 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1647 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1648 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1649 int64_t parseDPPCtrlSel(StringRef Ctrl); 1650 int64_t parseDPPCtrlPerm(); 1651 AMDGPUOperand::Ptr defaultRowMask() const; 1652 AMDGPUOperand::Ptr defaultBankMask() const; 1653 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1654 AMDGPUOperand::Ptr defaultFI() const; 1655 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1656 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1657 1658 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1659 AMDGPUOperand::ImmTy Type); 1660 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1661 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1662 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1663 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1664 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1665 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1666 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1667 uint64_t BasicInstType, 1668 bool SkipDstVcc = false, 1669 bool SkipSrcVcc = false); 1670 1671 AMDGPUOperand::Ptr defaultBLGP() const; 1672 AMDGPUOperand::Ptr defaultCBSZ() const; 1673 AMDGPUOperand::Ptr defaultABID() const; 1674 1675 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1676 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1677 }; 1678 1679 struct OptionalOperand { 1680 const char *Name; 1681 AMDGPUOperand::ImmTy Type; 1682 bool IsBit; 1683 bool (*ConvertResult)(int64_t&); 1684 }; 1685 1686 } // end anonymous namespace 
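// Illustrative sketch (not part of the original file): parser routines typically
// build operands through the static factories of AMDGPUOperand declared above,
// e.g.
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyOffset));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, RegNo, S, E));
// where Imm/RegNo and the SMLocs come from the tokens just consumed.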
// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the semantics of the requested type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
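  // (For illustration: "named inline values" here means operands such as
  // src_shared_base or src_private_limit; the exact set is subtarget-dependent,
  // see isInlineValue().)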
1785 if (isInlineValue()) { 1786 return true; 1787 } 1788 1789 if (!isImmTy(ImmTyNone)) { 1790 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1791 return false; 1792 } 1793 // TODO: We should avoid using host float here. It would be better to 1794 // check the float bit values which is what a few other places do. 1795 // We've had bot failures before due to weird NaN support on mips hosts. 1796 1797 APInt Literal(64, Imm.Val); 1798 1799 if (Imm.IsFPImm) { // We got fp literal token 1800 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1801 return AMDGPU::isInlinableLiteral64(Imm.Val, 1802 AsmParser->hasInv2PiInlineImm()); 1803 } 1804 1805 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1806 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1807 return false; 1808 1809 if (type.getScalarSizeInBits() == 16) { 1810 return isInlineableLiteralOp16( 1811 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1812 type, AsmParser->hasInv2PiInlineImm()); 1813 } 1814 1815 // Check if single precision literal is inlinable 1816 return AMDGPU::isInlinableLiteral32( 1817 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1818 AsmParser->hasInv2PiInlineImm()); 1819 } 1820 1821 // We got int literal token. 1822 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1823 return AMDGPU::isInlinableLiteral64(Imm.Val, 1824 AsmParser->hasInv2PiInlineImm()); 1825 } 1826 1827 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1828 return false; 1829 } 1830 1831 if (type.getScalarSizeInBits() == 16) { 1832 return isInlineableLiteralOp16( 1833 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1834 type, AsmParser->hasInv2PiInlineImm()); 1835 } 1836 1837 return AMDGPU::isInlinableLiteral32( 1838 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1839 AsmParser->hasInv2PiInlineImm()); 1840 } 1841 1842 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1843 // Check that this immediate can be added as literal 1844 if (!isImmTy(ImmTyNone)) { 1845 return false; 1846 } 1847 1848 if (!Imm.IsFPImm) { 1849 // We got int literal token. 1850 1851 if (type == MVT::f64 && hasFPModifiers()) { 1852 // Cannot apply fp modifiers to int literals preserving the same semantics 1853 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1854 // disable these cases. 1855 return false; 1856 } 1857 1858 unsigned Size = type.getSizeInBits(); 1859 if (Size == 64) 1860 Size = 32; 1861 1862 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1863 // types. 1864 return isSafeTruncation(Imm.Val, Size); 1865 } 1866 1867 // We got fp literal token 1868 if (type == MVT::f64) { // Expected 64-bit fp operand 1869 // We would set low 64-bits of literal to zeroes but we accept this literals 1870 return true; 1871 } 1872 1873 if (type == MVT::i64) { // Expected 64-bit int operand 1874 // We don't allow fp literals in 64-bit integer instructions. It is 1875 // unclear how we should encode them. 1876 return false; 1877 } 1878 1879 // We allow fp literals with f16x2 operands assuming that the specified 1880 // literal goes into the lower half and the upper half is zero. We also 1881 // require that the literal may be losslesly converted to f16. 1882 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1883 (type == MVT::v2i16)? MVT::i16 : 1884 (type == MVT::v2f32)? 
MVT::f32 : type; 1885 1886 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1887 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1888 } 1889 1890 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1891 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1892 } 1893 1894 bool AMDGPUOperand::isVRegWithInputMods() const { 1895 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1896 // GFX90A allows DPP on 64-bit operands. 1897 (isRegClass(AMDGPU::VReg_64RegClassID) && 1898 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1899 } 1900 1901 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1902 if (AsmParser->isVI()) 1903 return isVReg32(); 1904 else if (AsmParser->isGFX9Plus()) 1905 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1906 else 1907 return false; 1908 } 1909 1910 bool AMDGPUOperand::isSDWAFP16Operand() const { 1911 return isSDWAOperand(MVT::f16); 1912 } 1913 1914 bool AMDGPUOperand::isSDWAFP32Operand() const { 1915 return isSDWAOperand(MVT::f32); 1916 } 1917 1918 bool AMDGPUOperand::isSDWAInt16Operand() const { 1919 return isSDWAOperand(MVT::i16); 1920 } 1921 1922 bool AMDGPUOperand::isSDWAInt32Operand() const { 1923 return isSDWAOperand(MVT::i32); 1924 } 1925 1926 bool AMDGPUOperand::isBoolReg() const { 1927 auto FB = AsmParser->getFeatureBits(); 1928 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1929 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1930 } 1931 1932 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1933 { 1934 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1935 assert(Size == 2 || Size == 4 || Size == 8); 1936 1937 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1938 1939 if (Imm.Mods.Abs) { 1940 Val &= ~FpSignMask; 1941 } 1942 if (Imm.Mods.Neg) { 1943 Val ^= FpSignMask; 1944 } 1945 1946 return Val; 1947 } 1948 1949 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1950 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1951 Inst.getNumOperands())) { 1952 addLiteralImmOperand(Inst, Imm.Val, 1953 ApplyModifiers & 1954 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1955 } else { 1956 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1957 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1958 setImmKindNone(); 1959 } 1960 } 1961 1962 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1963 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1964 auto OpNum = Inst.getNumOperands(); 1965 // Check that this operand accepts literals 1966 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1967 1968 if (ApplyModifiers) { 1969 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1970 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1971 Val = applyInputFPModifiers(Val, Size); 1972 } 1973 1974 APInt Literal(64, Val); 1975 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1976 1977 if (Imm.IsFPImm) { // We got fp literal token 1978 switch (OpTy) { 1979 case AMDGPU::OPERAND_REG_IMM_INT64: 1980 case AMDGPU::OPERAND_REG_IMM_FP64: 1981 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1982 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1983 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1984 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1985 AsmParser->hasInv2PiInlineImm())) { 1986 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1987 setImmKindConst(); 1988 return; 1989 } 1990 1991 // Non-inlineable 1992 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1993 // For fp operands we check if low 32 bits are zeros 1994 if (Literal.getLoBits(32) != 0) { 1995 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1996 "Can't encode literal as exact 64-bit floating-point operand. " 1997 "Low 32-bits will be set to zero"); 1998 } 1999 2000 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2001 setImmKindLiteral(); 2002 return; 2003 } 2004 2005 // We don't allow fp literals in 64-bit integer instructions. It is 2006 // unclear how we should encode them. This case should be checked earlier 2007 // in predicate methods (isLiteralImm()) 2008 llvm_unreachable("fp literal in 64-bit integer instruction."); 2009 2010 case AMDGPU::OPERAND_REG_IMM_INT32: 2011 case AMDGPU::OPERAND_REG_IMM_FP32: 2012 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2013 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2014 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2015 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2016 case AMDGPU::OPERAND_REG_IMM_INT16: 2017 case AMDGPU::OPERAND_REG_IMM_FP16: 2018 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2019 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2020 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2021 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2023 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2024 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2025 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2026 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2027 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2028 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2029 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2030 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2031 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2032 bool lost; 2033 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2034 // Convert literal to single precision 2035 FPLiteral.convert(*getOpFltSemantics(OpTy), 2036 APFloat::rmNearestTiesToEven, &lost); 2037 // We allow precision lost but not overflow or underflow. This should be 2038 // checked earlier in isLiteralImm() 2039 2040 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2041 Inst.addOperand(MCOperand::createImm(ImmVal)); 2042 setImmKindLiteral(); 2043 return; 2044 } 2045 default: 2046 llvm_unreachable("invalid operand size"); 2047 } 2048 2049 return; 2050 } 2051 2052 // We got int literal token. 2053 // Only sign extend inline immediates. 
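  // For example, with a 32-bit operand an inline value such as -1 or 64 is
  // emitted as-is and marked as a constant, while an out-of-range value
  // like 0x1FFFFFFFF is truncated to its low 32 bits (0xFFFFFFFF) and
  // emitted as a literal.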
2054 switch (OpTy) { 2055 case AMDGPU::OPERAND_REG_IMM_INT32: 2056 case AMDGPU::OPERAND_REG_IMM_FP32: 2057 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2058 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2059 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2060 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2061 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2062 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2063 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2064 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2065 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2067 if (isSafeTruncation(Val, 32) && 2068 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2069 AsmParser->hasInv2PiInlineImm())) { 2070 Inst.addOperand(MCOperand::createImm(Val)); 2071 setImmKindConst(); 2072 return; 2073 } 2074 2075 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2076 setImmKindLiteral(); 2077 return; 2078 2079 case AMDGPU::OPERAND_REG_IMM_INT64: 2080 case AMDGPU::OPERAND_REG_IMM_FP64: 2081 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2082 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2083 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2084 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2085 Inst.addOperand(MCOperand::createImm(Val)); 2086 setImmKindConst(); 2087 return; 2088 } 2089 2090 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2091 setImmKindLiteral(); 2092 return; 2093 2094 case AMDGPU::OPERAND_REG_IMM_INT16: 2095 case AMDGPU::OPERAND_REG_IMM_FP16: 2096 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2097 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2099 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2100 if (isSafeTruncation(Val, 16) && 2101 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2102 AsmParser->hasInv2PiInlineImm())) { 2103 Inst.addOperand(MCOperand::createImm(Val)); 2104 setImmKindConst(); 2105 return; 2106 } 2107 2108 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2109 setImmKindLiteral(); 2110 return; 2111 2112 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2113 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2114 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2115 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2116 assert(isSafeTruncation(Val, 16)); 2117 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2118 AsmParser->hasInv2PiInlineImm())); 2119 2120 Inst.addOperand(MCOperand::createImm(Val)); 2121 return; 2122 } 2123 default: 2124 llvm_unreachable("invalid operand size"); 2125 } 2126 } 2127 2128 template <unsigned Bitwidth> 2129 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2130 APInt Literal(64, Imm.Val); 2131 setImmKindNone(); 2132 2133 if (!Imm.IsFPImm) { 2134 // We got int literal token. 
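    // Only the low Bitwidth bits are encoded; for example, a 16-bit kimm
    // operand keeps just bits [15:0] of the integer literal.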
2135 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2136 return; 2137 } 2138 2139 bool Lost; 2140 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2141 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2142 APFloat::rmNearestTiesToEven, &Lost); 2143 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2144 } 2145 2146 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2147 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2148 } 2149 2150 static bool isInlineValue(unsigned Reg) { 2151 switch (Reg) { 2152 case AMDGPU::SRC_SHARED_BASE: 2153 case AMDGPU::SRC_SHARED_LIMIT: 2154 case AMDGPU::SRC_PRIVATE_BASE: 2155 case AMDGPU::SRC_PRIVATE_LIMIT: 2156 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2157 return true; 2158 case AMDGPU::SRC_VCCZ: 2159 case AMDGPU::SRC_EXECZ: 2160 case AMDGPU::SRC_SCC: 2161 return true; 2162 case AMDGPU::SGPR_NULL: 2163 return true; 2164 default: 2165 return false; 2166 } 2167 } 2168 2169 bool AMDGPUOperand::isInlineValue() const { 2170 return isRegKind() && ::isInlineValue(getReg()); 2171 } 2172 2173 //===----------------------------------------------------------------------===// 2174 // AsmParser 2175 //===----------------------------------------------------------------------===// 2176 2177 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2178 if (Is == IS_VGPR) { 2179 switch (RegWidth) { 2180 default: return -1; 2181 case 1: return AMDGPU::VGPR_32RegClassID; 2182 case 2: return AMDGPU::VReg_64RegClassID; 2183 case 3: return AMDGPU::VReg_96RegClassID; 2184 case 4: return AMDGPU::VReg_128RegClassID; 2185 case 5: return AMDGPU::VReg_160RegClassID; 2186 case 6: return AMDGPU::VReg_192RegClassID; 2187 case 8: return AMDGPU::VReg_256RegClassID; 2188 case 16: return AMDGPU::VReg_512RegClassID; 2189 case 32: return AMDGPU::VReg_1024RegClassID; 2190 } 2191 } else if (Is == IS_TTMP) { 2192 switch (RegWidth) { 2193 default: return -1; 2194 case 1: return AMDGPU::TTMP_32RegClassID; 2195 case 2: return AMDGPU::TTMP_64RegClassID; 2196 case 4: return AMDGPU::TTMP_128RegClassID; 2197 case 8: return AMDGPU::TTMP_256RegClassID; 2198 case 16: return AMDGPU::TTMP_512RegClassID; 2199 } 2200 } else if (Is == IS_SGPR) { 2201 switch (RegWidth) { 2202 default: return -1; 2203 case 1: return AMDGPU::SGPR_32RegClassID; 2204 case 2: return AMDGPU::SGPR_64RegClassID; 2205 case 3: return AMDGPU::SGPR_96RegClassID; 2206 case 4: return AMDGPU::SGPR_128RegClassID; 2207 case 5: return AMDGPU::SGPR_160RegClassID; 2208 case 6: return AMDGPU::SGPR_192RegClassID; 2209 case 8: return AMDGPU::SGPR_256RegClassID; 2210 case 16: return AMDGPU::SGPR_512RegClassID; 2211 } 2212 } else if (Is == IS_AGPR) { 2213 switch (RegWidth) { 2214 default: return -1; 2215 case 1: return AMDGPU::AGPR_32RegClassID; 2216 case 2: return AMDGPU::AReg_64RegClassID; 2217 case 3: return AMDGPU::AReg_96RegClassID; 2218 case 4: return AMDGPU::AReg_128RegClassID; 2219 case 5: return AMDGPU::AReg_160RegClassID; 2220 case 6: return AMDGPU::AReg_192RegClassID; 2221 case 8: return AMDGPU::AReg_256RegClassID; 2222 case 16: return AMDGPU::AReg_512RegClassID; 2223 case 32: return AMDGPU::AReg_1024RegClassID; 2224 } 2225 } 2226 return -1; 2227 } 2228 2229 static unsigned getSpecialRegForName(StringRef RegName) { 2230 return StringSwitch<unsigned>(RegName) 2231 .Case("exec", AMDGPU::EXEC) 2232 .Case("vcc", AMDGPU::VCC) 2233 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2234 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2235 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2236 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2237 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2238 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2239 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2240 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2241 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2242 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2243 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2244 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2245 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2246 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2247 .Case("m0", AMDGPU::M0) 2248 .Case("vccz", AMDGPU::SRC_VCCZ) 2249 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2250 .Case("execz", AMDGPU::SRC_EXECZ) 2251 .Case("src_execz", AMDGPU::SRC_EXECZ) 2252 .Case("scc", AMDGPU::SRC_SCC) 2253 .Case("src_scc", AMDGPU::SRC_SCC) 2254 .Case("tba", AMDGPU::TBA) 2255 .Case("tma", AMDGPU::TMA) 2256 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2257 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2258 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2259 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2260 .Case("vcc_lo", AMDGPU::VCC_LO) 2261 .Case("vcc_hi", AMDGPU::VCC_HI) 2262 .Case("exec_lo", AMDGPU::EXEC_LO) 2263 .Case("exec_hi", AMDGPU::EXEC_HI) 2264 .Case("tma_lo", AMDGPU::TMA_LO) 2265 .Case("tma_hi", AMDGPU::TMA_HI) 2266 .Case("tba_lo", AMDGPU::TBA_LO) 2267 .Case("tba_hi", AMDGPU::TBA_HI) 2268 .Case("pc", AMDGPU::PC_REG) 2269 .Case("null", AMDGPU::SGPR_NULL) 2270 .Default(AMDGPU::NoRegister); 2271 } 2272 2273 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2274 SMLoc &EndLoc, bool RestoreOnFailure) { 2275 auto R = parseRegister(); 2276 if (!R) return true; 2277 assert(R->isReg()); 2278 RegNo = R->getReg(); 2279 StartLoc = R->getStartLoc(); 2280 EndLoc = R->getEndLoc(); 2281 return false; 2282 } 2283 2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2285 SMLoc &EndLoc) { 2286 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2287 } 2288 2289 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2290 SMLoc &StartLoc, 2291 SMLoc &EndLoc) { 2292 bool Result = 2293 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2294 bool PendingErrors = getParser().hasPendingError(); 2295 getParser().clearPendingErrors(); 2296 if (PendingErrors) 2297 return MatchOperand_ParseFail; 2298 if (Result) 2299 return MatchOperand_NoMatch; 2300 return MatchOperand_Success; 2301 } 2302 2303 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2304 RegisterKind RegKind, unsigned Reg1, 2305 SMLoc Loc) { 2306 switch (RegKind) { 2307 case IS_SPECIAL: 2308 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2309 Reg = AMDGPU::EXEC; 2310 RegWidth = 2; 2311 return true; 2312 } 2313 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2314 Reg = AMDGPU::FLAT_SCR; 2315 RegWidth = 2; 2316 return true; 2317 } 2318 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2319 Reg = AMDGPU::XNACK_MASK; 2320 RegWidth = 2; 2321 return true; 2322 } 2323 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2324 Reg = AMDGPU::VCC; 2325 RegWidth = 2; 2326 return true; 2327 } 2328 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2329 Reg = AMDGPU::TBA; 2330 RegWidth = 2; 2331 return true; 2332 } 2333 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2334 Reg = AMDGPU::TMA; 2335 
RegWidth = 2; 2336 return true; 2337 } 2338 Error(Loc, "register does not fit in the list"); 2339 return false; 2340 case IS_VGPR: 2341 case IS_SGPR: 2342 case IS_AGPR: 2343 case IS_TTMP: 2344 if (Reg1 != Reg + RegWidth) { 2345 Error(Loc, "registers in a list must have consecutive indices"); 2346 return false; 2347 } 2348 RegWidth++; 2349 return true; 2350 default: 2351 llvm_unreachable("unexpected register kind"); 2352 } 2353 } 2354 2355 struct RegInfo { 2356 StringLiteral Name; 2357 RegisterKind Kind; 2358 }; 2359 2360 static constexpr RegInfo RegularRegisters[] = { 2361 {{"v"}, IS_VGPR}, 2362 {{"s"}, IS_SGPR}, 2363 {{"ttmp"}, IS_TTMP}, 2364 {{"acc"}, IS_AGPR}, 2365 {{"a"}, IS_AGPR}, 2366 }; 2367 2368 static bool isRegularReg(RegisterKind Kind) { 2369 return Kind == IS_VGPR || 2370 Kind == IS_SGPR || 2371 Kind == IS_TTMP || 2372 Kind == IS_AGPR; 2373 } 2374 2375 static const RegInfo* getRegularRegInfo(StringRef Str) { 2376 for (const RegInfo &Reg : RegularRegisters) 2377 if (Str.startswith(Reg.Name)) 2378 return &Reg; 2379 return nullptr; 2380 } 2381 2382 static bool getRegNum(StringRef Str, unsigned& Num) { 2383 return !Str.getAsInteger(10, Num); 2384 } 2385 2386 bool 2387 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2388 const AsmToken &NextToken) const { 2389 2390 // A list of consecutive registers: [s0,s1,s2,s3] 2391 if (Token.is(AsmToken::LBrac)) 2392 return true; 2393 2394 if (!Token.is(AsmToken::Identifier)) 2395 return false; 2396 2397 // A single register like s0 or a range of registers like s[0:1] 2398 2399 StringRef Str = Token.getString(); 2400 const RegInfo *Reg = getRegularRegInfo(Str); 2401 if (Reg) { 2402 StringRef RegName = Reg->Name; 2403 StringRef RegSuffix = Str.substr(RegName.size()); 2404 if (!RegSuffix.empty()) { 2405 unsigned Num; 2406 // A single register with an index: rXX 2407 if (getRegNum(RegSuffix, Num)) 2408 return true; 2409 } else { 2410 // A range of registers: r[XX:YY]. 2411 if (NextToken.is(AsmToken::LBrac)) 2412 return true; 2413 } 2414 } 2415 2416 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2417 } 2418 2419 bool 2420 AMDGPUAsmParser::isRegister() 2421 { 2422 return isRegister(getToken(), peekToken()); 2423 } 2424 2425 unsigned 2426 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2427 unsigned RegNum, 2428 unsigned RegWidth, 2429 SMLoc Loc) { 2430 2431 assert(isRegularReg(RegKind)); 2432 2433 unsigned AlignSize = 1; 2434 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2435 // SGPR and TTMP registers must be aligned. 2436 // Max required alignment is 4 dwords. 
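    // For example, a 2-dword tuple such as s[2:3] must start at an even
    // index, while 4-dword and wider tuples (s[4:7], s[8:15]) must start
    // at an index that is a multiple of 4.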
2437 AlignSize = std::min(RegWidth, 4u); 2438 } 2439 2440 if (RegNum % AlignSize != 0) { 2441 Error(Loc, "invalid register alignment"); 2442 return AMDGPU::NoRegister; 2443 } 2444 2445 unsigned RegIdx = RegNum / AlignSize; 2446 int RCID = getRegClass(RegKind, RegWidth); 2447 if (RCID == -1) { 2448 Error(Loc, "invalid or unsupported register size"); 2449 return AMDGPU::NoRegister; 2450 } 2451 2452 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2453 const MCRegisterClass RC = TRI->getRegClass(RCID); 2454 if (RegIdx >= RC.getNumRegs()) { 2455 Error(Loc, "register index is out of range"); 2456 return AMDGPU::NoRegister; 2457 } 2458 2459 return RC.getRegister(RegIdx); 2460 } 2461 2462 bool 2463 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2464 int64_t RegLo, RegHi; 2465 if (!skipToken(AsmToken::LBrac, "missing register index")) 2466 return false; 2467 2468 SMLoc FirstIdxLoc = getLoc(); 2469 SMLoc SecondIdxLoc; 2470 2471 if (!parseExpr(RegLo)) 2472 return false; 2473 2474 if (trySkipToken(AsmToken::Colon)) { 2475 SecondIdxLoc = getLoc(); 2476 if (!parseExpr(RegHi)) 2477 return false; 2478 } else { 2479 RegHi = RegLo; 2480 } 2481 2482 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2483 return false; 2484 2485 if (!isUInt<32>(RegLo)) { 2486 Error(FirstIdxLoc, "invalid register index"); 2487 return false; 2488 } 2489 2490 if (!isUInt<32>(RegHi)) { 2491 Error(SecondIdxLoc, "invalid register index"); 2492 return false; 2493 } 2494 2495 if (RegLo > RegHi) { 2496 Error(FirstIdxLoc, "first register index should not exceed second index"); 2497 return false; 2498 } 2499 2500 Num = static_cast<unsigned>(RegLo); 2501 Width = (RegHi - RegLo) + 1; 2502 return true; 2503 } 2504 2505 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2506 unsigned &RegNum, unsigned &RegWidth, 2507 SmallVectorImpl<AsmToken> &Tokens) { 2508 assert(isToken(AsmToken::Identifier)); 2509 unsigned Reg = getSpecialRegForName(getTokenStr()); 2510 if (Reg) { 2511 RegNum = 0; 2512 RegWidth = 1; 2513 RegKind = IS_SPECIAL; 2514 Tokens.push_back(getToken()); 2515 lex(); // skip register name 2516 } 2517 return Reg; 2518 } 2519 2520 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2521 unsigned &RegNum, unsigned &RegWidth, 2522 SmallVectorImpl<AsmToken> &Tokens) { 2523 assert(isToken(AsmToken::Identifier)); 2524 StringRef RegName = getTokenStr(); 2525 auto Loc = getLoc(); 2526 2527 const RegInfo *RI = getRegularRegInfo(RegName); 2528 if (!RI) { 2529 Error(Loc, "invalid register name"); 2530 return AMDGPU::NoRegister; 2531 } 2532 2533 Tokens.push_back(getToken()); 2534 lex(); // skip register name 2535 2536 RegKind = RI->Kind; 2537 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2538 if (!RegSuffix.empty()) { 2539 // Single 32-bit register: vXX. 2540 if (!getRegNum(RegSuffix, RegNum)) { 2541 Error(Loc, "invalid register index"); 2542 return AMDGPU::NoRegister; 2543 } 2544 RegWidth = 1; 2545 } else { 2546 // Range of registers: v[XX:YY]. ":YY" is optional. 
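    // For example, v[8:11] selects a 4-register tuple, while v[8] is
    // equivalent to v8.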
2547 if (!ParseRegRange(RegNum, RegWidth)) 2548 return AMDGPU::NoRegister; 2549 } 2550 2551 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2552 } 2553 2554 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2555 unsigned &RegWidth, 2556 SmallVectorImpl<AsmToken> &Tokens) { 2557 unsigned Reg = AMDGPU::NoRegister; 2558 auto ListLoc = getLoc(); 2559 2560 if (!skipToken(AsmToken::LBrac, 2561 "expected a register or a list of registers")) { 2562 return AMDGPU::NoRegister; 2563 } 2564 2565 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2566 2567 auto Loc = getLoc(); 2568 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2569 return AMDGPU::NoRegister; 2570 if (RegWidth != 1) { 2571 Error(Loc, "expected a single 32-bit register"); 2572 return AMDGPU::NoRegister; 2573 } 2574 2575 for (; trySkipToken(AsmToken::Comma); ) { 2576 RegisterKind NextRegKind; 2577 unsigned NextReg, NextRegNum, NextRegWidth; 2578 Loc = getLoc(); 2579 2580 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2581 NextRegNum, NextRegWidth, 2582 Tokens)) { 2583 return AMDGPU::NoRegister; 2584 } 2585 if (NextRegWidth != 1) { 2586 Error(Loc, "expected a single 32-bit register"); 2587 return AMDGPU::NoRegister; 2588 } 2589 if (NextRegKind != RegKind) { 2590 Error(Loc, "registers in a list must be of the same kind"); 2591 return AMDGPU::NoRegister; 2592 } 2593 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2594 return AMDGPU::NoRegister; 2595 } 2596 2597 if (!skipToken(AsmToken::RBrac, 2598 "expected a comma or a closing square bracket")) { 2599 return AMDGPU::NoRegister; 2600 } 2601 2602 if (isRegularReg(RegKind)) 2603 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2604 2605 return Reg; 2606 } 2607 2608 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2609 unsigned &RegNum, unsigned &RegWidth, 2610 SmallVectorImpl<AsmToken> &Tokens) { 2611 auto Loc = getLoc(); 2612 Reg = AMDGPU::NoRegister; 2613 2614 if (isToken(AsmToken::Identifier)) { 2615 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2616 if (Reg == AMDGPU::NoRegister) 2617 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2618 } else { 2619 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2620 } 2621 2622 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2623 if (Reg == AMDGPU::NoRegister) { 2624 assert(Parser.hasPendingError()); 2625 return false; 2626 } 2627 2628 if (!subtargetHasRegister(*TRI, Reg)) { 2629 if (Reg == AMDGPU::SGPR_NULL) { 2630 Error(Loc, "'null' operand is not supported on this GPU"); 2631 } else { 2632 Error(Loc, "register not available on this GPU"); 2633 } 2634 return false; 2635 } 2636 2637 return true; 2638 } 2639 2640 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2641 unsigned &RegNum, unsigned &RegWidth, 2642 bool RestoreOnFailure /*=false*/) { 2643 Reg = AMDGPU::NoRegister; 2644 2645 SmallVector<AsmToken, 1> Tokens; 2646 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2647 if (RestoreOnFailure) { 2648 while (!Tokens.empty()) { 2649 getLexer().UnLex(Tokens.pop_back_val()); 2650 } 2651 } 2652 return true; 2653 } 2654 return false; 2655 } 2656 2657 Optional<StringRef> 2658 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2659 switch (RegKind) { 2660 case IS_VGPR: 2661 return StringRef(".amdgcn.next_free_vgpr"); 2662 case IS_SGPR: 2663 return StringRef(".amdgcn.next_free_sgpr"); 2664 default: 2665 return None; 2666 } 2667 } 2668 2669 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2670 auto SymbolName = getGprCountSymbolName(RegKind); 2671 assert(SymbolName && "initializing invalid register kind"); 2672 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2673 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2674 } 2675 2676 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2677 unsigned DwordRegIndex, 2678 unsigned RegWidth) { 2679 // Symbols are only defined for GCN targets 2680 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2681 return true; 2682 2683 auto SymbolName = getGprCountSymbolName(RegKind); 2684 if (!SymbolName) 2685 return true; 2686 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2687 2688 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2689 int64_t OldCount; 2690 2691 if (!Sym->isVariable()) 2692 return !Error(getLoc(), 2693 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2694 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2695 return !Error( 2696 getLoc(), 2697 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2698 2699 if (OldCount <= NewMax) 2700 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2701 2702 return true; 2703 } 2704 2705 std::unique_ptr<AMDGPUOperand> 2706 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2707 const auto &Tok = getToken(); 2708 SMLoc StartLoc = Tok.getLoc(); 2709 SMLoc EndLoc = Tok.getEndLoc(); 2710 RegisterKind RegKind; 2711 unsigned Reg, RegNum, RegWidth; 2712 2713 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2714 return nullptr; 2715 } 2716 if (isHsaAbiVersion3Or4(&getSTI())) { 2717 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2718 return nullptr; 2719 } else 2720 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2721 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2722 } 2723 2724 OperandMatchResultTy 2725 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2726 // TODO: add syntactic sugar for 1/(2*PI) 2727 2728 assert(!isRegister()); 2729 assert(!isModifier()); 2730 2731 const auto& Tok = getToken(); 2732 const auto& NextTok = peekToken(); 2733 bool IsReal = Tok.is(AsmToken::Real); 2734 SMLoc S = getLoc(); 2735 bool Negate = false; 2736 2737 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2738 lex(); 2739 IsReal = true; 2740 Negate = true; 2741 } 2742 2743 if (IsReal) { 2744 // Floating-point expressions are not supported. 2745 // Can only allow floating-point literals with an 2746 // optional sign. 2747 2748 StringRef Num = getTokenStr(); 2749 lex(); 2750 2751 APFloat RealVal(APFloat::IEEEdouble()); 2752 auto roundMode = APFloat::rmNearestTiesToEven; 2753 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2754 return MatchOperand_ParseFail; 2755 } 2756 if (Negate) 2757 RealVal.changeSign(); 2758 2759 Operands.push_back( 2760 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2761 AMDGPUOperand::ImmTyNone, true)); 2762 2763 return MatchOperand_Success; 2764 2765 } else { 2766 int64_t IntVal; 2767 const MCExpr *Expr; 2768 SMLoc S = getLoc(); 2769 2770 if (HasSP3AbsModifier) { 2771 // This is a workaround for handling expressions 2772 // as arguments of SP3 'abs' modifier, for example: 2773 // |1.0| 2774 // |-1| 2775 // |1+x| 2776 // This syntax is not compatible with syntax of standard 2777 // MC expressions (due to the trailing '|'). 
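      // Using parsePrimaryExpr() here avoids treating the trailing '|' as
      // a bitwise-or operator, which is how a full parseExpression() call
      // would interpret it.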
2778 SMLoc EndLoc; 2779 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2780 return MatchOperand_ParseFail; 2781 } else { 2782 if (Parser.parseExpression(Expr)) 2783 return MatchOperand_ParseFail; 2784 } 2785 2786 if (Expr->evaluateAsAbsolute(IntVal)) { 2787 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2788 } else { 2789 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2790 } 2791 2792 return MatchOperand_Success; 2793 } 2794 2795 return MatchOperand_NoMatch; 2796 } 2797 2798 OperandMatchResultTy 2799 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2800 if (!isRegister()) 2801 return MatchOperand_NoMatch; 2802 2803 if (auto R = parseRegister()) { 2804 assert(R->isReg()); 2805 Operands.push_back(std::move(R)); 2806 return MatchOperand_Success; 2807 } 2808 return MatchOperand_ParseFail; 2809 } 2810 2811 OperandMatchResultTy 2812 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2813 auto res = parseReg(Operands); 2814 if (res != MatchOperand_NoMatch) { 2815 return res; 2816 } else if (isModifier()) { 2817 return MatchOperand_NoMatch; 2818 } else { 2819 return parseImm(Operands, HasSP3AbsMod); 2820 } 2821 } 2822 2823 bool 2824 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2825 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2826 const auto &str = Token.getString(); 2827 return str == "abs" || str == "neg" || str == "sext"; 2828 } 2829 return false; 2830 } 2831 2832 bool 2833 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2834 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2835 } 2836 2837 bool 2838 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2839 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2840 } 2841 2842 bool 2843 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2844 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2845 } 2846 2847 // Check if this is an operand modifier or an opcode modifier 2848 // which may look like an expression but it is not. We should 2849 // avoid parsing these modifiers as expressions. Currently 2850 // recognized sequences are: 2851 // |...| 2852 // abs(...) 2853 // neg(...) 2854 // sext(...) 2855 // -reg 2856 // -|...| 2857 // -abs(...) 2858 // name:... 2859 // Note that simple opcode modifiers like 'gds' may be parsed as 2860 // expressions; this is a special case. See getExpressionAsToken. 2861 // 2862 bool 2863 AMDGPUAsmParser::isModifier() { 2864 2865 AsmToken Tok = getToken(); 2866 AsmToken NextToken[2]; 2867 peekTokens(NextToken); 2868 2869 return isOperandModifier(Tok, NextToken[0]) || 2870 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2871 isOpcodeModifierWithVal(Tok, NextToken[0]); 2872 } 2873 2874 // Check if the current token is an SP3 'neg' modifier. 2875 // Currently this modifier is allowed in the following context: 2876 // 2877 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2878 // 2. Before an 'abs' modifier: -abs(...) 2879 // 3. Before an SP3 'abs' modifier: -|...| 2880 // 2881 // In all other cases "-" is handled as a part 2882 // of an expression that follows the sign. 
2883 //
2884 // Note: When "-" is followed by an integer literal,
2885 // this is interpreted as integer negation rather
2886 // than a floating-point NEG modifier applied to the literal.
2887 // Besides being counter-intuitive, such use of the floating-point
2888 // NEG modifier would have resulted in a different meaning
2889 // of integer literals used with VOP1/2/C and VOP3,
2890 // for example:
2891 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2892 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2893 // Negative fp literals with a preceding "-" are
2894 // handled likewise for uniformity.
2895 //
2896 bool
2897 AMDGPUAsmParser::parseSP3NegModifier() {
2898
2899 AsmToken NextToken[2];
2900 peekTokens(NextToken);
2901
2902 if (isToken(AsmToken::Minus) &&
2903 (isRegister(NextToken[0], NextToken[1]) ||
2904 NextToken[0].is(AsmToken::Pipe) ||
2905 isId(NextToken[0], "abs"))) {
2906 lex();
2907 return true;
2908 }
2909
2910 return false;
2911 }
2912
2913 OperandMatchResultTy
2914 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2915 bool AllowImm) {
2916 bool Neg, SP3Neg;
2917 bool Abs, SP3Abs;
2918 SMLoc Loc;
2919
2920 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2921 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2922 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2923 return MatchOperand_ParseFail;
2924 }
2925
2926 SP3Neg = parseSP3NegModifier();
2927
2928 Loc = getLoc();
2929 Neg = trySkipId("neg");
2930 if (Neg && SP3Neg) {
2931 Error(Loc, "expected register or immediate");
2932 return MatchOperand_ParseFail;
2933 }
2934 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2935 return MatchOperand_ParseFail;
2936
2937 Abs = trySkipId("abs");
2938 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2939 return MatchOperand_ParseFail;
2940
2941 Loc = getLoc();
2942 SP3Abs = trySkipToken(AsmToken::Pipe);
2943 if (Abs && SP3Abs) {
2944 Error(Loc, "expected register or immediate");
2945 return MatchOperand_ParseFail;
2946 }
2947
2948 OperandMatchResultTy Res;
2949 if (AllowImm) {
2950 Res = parseRegOrImm(Operands, SP3Abs);
2951 } else {
2952 Res = parseReg(Operands);
2953 }
2954 if (Res != MatchOperand_Success) {
2955 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2956 } 2957 2958 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2959 return MatchOperand_ParseFail; 2960 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2961 return MatchOperand_ParseFail; 2962 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2963 return MatchOperand_ParseFail; 2964 2965 AMDGPUOperand::Modifiers Mods; 2966 Mods.Abs = Abs || SP3Abs; 2967 Mods.Neg = Neg || SP3Neg; 2968 2969 if (Mods.hasFPModifiers()) { 2970 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2971 if (Op.isExpr()) { 2972 Error(Op.getStartLoc(), "expected an absolute expression"); 2973 return MatchOperand_ParseFail; 2974 } 2975 Op.setModifiers(Mods); 2976 } 2977 return MatchOperand_Success; 2978 } 2979 2980 OperandMatchResultTy 2981 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2982 bool AllowImm) { 2983 bool Sext = trySkipId("sext"); 2984 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2985 return MatchOperand_ParseFail; 2986 2987 OperandMatchResultTy Res; 2988 if (AllowImm) { 2989 Res = parseRegOrImm(Operands); 2990 } else { 2991 Res = parseReg(Operands); 2992 } 2993 if (Res != MatchOperand_Success) { 2994 return Sext? MatchOperand_ParseFail : Res; 2995 } 2996 2997 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2998 return MatchOperand_ParseFail; 2999 3000 AMDGPUOperand::Modifiers Mods; 3001 Mods.Sext = Sext; 3002 3003 if (Mods.hasIntModifiers()) { 3004 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3005 if (Op.isExpr()) { 3006 Error(Op.getStartLoc(), "expected an absolute expression"); 3007 return MatchOperand_ParseFail; 3008 } 3009 Op.setModifiers(Mods); 3010 } 3011 3012 return MatchOperand_Success; 3013 } 3014 3015 OperandMatchResultTy 3016 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3017 return parseRegOrImmWithFPInputMods(Operands, false); 3018 } 3019 3020 OperandMatchResultTy 3021 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3022 return parseRegOrImmWithIntInputMods(Operands, false); 3023 } 3024 3025 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3026 auto Loc = getLoc(); 3027 if (trySkipId("off")) { 3028 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3029 AMDGPUOperand::ImmTyOff, false)); 3030 return MatchOperand_Success; 3031 } 3032 3033 if (!isRegister()) 3034 return MatchOperand_NoMatch; 3035 3036 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3037 if (Reg) { 3038 Operands.push_back(std::move(Reg)); 3039 return MatchOperand_Success; 3040 } 3041 3042 return MatchOperand_ParseFail; 3043 3044 } 3045 3046 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3047 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3048 3049 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3050 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3051 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3052 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3053 return Match_InvalidOperand; 3054 3055 if ((TSFlags & SIInstrFlags::VOP3) && 3056 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3057 getForcedEncodingSize() != 64) 3058 return Match_PreferE32; 3059 3060 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3061 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3062 // v_mac_f32/16 allow only dst_sel == DWORD; 3063 auto OpNum = 3064 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3065 const auto &Op = Inst.getOperand(OpNum); 3066 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3067 return Match_InvalidOperand; 3068 } 3069 } 3070 3071 return Match_Success; 3072 } 3073 3074 static ArrayRef<unsigned> getAllVariants() { 3075 static const unsigned Variants[] = { 3076 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3077 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3078 }; 3079 3080 return makeArrayRef(Variants); 3081 } 3082 3083 // What asm variants we should check 3084 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3085 if (getForcedEncodingSize() == 32) { 3086 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3087 return makeArrayRef(Variants); 3088 } 3089 3090 if (isForcedVOP3()) { 3091 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3092 return makeArrayRef(Variants); 3093 } 3094 3095 if (isForcedSDWA()) { 3096 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3097 AMDGPUAsmVariants::SDWA9}; 3098 return makeArrayRef(Variants); 3099 } 3100 3101 if (isForcedDPP()) { 3102 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3103 return makeArrayRef(Variants); 3104 } 3105 3106 return getAllVariants(); 3107 } 3108 3109 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3110 if (getForcedEncodingSize() == 32) 3111 return "e32"; 3112 3113 if (isForcedVOP3()) 3114 return "e64"; 3115 3116 if (isForcedSDWA()) 3117 return "sdwa"; 3118 3119 if (isForcedDPP()) 3120 return "dpp"; 3121 3122 return ""; 3123 } 3124 3125 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3126 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3127 const unsigned Num = Desc.getNumImplicitUses(); 3128 for (unsigned i = 0; i < Num; ++i) { 3129 unsigned Reg = Desc.ImplicitUses[i]; 3130 switch (Reg) { 3131 case AMDGPU::FLAT_SCR: 3132 case AMDGPU::VCC: 3133 case AMDGPU::VCC_LO: 3134 case AMDGPU::VCC_HI: 3135 case AMDGPU::M0: 3136 return Reg; 3137 default: 3138 break; 3139 } 3140 } 3141 return AMDGPU::NoRegister; 3142 } 3143 3144 // NB: This code is correct only when used to check constant 3145 // bus limitations because GFX7 support no f16 inline constants. 3146 // Note that there are no cases when a GFX7 opcode violates 3147 // constant bus limitations due to the use of an f16 constant. 
3148 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3149 unsigned OpIdx) const { 3150 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3151 3152 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3153 return false; 3154 } 3155 3156 const MCOperand &MO = Inst.getOperand(OpIdx); 3157 3158 int64_t Val = MO.getImm(); 3159 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3160 3161 switch (OpSize) { // expected operand size 3162 case 8: 3163 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3164 case 4: 3165 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3166 case 2: { 3167 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3168 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3169 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3170 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3171 return AMDGPU::isInlinableIntLiteral(Val); 3172 3173 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3174 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3175 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3176 return AMDGPU::isInlinableIntLiteralV216(Val); 3177 3178 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3179 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3180 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3181 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3182 3183 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3184 } 3185 default: 3186 llvm_unreachable("invalid operand size"); 3187 } 3188 } 3189 3190 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3191 if (!isGFX10Plus()) 3192 return 1; 3193 3194 switch (Opcode) { 3195 // 64-bit shift instructions can use only one scalar value input 3196 case AMDGPU::V_LSHLREV_B64_e64: 3197 case AMDGPU::V_LSHLREV_B64_gfx10: 3198 case AMDGPU::V_LSHRREV_B64_e64: 3199 case AMDGPU::V_LSHRREV_B64_gfx10: 3200 case AMDGPU::V_ASHRREV_I64_e64: 3201 case AMDGPU::V_ASHRREV_I64_gfx10: 3202 case AMDGPU::V_LSHL_B64_e64: 3203 case AMDGPU::V_LSHR_B64_e64: 3204 case AMDGPU::V_ASHR_I64_e64: 3205 return 1; 3206 default: 3207 return 2; 3208 } 3209 } 3210 3211 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3212 const MCOperand &MO = Inst.getOperand(OpIdx); 3213 if (MO.isImm()) { 3214 return !isInlineConstant(Inst, OpIdx); 3215 } else if (MO.isReg()) { 3216 auto Reg = MO.getReg(); 3217 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3218 auto PReg = mc2PseudoReg(Reg); 3219 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3220 } else { 3221 return true; 3222 } 3223 } 3224 3225 bool 3226 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3227 const OperandVector &Operands) { 3228 const unsigned Opcode = Inst.getOpcode(); 3229 const MCInstrDesc &Desc = MII.get(Opcode); 3230 unsigned LastSGPR = AMDGPU::NoRegister; 3231 unsigned ConstantBusUseCount = 0; 3232 unsigned NumLiterals = 0; 3233 unsigned LiteralSize; 3234 3235 if (Desc.TSFlags & 3236 (SIInstrFlags::VOPC | 3237 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3238 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3239 SIInstrFlags::SDWA)) { 3240 // Check special imm operands (used by madmk, etc) 3241 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3242 ++ConstantBusUseCount; 3243 } 3244 3245 SmallDenseSet<unsigned> SGPRsUsed; 3246 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3247 if (SGPRUsed != AMDGPU::NoRegister) { 3248 SGPRsUsed.insert(SGPRUsed); 3249 ++ConstantBusUseCount; 3250 } 3251 3252 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3253 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3254 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3255
3256 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3257
3258 for (int OpIdx : OpIndices) {
3259 if (OpIdx == -1) break;
3260
3261 const MCOperand &MO = Inst.getOperand(OpIdx);
3262 if (usesConstantBus(Inst, OpIdx)) {
3263 if (MO.isReg()) {
3264 LastSGPR = mc2PseudoReg(MO.getReg());
3265 // Pairs of registers with partial intersections like these
3266 // s0, s[0:1]
3267 // flat_scratch_lo, flat_scratch
3268 // flat_scratch_lo, flat_scratch_hi
3269 // are theoretically valid but they are disabled anyway.
3270 // Note that this code mimics SIInstrInfo::verifyInstruction
3271 if (!SGPRsUsed.count(LastSGPR)) {
3272 SGPRsUsed.insert(LastSGPR);
3273 ++ConstantBusUseCount;
3274 }
3275 } else { // Expression or a literal
3276
3277 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3278 continue; // special operand like VINTERP attr_chan
3279
3280 // An instruction may use only one literal.
3281 // This has been validated in a previous step.
3282 // See validateVOP3Literal.
3283 // This literal may be used as more than one operand.
3284 // If all these operands are of the same size,
3285 // this literal counts as one scalar value.
3286 // Otherwise it counts as 2 scalar values.
3287 // See "GFX10 Shader Programming", section 3.6.2.3.
3288
3289 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3290 if (Size < 4) Size = 4;
3291
3292 if (NumLiterals == 0) {
3293 NumLiterals = 1;
3294 LiteralSize = Size;
3295 } else if (LiteralSize != Size) {
3296 NumLiterals = 2;
3297 }
3298 }
3299 }
3300 }
3301 }
3302 ConstantBusUseCount += NumLiterals;
3303
3304 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3305 return true;
3306
3307 SMLoc LitLoc = getLitLoc(Operands);
3308 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3309 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3310 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3311 return false; 3312 } 3313 3314 bool 3315 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3316 const OperandVector &Operands) { 3317 const unsigned Opcode = Inst.getOpcode(); 3318 const MCInstrDesc &Desc = MII.get(Opcode); 3319 3320 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3321 if (DstIdx == -1 || 3322 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3323 return true; 3324 } 3325 3326 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3327 3328 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3329 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3330 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3331 3332 assert(DstIdx != -1); 3333 const MCOperand &Dst = Inst.getOperand(DstIdx); 3334 assert(Dst.isReg()); 3335 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3336 3337 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3338 3339 for (int SrcIdx : SrcIndices) { 3340 if (SrcIdx == -1) break; 3341 const MCOperand &Src = Inst.getOperand(SrcIdx); 3342 if (Src.isReg()) { 3343 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3344 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3345 Error(getRegLoc(SrcReg, Operands), 3346 "destination must be different than all sources"); 3347 return false; 3348 } 3349 } 3350 } 3351 3352 return true; 3353 } 3354 3355 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3356 3357 const unsigned Opc = Inst.getOpcode(); 3358 const MCInstrDesc &Desc = MII.get(Opc); 3359 3360 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3361 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3362 assert(ClampIdx != -1); 3363 return Inst.getOperand(ClampIdx).getImm() == 0; 3364 } 3365 3366 return true; 3367 } 3368 3369 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3370 3371 const unsigned Opc = Inst.getOpcode(); 3372 const MCInstrDesc &Desc = MII.get(Opc); 3373 3374 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3375 return true; 3376 3377 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3378 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3379 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3380 3381 assert(VDataIdx != -1); 3382 3383 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3384 return true; 3385 3386 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3387 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3388 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3389 if (DMask == 0) 3390 DMask = 1; 3391 3392 unsigned DataSize = 3393 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3394 if (hasPackedD16()) { 3395 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3396 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3397 DataSize = (DataSize + 1) / 2; 3398 } 3399 3400 return (VDataSize / 4) == DataSize + TFESize; 3401 } 3402 3403 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3404 const unsigned Opc = Inst.getOpcode(); 3405 const MCInstrDesc &Desc = MII.get(Opc); 3406 3407 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3408 return true; 3409 3410 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3411 3412 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3413 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3414 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3415 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3416 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3417 3418 assert(VAddr0Idx != -1); 3419 assert(SrsrcIdx != -1); 3420 assert(SrsrcIdx > VAddr0Idx); 3421 3422 if (DimIdx == -1) 3423 return true; // intersect_ray 3424 3425 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3426 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3427 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3428 unsigned VAddrSize = 3429 IsNSA ? SrsrcIdx - VAddr0Idx 3430 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3431 3432 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3433 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3434 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3435 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3436 if (!IsNSA) { 3437 if (AddrSize > 8) 3438 AddrSize = 16; 3439 else if (AddrSize > 4) 3440 AddrSize = 8; 3441 } 3442 3443 return VAddrSize == AddrSize; 3444 } 3445 3446 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3447 3448 const unsigned Opc = Inst.getOpcode(); 3449 const MCInstrDesc &Desc = MII.get(Opc); 3450 3451 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3452 return true; 3453 if (!Desc.mayLoad() || !Desc.mayStore()) 3454 return true; // Not atomic 3455 3456 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3457 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3458 3459 // This is an incomplete check because image_atomic_cmpswap 3460 // may only use 0x3 and 0xf while other atomic operations 3461 // may use 0x1 and 0x3. However these limitations are 3462 // verified when we check that dmask matches dst size. 3463 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3464 } 3465 3466 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3467 3468 const unsigned Opc = Inst.getOpcode(); 3469 const MCInstrDesc &Desc = MII.get(Opc); 3470 3471 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3472 return true; 3473 3474 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3475 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3476 3477 // GATHER4 instructions use dmask in a different fashion compared to 3478 // other MIMG instructions. The only useful DMASK values are 3479 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3480 // (red,red,red,red) etc.) The ISA document doesn't mention 3481 // this. 
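  // For example, dmask:0x2 makes a gather return the green component of
  // each of the four sampled texels.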
3482 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3483 } 3484 3485 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3486 const unsigned Opc = Inst.getOpcode(); 3487 const MCInstrDesc &Desc = MII.get(Opc); 3488 3489 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3490 return true; 3491 3492 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3493 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3494 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3495 3496 if (!BaseOpcode->MSAA) 3497 return true; 3498 3499 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3500 assert(DimIdx != -1); 3501 3502 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3503 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3504 3505 return DimInfo->MSAA; 3506 } 3507 3508 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3509 { 3510 switch (Opcode) { 3511 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3512 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3513 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3514 return true; 3515 default: 3516 return false; 3517 } 3518 } 3519 3520 // movrels* opcodes should only allow VGPRS as src0. 3521 // This is specified in .td description for vop1/vop3, 3522 // but sdwa is handled differently. See isSDWAOperand. 3523 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3524 const OperandVector &Operands) { 3525 3526 const unsigned Opc = Inst.getOpcode(); 3527 const MCInstrDesc &Desc = MII.get(Opc); 3528 3529 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3530 return true; 3531 3532 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3533 assert(Src0Idx != -1); 3534 3535 SMLoc ErrLoc; 3536 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3537 if (Src0.isReg()) { 3538 auto Reg = mc2PseudoReg(Src0.getReg()); 3539 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3540 if (!isSGPR(Reg, TRI)) 3541 return true; 3542 ErrLoc = getRegLoc(Reg, Operands); 3543 } else { 3544 ErrLoc = getConstLoc(Operands); 3545 } 3546 3547 Error(ErrLoc, "source operand must be a VGPR"); 3548 return false; 3549 } 3550 3551 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3552 const OperandVector &Operands) { 3553 3554 const unsigned Opc = Inst.getOpcode(); 3555 3556 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3557 return true; 3558 3559 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3560 assert(Src0Idx != -1); 3561 3562 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3563 if (!Src0.isReg()) 3564 return true; 3565 3566 auto Reg = mc2PseudoReg(Src0.getReg()); 3567 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3568 if (isSGPR(Reg, TRI)) { 3569 Error(getRegLoc(Reg, Operands), 3570 "source operand must be either a VGPR or an inline constant"); 3571 return false; 3572 } 3573 3574 return true; 3575 } 3576 3577 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3578 switch (Inst.getOpcode()) { 3579 default: 3580 return true; 3581 case V_DIV_SCALE_F32_gfx6_gfx7: 3582 case V_DIV_SCALE_F32_vi: 3583 case V_DIV_SCALE_F32_gfx10: 3584 case V_DIV_SCALE_F64_gfx6_gfx7: 3585 case V_DIV_SCALE_F64_vi: 3586 case V_DIV_SCALE_F64_gfx10: 3587 break; 3588 } 3589 3590 // TODO: Check that src0 = src1 or src2. 
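  // None of the source operands of v_div_scale_* may carry an 'abs'
  // (or SP3 '|...|') modifier; reject the instruction if any of them does.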
3591 3592 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3593 AMDGPU::OpName::src1_modifiers, 3594 AMDGPU::OpName::src2_modifiers}) { 3595 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3596 .getImm() & 3597 SISrcMods::ABS) { 3598 return false; 3599 } 3600 } 3601 3602 return true; 3603 } 3604 3605 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3606 3607 const unsigned Opc = Inst.getOpcode(); 3608 const MCInstrDesc &Desc = MII.get(Opc); 3609 3610 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3611 return true; 3612 3613 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3614 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3615 if (isCI() || isSI()) 3616 return false; 3617 } 3618 3619 return true; 3620 } 3621 3622 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3623 const unsigned Opc = Inst.getOpcode(); 3624 const MCInstrDesc &Desc = MII.get(Opc); 3625 3626 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3627 return true; 3628 3629 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3630 if (DimIdx < 0) 3631 return true; 3632 3633 long Imm = Inst.getOperand(DimIdx).getImm(); 3634 if (Imm < 0 || Imm >= 8) 3635 return false; 3636 3637 return true; 3638 } 3639 3640 static bool IsRevOpcode(const unsigned Opcode) 3641 { 3642 switch (Opcode) { 3643 case AMDGPU::V_SUBREV_F32_e32: 3644 case AMDGPU::V_SUBREV_F32_e64: 3645 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3646 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3647 case AMDGPU::V_SUBREV_F32_e32_vi: 3648 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3649 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3650 case AMDGPU::V_SUBREV_F32_e64_vi: 3651 3652 case AMDGPU::V_SUBREV_CO_U32_e32: 3653 case AMDGPU::V_SUBREV_CO_U32_e64: 3654 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3655 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3656 3657 case AMDGPU::V_SUBBREV_U32_e32: 3658 case AMDGPU::V_SUBBREV_U32_e64: 3659 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3660 case AMDGPU::V_SUBBREV_U32_e32_vi: 3661 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3662 case AMDGPU::V_SUBBREV_U32_e64_vi: 3663 3664 case AMDGPU::V_SUBREV_U32_e32: 3665 case AMDGPU::V_SUBREV_U32_e64: 3666 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3667 case AMDGPU::V_SUBREV_U32_e32_vi: 3668 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3669 case AMDGPU::V_SUBREV_U32_e64_vi: 3670 3671 case AMDGPU::V_SUBREV_F16_e32: 3672 case AMDGPU::V_SUBREV_F16_e64: 3673 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3674 case AMDGPU::V_SUBREV_F16_e32_vi: 3675 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3676 case AMDGPU::V_SUBREV_F16_e64_vi: 3677 3678 case AMDGPU::V_SUBREV_U16_e32: 3679 case AMDGPU::V_SUBREV_U16_e64: 3680 case AMDGPU::V_SUBREV_U16_e32_vi: 3681 case AMDGPU::V_SUBREV_U16_e64_vi: 3682 3683 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3684 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3685 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3686 3687 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3688 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3689 3690 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3691 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3692 3693 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3694 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3695 3696 case AMDGPU::V_LSHRREV_B32_e32: 3697 case AMDGPU::V_LSHRREV_B32_e64: 3698 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3699 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3700 case AMDGPU::V_LSHRREV_B32_e32_vi: 3701 case AMDGPU::V_LSHRREV_B32_e64_vi: 3702 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3703 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3704 3705 case AMDGPU::V_ASHRREV_I32_e32: 3706 case 
AMDGPU::V_ASHRREV_I32_e64: 3707 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3708 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3709 case AMDGPU::V_ASHRREV_I32_e32_vi: 3710 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3711 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3712 case AMDGPU::V_ASHRREV_I32_e64_vi: 3713 3714 case AMDGPU::V_LSHLREV_B32_e32: 3715 case AMDGPU::V_LSHLREV_B32_e64: 3716 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3717 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3718 case AMDGPU::V_LSHLREV_B32_e32_vi: 3719 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3720 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3721 case AMDGPU::V_LSHLREV_B32_e64_vi: 3722 3723 case AMDGPU::V_LSHLREV_B16_e32: 3724 case AMDGPU::V_LSHLREV_B16_e64: 3725 case AMDGPU::V_LSHLREV_B16_e32_vi: 3726 case AMDGPU::V_LSHLREV_B16_e64_vi: 3727 case AMDGPU::V_LSHLREV_B16_gfx10: 3728 3729 case AMDGPU::V_LSHRREV_B16_e32: 3730 case AMDGPU::V_LSHRREV_B16_e64: 3731 case AMDGPU::V_LSHRREV_B16_e32_vi: 3732 case AMDGPU::V_LSHRREV_B16_e64_vi: 3733 case AMDGPU::V_LSHRREV_B16_gfx10: 3734 3735 case AMDGPU::V_ASHRREV_I16_e32: 3736 case AMDGPU::V_ASHRREV_I16_e64: 3737 case AMDGPU::V_ASHRREV_I16_e32_vi: 3738 case AMDGPU::V_ASHRREV_I16_e64_vi: 3739 case AMDGPU::V_ASHRREV_I16_gfx10: 3740 3741 case AMDGPU::V_LSHLREV_B64_e64: 3742 case AMDGPU::V_LSHLREV_B64_gfx10: 3743 case AMDGPU::V_LSHLREV_B64_vi: 3744 3745 case AMDGPU::V_LSHRREV_B64_e64: 3746 case AMDGPU::V_LSHRREV_B64_gfx10: 3747 case AMDGPU::V_LSHRREV_B64_vi: 3748 3749 case AMDGPU::V_ASHRREV_I64_e64: 3750 case AMDGPU::V_ASHRREV_I64_gfx10: 3751 case AMDGPU::V_ASHRREV_I64_vi: 3752 3753 case AMDGPU::V_PK_LSHLREV_B16: 3754 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3755 case AMDGPU::V_PK_LSHLREV_B16_vi: 3756 3757 case AMDGPU::V_PK_LSHRREV_B16: 3758 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3759 case AMDGPU::V_PK_LSHRREV_B16_vi: 3760 case AMDGPU::V_PK_ASHRREV_I16: 3761 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3762 case AMDGPU::V_PK_ASHRREV_I16_vi: 3763 return true; 3764 default: 3765 return false; 3766 } 3767 } 3768 3769 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3770 3771 using namespace SIInstrFlags; 3772 const unsigned Opcode = Inst.getOpcode(); 3773 const MCInstrDesc &Desc = MII.get(Opcode); 3774 3775 // lds_direct register is defined so that it can be used 3776 // with 9-bit operands only. Ignore encodings which do not accept these. 
3777 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3778 if ((Desc.TSFlags & Enc) == 0) 3779 return None; 3780 3781 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3782 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3783 if (SrcIdx == -1) 3784 break; 3785 const auto &Src = Inst.getOperand(SrcIdx); 3786 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3787 3788 if (isGFX90A()) 3789 return StringRef("lds_direct is not supported on this GPU"); 3790 3791 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3792 return StringRef("lds_direct cannot be used with this instruction"); 3793 3794 if (SrcName != OpName::src0) 3795 return StringRef("lds_direct may be used as src0 only"); 3796 } 3797 } 3798 3799 return None; 3800 } 3801 3802 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3803 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3804 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3805 if (Op.isFlatOffset()) 3806 return Op.getStartLoc(); 3807 } 3808 return getLoc(); 3809 } 3810 3811 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3812 const OperandVector &Operands) { 3813 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3814 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3815 return true; 3816 3817 auto Opcode = Inst.getOpcode(); 3818 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3819 assert(OpNum != -1); 3820 3821 const auto &Op = Inst.getOperand(OpNum); 3822 if (!hasFlatOffsets() && Op.getImm() != 0) { 3823 Error(getFlatOffsetLoc(Operands), 3824 "flat offset modifier is not supported on this GPU"); 3825 return false; 3826 } 3827 3828 // For FLAT segment the offset must be positive; 3829 // MSB is ignored and forced to zero. 3830 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3831 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3832 if (!isIntN(OffsetSize, Op.getImm())) { 3833 Error(getFlatOffsetLoc(Operands), 3834 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3835 return false; 3836 } 3837 } else { 3838 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3839 if (!isUIntN(OffsetSize, Op.getImm())) { 3840 Error(getFlatOffsetLoc(Operands), 3841 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3842 return false; 3843 } 3844 } 3845 3846 return true; 3847 } 3848 3849 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3850 // Start with second operand because SMEM Offset cannot be dst or src0. 
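// Operands[0] holds the instruction mnemonic token, so the first real operand is at index 1.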
3851 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3852 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3853 if (Op.isSMEMOffset()) 3854 return Op.getStartLoc(); 3855 } 3856 return getLoc(); 3857 } 3858 3859 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3860 const OperandVector &Operands) { 3861 if (isCI() || isSI()) 3862 return true; 3863 3864 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3865 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3866 return true; 3867 3868 auto Opcode = Inst.getOpcode(); 3869 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3870 if (OpNum == -1) 3871 return true; 3872 3873 const auto &Op = Inst.getOperand(OpNum); 3874 if (!Op.isImm()) 3875 return true; 3876 3877 uint64_t Offset = Op.getImm(); 3878 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3879 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3880 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3881 return true; 3882 3883 Error(getSMEMOffsetLoc(Operands), 3884 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3885 "expected a 21-bit signed offset"); 3886 3887 return false; 3888 } 3889 3890 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3891 unsigned Opcode = Inst.getOpcode(); 3892 const MCInstrDesc &Desc = MII.get(Opcode); 3893 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3894 return true; 3895 3896 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3897 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3898 3899 const int OpIndices[] = { Src0Idx, Src1Idx }; 3900 3901 unsigned NumExprs = 0; 3902 unsigned NumLiterals = 0; 3903 uint32_t LiteralValue; 3904 3905 for (int OpIdx : OpIndices) { 3906 if (OpIdx == -1) break; 3907 3908 const MCOperand &MO = Inst.getOperand(OpIdx); 3909 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3910 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3911 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3912 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3913 if (NumLiterals == 0 || LiteralValue != Value) { 3914 LiteralValue = Value; 3915 ++NumLiterals; 3916 } 3917 } else if (MO.isExpr()) { 3918 ++NumExprs; 3919 } 3920 } 3921 } 3922 3923 return NumLiterals + NumExprs <= 1; 3924 } 3925 3926 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3927 const unsigned Opc = Inst.getOpcode(); 3928 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3929 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3930 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3931 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3932 3933 if (OpSel & ~3) 3934 return false; 3935 } 3936 return true; 3937 } 3938 3939 // Check if VCC register matches wavefront size 3940 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3941 auto FB = getFeatureBits(); 3942 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3943 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3944 } 3945 3946 // VOP3 literal is only allowed in GFX10+ and only one can be used 3947 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3948 const OperandVector &Operands) { 3949 unsigned Opcode = Inst.getOpcode(); 3950 const MCInstrDesc &Desc = MII.get(Opcode); 3951 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3952 return true; 3953 3954 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3955 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3956 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3957 3958 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3959 3960 unsigned NumExprs = 0; 3961 unsigned NumLiterals = 0; 3962 uint32_t LiteralValue; 3963 3964 for (int OpIdx : OpIndices) { 3965 if (OpIdx == -1) break; 3966 3967 const MCOperand &MO = Inst.getOperand(OpIdx); 3968 if (!MO.isImm() && !MO.isExpr()) 3969 continue; 3970 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3971 continue; 3972 3973 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3974 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3975 Error(getConstLoc(Operands), 3976 "inline constants are not allowed for this operand"); 3977 return false; 3978 } 3979 3980 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3981 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3982 if (NumLiterals == 0 || LiteralValue != Value) { 3983 LiteralValue = Value; 3984 ++NumLiterals; 3985 } 3986 } else if (MO.isExpr()) { 3987 ++NumExprs; 3988 } 3989 } 3990 NumLiterals += NumExprs; 3991 3992 if (!NumLiterals) 3993 return true; 3994 3995 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3996 Error(getLitLoc(Operands), "literal operands are not supported"); 3997 return false; 3998 } 3999 4000 if (NumLiterals > 1) { 4001 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4002 return false; 4003 } 4004 4005 return true; 4006 } 4007 4008 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4009 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4010 const MCRegisterInfo *MRI) { 4011 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4012 if (OpIdx < 0) 4013 return -1; 4014 4015 const MCOperand &Op = Inst.getOperand(OpIdx); 4016 if (!Op.isReg()) 4017 return -1; 4018 4019 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4020 auto Reg = Sub ? Sub : Op.getReg(); 4021 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4022 return AGRP32.contains(Reg) ? 1 : 0; 4023 } 4024 4025 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4026 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4027 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4028 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4029 SIInstrFlags::DS)) == 0) 4030 return true; 4031 4032 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4033 : AMDGPU::OpName::vdata; 4034 4035 const MCRegisterInfo *MRI = getMRI(); 4036 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4037 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4038 4039 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4040 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4041 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4042 return false; 4043 } 4044 4045 auto FB = getFeatureBits(); 4046 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4047 if (DataAreg < 0 || DstAreg < 0) 4048 return true; 4049 return DstAreg == DataAreg; 4050 } 4051 4052 return DstAreg < 1 && DataAreg < 1; 4053 } 4054 4055 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4056 auto FB = getFeatureBits(); 4057 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4058 return true; 4059 4060 const MCRegisterInfo *MRI = getMRI(); 4061 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4062 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4063 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4064 const MCOperand &Op = Inst.getOperand(I); 4065 if (!Op.isReg()) 4066 continue; 4067 4068 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4069 if (!Sub) 4070 continue; 4071 4072 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4073 return false; 4074 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4075 return false; 4076 } 4077 4078 return true; 4079 } 4080 4081 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4082 const OperandVector &Operands, 4083 const SMLoc &IDLoc) { 4084 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4085 AMDGPU::OpName::cpol); 4086 if (CPolPos == -1) 4087 return true; 4088 4089 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4090 4091 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4092 if ((TSFlags & (SIInstrFlags::SMRD)) && 4093 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4094 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4095 return false; 4096 } 4097 4098 if (isGFX90A() && (CPol & CPol::SCC)) { 4099 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4100 StringRef CStr(S.getPointer()); 4101 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4102 Error(S, "scc is not supported on this GPU"); 4103 return false; 4104 } 4105 4106 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4107 return true; 4108 4109 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4110 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4111 Error(IDLoc, "instruction must use glc"); 4112 return false; 4113 } 4114 } else { 4115 if (CPol & CPol::GLC) { 4116 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4117 StringRef CStr(S.getPointer()); 4118 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4119 Error(S, "instruction must not use glc"); 4120 return false; 4121 } 4122 } 4123 4124 return true; 4125 } 4126 4127 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4128 const SMLoc &IDLoc, 4129 const OperandVector &Operands) { 4130 if (auto ErrMsg = validateLdsDirect(Inst)) { 4131 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4132 return false; 4133 } 4134 if (!validateSOPLiteral(Inst)) { 4135 Error(getLitLoc(Operands), 4136 "only one literal operand is allowed"); 4137 return false; 4138 } 4139 if (!validateVOP3Literal(Inst, Operands)) { 4140 return false; 4141 } 4142 if (!validateConstantBusLimitations(Inst, Operands)) { 4143 return false; 
4144 } 4145 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4146 return false; 4147 } 4148 if (!validateIntClampSupported(Inst)) { 4149 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4150 "integer clamping is not supported on this GPU"); 4151 return false; 4152 } 4153 if (!validateOpSel(Inst)) { 4154 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4155 "invalid op_sel operand"); 4156 return false; 4157 } 4158 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4159 if (!validateMIMGD16(Inst)) { 4160 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4161 "d16 modifier is not supported on this GPU"); 4162 return false; 4163 } 4164 if (!validateMIMGDim(Inst)) { 4165 Error(IDLoc, "dim modifier is required on this GPU"); 4166 return false; 4167 } 4168 if (!validateMIMGMSAA(Inst)) { 4169 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4170 "invalid dim; must be MSAA type"); 4171 return false; 4172 } 4173 if (!validateMIMGDataSize(Inst)) { 4174 Error(IDLoc, 4175 "image data size does not match dmask and tfe"); 4176 return false; 4177 } 4178 if (!validateMIMGAddrSize(Inst)) { 4179 Error(IDLoc, 4180 "image address size does not match dim and a16"); 4181 return false; 4182 } 4183 if (!validateMIMGAtomicDMask(Inst)) { 4184 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4185 "invalid atomic image dmask"); 4186 return false; 4187 } 4188 if (!validateMIMGGatherDMask(Inst)) { 4189 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4190 "invalid image_gather dmask: only one bit must be set"); 4191 return false; 4192 } 4193 if (!validateMovrels(Inst, Operands)) { 4194 return false; 4195 } 4196 if (!validateFlatOffset(Inst, Operands)) { 4197 return false; 4198 } 4199 if (!validateSMEMOffset(Inst, Operands)) { 4200 return false; 4201 } 4202 if (!validateMAIAccWrite(Inst, Operands)) { 4203 return false; 4204 } 4205 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4206 return false; 4207 } 4208 4209 if (!validateAGPRLdSt(Inst)) { 4210 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4211 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4212 : "invalid register class: agpr loads and stores not supported on this GPU" 4213 ); 4214 return false; 4215 } 4216 if (!validateVGPRAlign(Inst)) { 4217 Error(IDLoc, 4218 "invalid register class: vgpr tuples must be 64 bit aligned"); 4219 return false; 4220 } 4221 4222 if (!validateDivScale(Inst)) { 4223 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4224 return false; 4225 } 4226 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4227 return false; 4228 } 4229 4230 return true; 4231 } 4232 4233 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4234 const FeatureBitset &FBS, 4235 unsigned VariantID = 0); 4236 4237 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4238 const FeatureBitset &AvailableFeatures, 4239 unsigned VariantID); 4240 4241 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4242 const FeatureBitset &FBS) { 4243 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4244 } 4245 4246 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4247 const FeatureBitset &FBS, 4248 ArrayRef<unsigned> Variants) { 4249 for (auto Variant : Variants) { 4250 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4251 return true; 4252 } 4253 4254 return false; 4255 } 4256 4257 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4258 const SMLoc &IDLoc) { 4259 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4260 4261 // Check if requested instruction variant is supported. 4262 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4263 return false; 4264 4265 // This instruction is not supported. 4266 // Clear any other pending errors because they are no longer relevant. 4267 getParser().clearPendingErrors(); 4268 4269 // Requested instruction variant is not supported. 4270 // Check if any other variants are supported. 4271 StringRef VariantName = getMatchedVariantName(); 4272 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4273 return Error(IDLoc, 4274 Twine(VariantName, 4275 " variant of this instruction is not supported")); 4276 } 4277 4278 // Finally check if this instruction is supported on any other GPU. 4279 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4280 return Error(IDLoc, "instruction not supported on this GPU"); 4281 } 4282 4283 // Instruction not supported on any GPU. Probably a typo. 4284 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4285 return Error(IDLoc, "invalid instruction" + Suggestion); 4286 } 4287 4288 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4289 OperandVector &Operands, 4290 MCStreamer &Out, 4291 uint64_t &ErrorInfo, 4292 bool MatchingInlineAsm) { 4293 MCInst Inst; 4294 unsigned Result = Match_Success; 4295 for (auto Variant : getMatchedVariants()) { 4296 uint64_t EI; 4297 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4298 Variant); 4299 // We order match statuses from least to most specific. 
We use most specific 4300 // status as resulting 4301 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4302 if ((R == Match_Success) || 4303 (R == Match_PreferE32) || 4304 (R == Match_MissingFeature && Result != Match_PreferE32) || 4305 (R == Match_InvalidOperand && Result != Match_MissingFeature 4306 && Result != Match_PreferE32) || 4307 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4308 && Result != Match_MissingFeature 4309 && Result != Match_PreferE32)) { 4310 Result = R; 4311 ErrorInfo = EI; 4312 } 4313 if (R == Match_Success) 4314 break; 4315 } 4316 4317 if (Result == Match_Success) { 4318 if (!validateInstruction(Inst, IDLoc, Operands)) { 4319 return true; 4320 } 4321 Inst.setLoc(IDLoc); 4322 Out.emitInstruction(Inst, getSTI()); 4323 return false; 4324 } 4325 4326 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4327 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4328 return true; 4329 } 4330 4331 switch (Result) { 4332 default: break; 4333 case Match_MissingFeature: 4334 // It has been verified that the specified instruction 4335 // mnemonic is valid. A match was found but it requires 4336 // features which are not supported on this GPU. 4337 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4338 4339 case Match_InvalidOperand: { 4340 SMLoc ErrorLoc = IDLoc; 4341 if (ErrorInfo != ~0ULL) { 4342 if (ErrorInfo >= Operands.size()) { 4343 return Error(IDLoc, "too few operands for instruction"); 4344 } 4345 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4346 if (ErrorLoc == SMLoc()) 4347 ErrorLoc = IDLoc; 4348 } 4349 return Error(ErrorLoc, "invalid operand for instruction"); 4350 } 4351 4352 case Match_PreferE32: 4353 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4354 "should be encoded as e32"); 4355 case Match_MnemonicFail: 4356 llvm_unreachable("Invalid instructions should have been handled already"); 4357 } 4358 llvm_unreachable("Implement any new match types added!"); 4359 } 4360 4361 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4362 int64_t Tmp = -1; 4363 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4364 return true; 4365 } 4366 if (getParser().parseAbsoluteExpression(Tmp)) { 4367 return true; 4368 } 4369 Ret = static_cast<uint32_t>(Tmp); 4370 return false; 4371 } 4372 4373 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4374 uint32_t &Minor) { 4375 if (ParseAsAbsoluteExpression(Major)) 4376 return TokError("invalid major version"); 4377 4378 if (!trySkipToken(AsmToken::Comma)) 4379 return TokError("minor version number required, comma expected"); 4380 4381 if (ParseAsAbsoluteExpression(Minor)) 4382 return TokError("invalid minor version"); 4383 4384 return false; 4385 } 4386 4387 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4388 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4389 return TokError("directive only supported for amdgcn architecture"); 4390 4391 std::string TargetIDDirective; 4392 SMLoc TargetStart = getTok().getLoc(); 4393 if (getParser().parseEscapedString(TargetIDDirective)) 4394 return true; 4395 4396 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4397 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4398 return getParser().Error(TargetRange.Start, 4399 (Twine(".amdgcn_target directive's target id ") + 4400 Twine(TargetIDDirective) + 4401 Twine(" does not match the specified target id ") + 4402 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4403 4404 return false; 4405 } 4406 4407 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4408 return Error(Range.Start, "value out of range", Range); 4409 } 4410 4411 bool AMDGPUAsmParser::calculateGPRBlocks( 4412 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4413 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4414 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4415 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4416 // TODO(scott.linder): These calculations are duplicated from 4417 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4418 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4419 4420 unsigned NumVGPRs = NextFreeVGPR; 4421 unsigned NumSGPRs = NextFreeSGPR; 4422 4423 if (Version.Major >= 10) 4424 NumSGPRs = 0; 4425 else { 4426 unsigned MaxAddressableNumSGPRs = 4427 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4428 4429 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4430 NumSGPRs > MaxAddressableNumSGPRs) 4431 return OutOfRangeError(SGPRRange); 4432 4433 NumSGPRs += 4434 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4435 4436 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4437 NumSGPRs > MaxAddressableNumSGPRs) 4438 return OutOfRangeError(SGPRRange); 4439 4440 if (Features.test(FeatureSGPRInitBug)) 4441 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4442 } 4443 4444 VGPRBlocks = 4445 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4446 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4447 4448 return false; 4449 } 4450 4451 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4452 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4453 return TokError("directive only supported for amdgcn architecture"); 4454 4455 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4456 return TokError("directive only supported for amdhsa OS"); 4457 4458 StringRef KernelName; 4459 if (getParser().parseIdentifier(KernelName)) 4460 return true; 4461 4462 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4463 4464 StringSet<> Seen; 4465 4466 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4467 4468 SMRange VGPRRange; 4469 uint64_t NextFreeVGPR = 0; 4470 uint64_t AccumOffset = 0; 4471 SMRange SGPRRange; 4472 uint64_t NextFreeSGPR = 0; 4473 unsigned UserSGPRCount = 0; 4474 bool ReserveVCC = true; 4475 bool ReserveFlatScr = true; 4476 Optional<bool> EnableWavefrontSize32; 4477 4478 while (true) { 4479 while (trySkipToken(AsmToken::EndOfStatement)); 4480 4481 StringRef ID; 4482 SMRange IDRange = getTok().getLocRange(); 4483 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4484 return true; 4485 4486 if (ID == ".end_amdhsa_kernel") 4487 break; 4488 4489 if (Seen.find(ID) != Seen.end()) 4490 return TokError(".amdhsa_ directives cannot be repeated"); 4491 Seen.insert(ID); 4492 4493 SMLoc ValStart = getLoc(); 4494 int64_t IVal; 4495 if (getParser().parseAbsoluteExpression(IVal)) 4496 return true; 4497 SMLoc ValEnd = getLoc(); 4498 SMRange ValRange = SMRange(ValStart, ValEnd); 4499 4500 if (IVal < 0) 4501 return OutOfRangeError(ValRange); 4502 4503 uint64_t Val = IVal; 4504 4505 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4506 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4507 return OutOfRangeError(RANGE); \ 4508 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4509 4510 if (ID == ".amdhsa_group_segment_fixed_size") { 
4511 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4512 return OutOfRangeError(ValRange); 4513 KD.group_segment_fixed_size = Val; 4514 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4515 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4516 return OutOfRangeError(ValRange); 4517 KD.private_segment_fixed_size = Val; 4518 } else if (ID == ".amdhsa_kernarg_size") { 4519 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4520 return OutOfRangeError(ValRange); 4521 KD.kernarg_size = Val; 4522 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4523 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4524 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4525 Val, ValRange); 4526 if (Val) 4527 UserSGPRCount += 4; 4528 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4529 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4530 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4531 ValRange); 4532 if (Val) 4533 UserSGPRCount += 2; 4534 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4535 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4536 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4537 ValRange); 4538 if (Val) 4539 UserSGPRCount += 2; 4540 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4541 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4542 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4543 Val, ValRange); 4544 if (Val) 4545 UserSGPRCount += 2; 4546 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4547 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4548 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4549 ValRange); 4550 if (Val) 4551 UserSGPRCount += 2; 4552 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4553 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4554 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4555 ValRange); 4556 if (Val) 4557 UserSGPRCount += 2; 4558 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4559 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4560 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4561 Val, ValRange); 4562 if (Val) 4563 UserSGPRCount += 1; 4564 } else if (ID == ".amdhsa_wavefront_size32") { 4565 if (IVersion.Major < 10) 4566 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4567 EnableWavefrontSize32 = Val; 4568 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4569 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4570 Val, ValRange); 4571 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4572 PARSE_BITS_ENTRY( 4573 KD.compute_pgm_rsrc2, 4574 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4575 ValRange); 4576 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4577 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4578 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4579 ValRange); 4580 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4581 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4582 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4583 ValRange); 4584 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4585 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4586 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4587 ValRange); 4588 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4589 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4590 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4591 ValRange); 4592 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4593 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4594 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4595 ValRange); 4596 } else if (ID == 
".amdhsa_next_free_vgpr") { 4597 VGPRRange = ValRange; 4598 NextFreeVGPR = Val; 4599 } else if (ID == ".amdhsa_next_free_sgpr") { 4600 SGPRRange = ValRange; 4601 NextFreeSGPR = Val; 4602 } else if (ID == ".amdhsa_accum_offset") { 4603 if (!isGFX90A()) 4604 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4605 AccumOffset = Val; 4606 } else if (ID == ".amdhsa_reserve_vcc") { 4607 if (!isUInt<1>(Val)) 4608 return OutOfRangeError(ValRange); 4609 ReserveVCC = Val; 4610 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4611 if (IVersion.Major < 7) 4612 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4613 if (!isUInt<1>(Val)) 4614 return OutOfRangeError(ValRange); 4615 ReserveFlatScr = Val; 4616 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4617 if (IVersion.Major < 8) 4618 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4619 if (!isUInt<1>(Val)) 4620 return OutOfRangeError(ValRange); 4621 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4622 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4623 IDRange); 4624 } else if (ID == ".amdhsa_float_round_mode_32") { 4625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4626 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4627 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4628 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4629 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4630 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4631 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4632 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4633 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4634 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4635 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4636 ValRange); 4637 } else if (ID == ".amdhsa_dx10_clamp") { 4638 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4639 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4640 } else if (ID == ".amdhsa_ieee_mode") { 4641 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4642 Val, ValRange); 4643 } else if (ID == ".amdhsa_fp16_overflow") { 4644 if (IVersion.Major < 9) 4645 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4646 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4647 ValRange); 4648 } else if (ID == ".amdhsa_tg_split") { 4649 if (!isGFX90A()) 4650 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4651 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4652 ValRange); 4653 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4654 if (IVersion.Major < 10) 4655 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4656 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4657 ValRange); 4658 } else if (ID == ".amdhsa_memory_ordered") { 4659 if (IVersion.Major < 10) 4660 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4661 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4662 ValRange); 4663 } else if (ID == ".amdhsa_forward_progress") { 4664 if (IVersion.Major < 10) 4665 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4666 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4667 ValRange); 4668 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4669 PARSE_BITS_ENTRY( 4670 KD.compute_pgm_rsrc2, 4671 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4672 ValRange); 4673 } else if (ID == 
".amdhsa_exception_fp_denorm_src") { 4674 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4675 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4676 Val, ValRange); 4677 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4678 PARSE_BITS_ENTRY( 4679 KD.compute_pgm_rsrc2, 4680 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4681 ValRange); 4682 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4683 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4684 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4685 Val, ValRange); 4686 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4687 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4688 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4689 Val, ValRange); 4690 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4691 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4692 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4693 Val, ValRange); 4694 } else if (ID == ".amdhsa_exception_int_div_zero") { 4695 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4696 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4697 Val, ValRange); 4698 } else { 4699 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4700 } 4701 4702 #undef PARSE_BITS_ENTRY 4703 } 4704 4705 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4706 return TokError(".amdhsa_next_free_vgpr directive is required"); 4707 4708 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4709 return TokError(".amdhsa_next_free_sgpr directive is required"); 4710 4711 unsigned VGPRBlocks; 4712 unsigned SGPRBlocks; 4713 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4714 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4715 EnableWavefrontSize32, NextFreeVGPR, 4716 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4717 SGPRBlocks)) 4718 return true; 4719 4720 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4721 VGPRBlocks)) 4722 return OutOfRangeError(VGPRRange); 4723 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4724 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4725 4726 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4727 SGPRBlocks)) 4728 return OutOfRangeError(SGPRRange); 4729 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4730 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4731 SGPRBlocks); 4732 4733 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4734 return TokError("too many user SGPRs enabled"); 4735 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4736 UserSGPRCount); 4737 4738 if (isGFX90A()) { 4739 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4740 return TokError(".amdhsa_accum_offset directive is required"); 4741 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4742 return TokError("accum_offset should be in range [4..256] in " 4743 "increments of 4"); 4744 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4745 return TokError("accum_offset exceeds total VGPR allocation"); 4746 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4747 (AccumOffset / 4 - 1)); 4748 } 4749 4750 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4751 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4752 ReserveFlatScr); 4753 return false; 4754 } 4755 4756 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4757 uint32_t Major; 4758 uint32_t Minor; 4759 4760 if (ParseDirectiveMajorMinor(Major, Minor)) 4761 return true; 4762 4763 
getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4764 return false; 4765 } 4766 4767 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4768 uint32_t Major; 4769 uint32_t Minor; 4770 uint32_t Stepping; 4771 StringRef VendorName; 4772 StringRef ArchName; 4773 4774 // If this directive has no arguments, then use the ISA version for the 4775 // targeted GPU. 4776 if (isToken(AsmToken::EndOfStatement)) { 4777 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4778 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4779 ISA.Stepping, 4780 "AMD", "AMDGPU"); 4781 return false; 4782 } 4783 4784 if (ParseDirectiveMajorMinor(Major, Minor)) 4785 return true; 4786 4787 if (!trySkipToken(AsmToken::Comma)) 4788 return TokError("stepping version number required, comma expected"); 4789 4790 if (ParseAsAbsoluteExpression(Stepping)) 4791 return TokError("invalid stepping version"); 4792 4793 if (!trySkipToken(AsmToken::Comma)) 4794 return TokError("vendor name required, comma expected"); 4795 4796 if (!parseString(VendorName, "invalid vendor name")) 4797 return true; 4798 4799 if (!trySkipToken(AsmToken::Comma)) 4800 return TokError("arch name required, comma expected"); 4801 4802 if (!parseString(ArchName, "invalid arch name")) 4803 return true; 4804 4805 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4806 VendorName, ArchName); 4807 return false; 4808 } 4809 4810 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4811 amd_kernel_code_t &Header) { 4812 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4813 // assembly for backwards compatibility. 4814 if (ID == "max_scratch_backing_memory_byte_size") { 4815 Parser.eatToEndOfStatement(); 4816 return false; 4817 } 4818 4819 SmallString<40> ErrStr; 4820 raw_svector_ostream Err(ErrStr); 4821 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4822 return TokError(Err.str()); 4823 } 4824 Lex(); 4825 4826 if (ID == "enable_wavefront_size32") { 4827 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4828 if (!isGFX10Plus()) 4829 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4830 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4831 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4832 } else { 4833 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4834 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4835 } 4836 } 4837 4838 if (ID == "wavefront_size") { 4839 if (Header.wavefront_size == 5) { 4840 if (!isGFX10Plus()) 4841 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4842 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4843 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4844 } else if (Header.wavefront_size == 6) { 4845 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4846 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4847 } 4848 } 4849 4850 if (ID == "enable_wgp_mode") { 4851 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4852 !isGFX10Plus()) 4853 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4854 } 4855 4856 if (ID == "enable_mem_ordered") { 4857 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4858 !isGFX10Plus()) 4859 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4860 } 4861 4862 if (ID == "enable_fwd_progress") { 4863 if 
(G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4864 !isGFX10Plus()) 4865 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4866 } 4867 4868 return false; 4869 } 4870 4871 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4872 amd_kernel_code_t Header; 4873 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4874 4875 while (true) { 4876 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4877 // will set the current token to EndOfStatement. 4878 while(trySkipToken(AsmToken::EndOfStatement)); 4879 4880 StringRef ID; 4881 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4882 return true; 4883 4884 if (ID == ".end_amd_kernel_code_t") 4885 break; 4886 4887 if (ParseAMDKernelCodeTValue(ID, Header)) 4888 return true; 4889 } 4890 4891 getTargetStreamer().EmitAMDKernelCodeT(Header); 4892 4893 return false; 4894 } 4895 4896 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4897 StringRef KernelName; 4898 if (!parseId(KernelName, "expected symbol name")) 4899 return true; 4900 4901 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4902 ELF::STT_AMDGPU_HSA_KERNEL); 4903 4904 KernelScope.initialize(getContext()); 4905 return false; 4906 } 4907 4908 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4909 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4910 return Error(getLoc(), 4911 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4912 "architectures"); 4913 } 4914 4915 auto TargetIDDirective = getLexer().getTok().getStringContents(); 4916 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4917 return Error(getParser().getTok().getLoc(), "target id must match options"); 4918 4919 getTargetStreamer().EmitISAVersion(); 4920 Lex(); 4921 4922 return false; 4923 } 4924 4925 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4926 const char *AssemblerDirectiveBegin; 4927 const char *AssemblerDirectiveEnd; 4928 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4929 isHsaAbiVersion3Or4(&getSTI()) 4930 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4931 HSAMD::V3::AssemblerDirectiveEnd) 4932 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4933 HSAMD::AssemblerDirectiveEnd); 4934 4935 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4936 return Error(getLoc(), 4937 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4938 "not available on non-amdhsa OSes")).str()); 4939 } 4940 4941 std::string HSAMetadataString; 4942 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4943 HSAMetadataString)) 4944 return true; 4945 4946 if (isHsaAbiVersion3Or4(&getSTI())) { 4947 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4948 return Error(getLoc(), "invalid HSA metadata"); 4949 } else { 4950 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4951 return Error(getLoc(), "invalid HSA metadata"); 4952 } 4953 4954 return false; 4955 } 4956 4957 /// Common code to parse out a block of text (typically YAML) between start and 4958 /// end directives. 
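/// Whitespace is preserved, and each collected statement is followed by the target's statement separator string.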
4959 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4960 const char *AssemblerDirectiveEnd, 4961 std::string &CollectString) { 4962 4963 raw_string_ostream CollectStream(CollectString); 4964 4965 getLexer().setSkipSpace(false); 4966 4967 bool FoundEnd = false; 4968 while (!isToken(AsmToken::Eof)) { 4969 while (isToken(AsmToken::Space)) { 4970 CollectStream << getTokenStr(); 4971 Lex(); 4972 } 4973 4974 if (trySkipId(AssemblerDirectiveEnd)) { 4975 FoundEnd = true; 4976 break; 4977 } 4978 4979 CollectStream << Parser.parseStringToEndOfStatement() 4980 << getContext().getAsmInfo()->getSeparatorString(); 4981 4982 Parser.eatToEndOfStatement(); 4983 } 4984 4985 getLexer().setSkipSpace(true); 4986 4987 if (isToken(AsmToken::Eof) && !FoundEnd) { 4988 return TokError(Twine("expected directive ") + 4989 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4990 } 4991 4992 CollectStream.flush(); 4993 return false; 4994 } 4995 4996 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4997 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4998 std::string String; 4999 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5000 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5001 return true; 5002 5003 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5004 if (!PALMetadata->setFromString(String)) 5005 return Error(getLoc(), "invalid PAL metadata"); 5006 return false; 5007 } 5008 5009 /// Parse the assembler directive for old linear-format PAL metadata. 5010 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5011 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5012 return Error(getLoc(), 5013 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5014 "not available on non-amdpal OSes")).str()); 5015 } 5016 5017 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5018 PALMetadata->setLegacy(); 5019 for (;;) { 5020 uint32_t Key, Value; 5021 if (ParseAsAbsoluteExpression(Key)) { 5022 return TokError(Twine("invalid value in ") + 5023 Twine(PALMD::AssemblerDirective)); 5024 } 5025 if (!trySkipToken(AsmToken::Comma)) { 5026 return TokError(Twine("expected an even number of values in ") + 5027 Twine(PALMD::AssemblerDirective)); 5028 } 5029 if (ParseAsAbsoluteExpression(Value)) { 5030 return TokError(Twine("invalid value in ") + 5031 Twine(PALMD::AssemblerDirective)); 5032 } 5033 PALMetadata->setRegister(Key, Value); 5034 if (!trySkipToken(AsmToken::Comma)) 5035 break; 5036 } 5037 return false; 5038 } 5039 5040 /// ParseDirectiveAMDGPULDS 5041 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5042 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5043 if (getParser().checkForValidSection()) 5044 return true; 5045 5046 StringRef Name; 5047 SMLoc NameLoc = getLoc(); 5048 if (getParser().parseIdentifier(Name)) 5049 return TokError("expected identifier in directive"); 5050 5051 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5052 if (parseToken(AsmToken::Comma, "expected ','")) 5053 return true; 5054 5055 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5056 5057 int64_t Size; 5058 SMLoc SizeLoc = getLoc(); 5059 if (getParser().parseAbsoluteExpression(Size)) 5060 return true; 5061 if (Size < 0) 5062 return Error(SizeLoc, "size must be non-negative"); 5063 if (Size > LocalMemorySize) 5064 return Error(SizeLoc, "size is too large"); 5065 5066 int64_t Alignment = 4; 5067 if (trySkipToken(AsmToken::Comma)) { 5068 SMLoc AlignLoc = getLoc(); 5069 if 
(getParser().parseAbsoluteExpression(Alignment)) 5070 return true; 5071 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5072 return Error(AlignLoc, "alignment must be a power of two"); 5073 5074 // Alignment larger than the size of LDS is possible in theory, as long 5075 // as the linker manages to place the symbol at address 0, but we do want 5076 // to make sure the alignment fits nicely into a 32-bit integer. 5077 if (Alignment >= 1u << 31) 5078 return Error(AlignLoc, "alignment is too large"); 5079 } 5080 5081 if (parseToken(AsmToken::EndOfStatement, 5082 "unexpected token in '.amdgpu_lds' directive")) 5083 return true; 5084 5085 Symbol->redefineIfPossible(); 5086 if (!Symbol->isUndefined()) 5087 return Error(NameLoc, "invalid symbol redefinition"); 5088 5089 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5090 return false; 5091 } 5092 5093 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5094 StringRef IDVal = DirectiveID.getString(); 5095 5096 if (isHsaAbiVersion3Or4(&getSTI())) { 5097 if (IDVal == ".amdhsa_kernel") 5098 return ParseDirectiveAMDHSAKernel(); 5099 5100 // TODO: Restructure/combine with PAL metadata directive. 5101 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5102 return ParseDirectiveHSAMetadata(); 5103 } else { 5104 if (IDVal == ".hsa_code_object_version") 5105 return ParseDirectiveHSACodeObjectVersion(); 5106 5107 if (IDVal == ".hsa_code_object_isa") 5108 return ParseDirectiveHSACodeObjectISA(); 5109 5110 if (IDVal == ".amd_kernel_code_t") 5111 return ParseDirectiveAMDKernelCodeT(); 5112 5113 if (IDVal == ".amdgpu_hsa_kernel") 5114 return ParseDirectiveAMDGPUHsaKernel(); 5115 5116 if (IDVal == ".amd_amdgpu_isa") 5117 return ParseDirectiveISAVersion(); 5118 5119 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5120 return ParseDirectiveHSAMetadata(); 5121 } 5122 5123 if (IDVal == ".amdgcn_target") 5124 return ParseDirectiveAMDGCNTarget(); 5125 5126 if (IDVal == ".amdgpu_lds") 5127 return ParseDirectiveAMDGPULDS(); 5128 5129 if (IDVal == PALMD::AssemblerDirectiveBegin) 5130 return ParseDirectivePALMetadataBegin(); 5131 5132 if (IDVal == PALMD::AssemblerDirective) 5133 return ParseDirectivePALMetadata(); 5134 5135 return true; 5136 } 5137 5138 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5139 unsigned RegNo) { 5140 5141 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5142 R.isValid(); ++R) { 5143 if (*R == RegNo) 5144 return isGFX9Plus(); 5145 } 5146 5147 // GFX10 has 2 more SGPRs, 104 and 105. 5148 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5149 R.isValid(); ++R) { 5150 if (*R == RegNo) 5151 return hasSGPR104_SGPR105(); 5152 } 5153 5154 switch (RegNo) { 5155 case AMDGPU::SRC_SHARED_BASE: 5156 case AMDGPU::SRC_SHARED_LIMIT: 5157 case AMDGPU::SRC_PRIVATE_BASE: 5158 case AMDGPU::SRC_PRIVATE_LIMIT: 5159 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5160 return isGFX9Plus(); 5161 case AMDGPU::TBA: 5162 case AMDGPU::TBA_LO: 5163 case AMDGPU::TBA_HI: 5164 case AMDGPU::TMA: 5165 case AMDGPU::TMA_LO: 5166 case AMDGPU::TMA_HI: 5167 return !isGFX9Plus(); 5168 case AMDGPU::XNACK_MASK: 5169 case AMDGPU::XNACK_MASK_LO: 5170 case AMDGPU::XNACK_MASK_HI: 5171 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5172 case AMDGPU::SGPR_NULL: 5173 return isGFX10Plus(); 5174 default: 5175 break; 5176 } 5177 5178 if (isCI()) 5179 return true; 5180 5181 if (isSI() || isGFX10Plus()) { 5182 // No flat_scr on SI. 
5183 // On GFX10 flat scratch is not a valid register operand and can only be 5184 // accessed with s_setreg/s_getreg. 5185 switch (RegNo) { 5186 case AMDGPU::FLAT_SCR: 5187 case AMDGPU::FLAT_SCR_LO: 5188 case AMDGPU::FLAT_SCR_HI: 5189 return false; 5190 default: 5191 return true; 5192 } 5193 } 5194 5195 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5196 // SI/CI have. 5197 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5198 R.isValid(); ++R) { 5199 if (*R == RegNo) 5200 return hasSGPR102_SGPR103(); 5201 } 5202 5203 return true; 5204 } 5205 5206 OperandMatchResultTy 5207 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5208 OperandMode Mode) { 5209 // Try to parse with a custom parser 5210 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5211 5212 // If we successfully parsed the operand or if there was an error parsing, 5213 // we are done. 5214 // 5215 // If we are parsing after we reach EndOfStatement then this means we 5216 // are appending default values to the Operands list. This is only done 5217 // by a custom parser, so we shouldn't continue on to the generic parsing. 5218 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5219 isToken(AsmToken::EndOfStatement)) 5220 return ResTy; 5221 5222 SMLoc RBraceLoc; 5223 SMLoc LBraceLoc = getLoc(); 5224 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5225 unsigned Prefix = Operands.size(); 5226 5227 for (;;) { 5228 auto Loc = getLoc(); 5229 ResTy = parseReg(Operands); 5230 if (ResTy == MatchOperand_NoMatch) 5231 Error(Loc, "expected a register"); 5232 if (ResTy != MatchOperand_Success) 5233 return MatchOperand_ParseFail; 5234 5235 RBraceLoc = getLoc(); 5236 if (trySkipToken(AsmToken::RBrac)) 5237 break; 5238 5239 if (!skipToken(AsmToken::Comma, 5240 "expected a comma or a closing square bracket")) { 5241 return MatchOperand_ParseFail; 5242 } 5243 } 5244 5245 if (Operands.size() - Prefix > 1) { 5246 Operands.insert(Operands.begin() + Prefix, 5247 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5248 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5249 } 5250 5251 return MatchOperand_Success; 5252 } 5253 5254 return parseRegOrImm(Operands); 5255 } 5256 5257 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5258 // Clear any forced encodings from the previous instruction. 
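// They are re-established below when the mnemonic ends in _e32, _e64, _dpp, or _sdwa.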
5259 setForcedEncodingSize(0); 5260 setForcedDPP(false); 5261 setForcedSDWA(false); 5262 5263 if (Name.endswith("_e64")) { 5264 setForcedEncodingSize(64); 5265 return Name.substr(0, Name.size() - 4); 5266 } else if (Name.endswith("_e32")) { 5267 setForcedEncodingSize(32); 5268 return Name.substr(0, Name.size() - 4); 5269 } else if (Name.endswith("_dpp")) { 5270 setForcedDPP(true); 5271 return Name.substr(0, Name.size() - 4); 5272 } else if (Name.endswith("_sdwa")) { 5273 setForcedSDWA(true); 5274 return Name.substr(0, Name.size() - 5); 5275 } 5276 return Name; 5277 } 5278 5279 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5280 StringRef Name, 5281 SMLoc NameLoc, OperandVector &Operands) { 5282 // Add the instruction mnemonic 5283 Name = parseMnemonicSuffix(Name); 5284 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5285 5286 bool IsMIMG = Name.startswith("image_"); 5287 5288 while (!trySkipToken(AsmToken::EndOfStatement)) { 5289 OperandMode Mode = OperandMode_Default; 5290 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5291 Mode = OperandMode_NSA; 5292 CPolSeen = 0; 5293 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5294 5295 if (Res != MatchOperand_Success) { 5296 checkUnsupportedInstruction(Name, NameLoc); 5297 if (!Parser.hasPendingError()) { 5298 // FIXME: use real operand location rather than the current location. 5299 StringRef Msg = 5300 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5301 "not a valid operand."; 5302 Error(getLoc(), Msg); 5303 } 5304 while (!trySkipToken(AsmToken::EndOfStatement)) { 5305 lex(); 5306 } 5307 return true; 5308 } 5309 5310 // Eat the comma or space if there is one. 5311 trySkipToken(AsmToken::Comma); 5312 } 5313 5314 return false; 5315 } 5316 5317 //===----------------------------------------------------------------------===// 5318 // Utility functions 5319 //===----------------------------------------------------------------------===// 5320 5321 OperandMatchResultTy 5322 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5323 5324 if (!trySkipId(Prefix, AsmToken::Colon)) 5325 return MatchOperand_NoMatch; 5326 5327 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5328 } 5329 5330 OperandMatchResultTy 5331 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5332 AMDGPUOperand::ImmTy ImmTy, 5333 bool (*ConvertResult)(int64_t&)) { 5334 SMLoc S = getLoc(); 5335 int64_t Value = 0; 5336 5337 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5338 if (Res != MatchOperand_Success) 5339 return Res; 5340 5341 if (ConvertResult && !ConvertResult(Value)) { 5342 Error(S, "invalid " + StringRef(Prefix) + " value."); 5343 } 5344 5345 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5346 return MatchOperand_Success; 5347 } 5348 5349 OperandMatchResultTy 5350 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5351 OperandVector &Operands, 5352 AMDGPUOperand::ImmTy ImmTy, 5353 bool (*ConvertResult)(int64_t&)) { 5354 SMLoc S = getLoc(); 5355 if (!trySkipId(Prefix, AsmToken::Colon)) 5356 return MatchOperand_NoMatch; 5357 5358 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5359 return MatchOperand_ParseFail; 5360 5361 unsigned Val = 0; 5362 const unsigned MaxSize = 4; 5363 5364 // FIXME: How to verify the number of elements matches the number of src 5365 // operands? 
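  // Illustration (editor's addition): this helper parses the bracketed
  // bit-array syntax used by operands such as op_sel/op_sel_hi/neg_lo/neg_hi,
  // e.g. "op_sel:[0,1]" or "neg_lo:[1,0,0]". Each element parsed below must be
  // 0 or 1 and is packed into Val at bit position I.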
5366 for (int I = 0; ; ++I) { 5367 int64_t Op; 5368 SMLoc Loc = getLoc(); 5369 if (!parseExpr(Op)) 5370 return MatchOperand_ParseFail; 5371 5372 if (Op != 0 && Op != 1) { 5373 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5374 return MatchOperand_ParseFail; 5375 } 5376 5377 Val |= (Op << I); 5378 5379 if (trySkipToken(AsmToken::RBrac)) 5380 break; 5381 5382 if (I + 1 == MaxSize) { 5383 Error(getLoc(), "expected a closing square bracket"); 5384 return MatchOperand_ParseFail; 5385 } 5386 5387 if (!skipToken(AsmToken::Comma, "expected a comma")) 5388 return MatchOperand_ParseFail; 5389 } 5390 5391 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5392 return MatchOperand_Success; 5393 } 5394 5395 OperandMatchResultTy 5396 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5397 AMDGPUOperand::ImmTy ImmTy) { 5398 int64_t Bit; 5399 SMLoc S = getLoc(); 5400 5401 if (trySkipId(Name)) { 5402 Bit = 1; 5403 } else if (trySkipId("no", Name)) { 5404 Bit = 0; 5405 } else { 5406 return MatchOperand_NoMatch; 5407 } 5408 5409 if (Name == "r128" && !hasMIMG_R128()) { 5410 Error(S, "r128 modifier is not supported on this GPU"); 5411 return MatchOperand_ParseFail; 5412 } 5413 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5414 Error(S, "a16 modifier is not supported on this GPU"); 5415 return MatchOperand_ParseFail; 5416 } 5417 5418 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5419 ImmTy = AMDGPUOperand::ImmTyR128A16; 5420 5421 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5422 return MatchOperand_Success; 5423 } 5424 5425 OperandMatchResultTy 5426 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5427 unsigned CPolOn = 0; 5428 unsigned CPolOff = 0; 5429 SMLoc S = getLoc(); 5430 5431 if (trySkipId("glc")) 5432 CPolOn = AMDGPU::CPol::GLC; 5433 else if (trySkipId("noglc")) 5434 CPolOff = AMDGPU::CPol::GLC; 5435 else if (trySkipId("slc")) 5436 CPolOn = AMDGPU::CPol::SLC; 5437 else if (trySkipId("noslc")) 5438 CPolOff = AMDGPU::CPol::SLC; 5439 else if (trySkipId("dlc")) 5440 CPolOn = AMDGPU::CPol::DLC; 5441 else if (trySkipId("nodlc")) 5442 CPolOff = AMDGPU::CPol::DLC; 5443 else if (trySkipId("scc")) 5444 CPolOn = AMDGPU::CPol::SCC; 5445 else if (trySkipId("noscc")) 5446 CPolOff = AMDGPU::CPol::SCC; 5447 else 5448 return MatchOperand_NoMatch; 5449 5450 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5451 Error(S, "dlc modifier is not supported on this GPU"); 5452 return MatchOperand_ParseFail; 5453 } 5454 5455 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5456 Error(S, "scc modifier is not supported on this GPU"); 5457 return MatchOperand_ParseFail; 5458 } 5459 5460 if (CPolSeen & (CPolOn | CPolOff)) { 5461 Error(S, "duplicate cache policy modifier"); 5462 return MatchOperand_ParseFail; 5463 } 5464 5465 CPolSeen |= (CPolOn | CPolOff); 5466 5467 for (unsigned I = 1; I != Operands.size(); ++I) { 5468 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5469 if (Op.isCPol()) { 5470 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5471 return MatchOperand_Success; 5472 } 5473 } 5474 5475 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5476 AMDGPUOperand::ImmTyCPol)); 5477 5478 return MatchOperand_Success; 5479 } 5480 5481 static void addOptionalImmOperand( 5482 MCInst& Inst, const OperandVector& Operands, 5483 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5484 AMDGPUOperand::ImmTy ImmT, 5485 int64_t Default = 0) { 5486 auto i = OptionalIdx.find(ImmT); 5487 if (i != OptionalIdx.end()) { 
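    // An explicit value was parsed for this optional operand; re-emit it here.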
5488 unsigned Idx = i->second; 5489 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5490 } else { 5491 Inst.addOperand(MCOperand::createImm(Default)); 5492 } 5493 } 5494 5495 OperandMatchResultTy 5496 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5497 StringRef &Value, 5498 SMLoc &StringLoc) { 5499 if (!trySkipId(Prefix, AsmToken::Colon)) 5500 return MatchOperand_NoMatch; 5501 5502 StringLoc = getLoc(); 5503 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5504 : MatchOperand_ParseFail; 5505 } 5506 5507 //===----------------------------------------------------------------------===// 5508 // MTBUF format 5509 //===----------------------------------------------------------------------===// 5510 5511 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5512 int64_t MaxVal, 5513 int64_t &Fmt) { 5514 int64_t Val; 5515 SMLoc Loc = getLoc(); 5516 5517 auto Res = parseIntWithPrefix(Pref, Val); 5518 if (Res == MatchOperand_ParseFail) 5519 return false; 5520 if (Res == MatchOperand_NoMatch) 5521 return true; 5522 5523 if (Val < 0 || Val > MaxVal) { 5524 Error(Loc, Twine("out of range ", StringRef(Pref))); 5525 return false; 5526 } 5527 5528 Fmt = Val; 5529 return true; 5530 } 5531 5532 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5533 // values to live in a joint format operand in the MCInst encoding. 5534 OperandMatchResultTy 5535 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5536 using namespace llvm::AMDGPU::MTBUFFormat; 5537 5538 int64_t Dfmt = DFMT_UNDEF; 5539 int64_t Nfmt = NFMT_UNDEF; 5540 5541 // dfmt and nfmt can appear in either order, and each is optional. 5542 for (int I = 0; I < 2; ++I) { 5543 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5544 return MatchOperand_ParseFail; 5545 5546 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5547 return MatchOperand_ParseFail; 5548 } 5549 // Skip optional comma between dfmt/nfmt 5550 // but guard against 2 commas following each other. 5551 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5552 !peekToken().is(AsmToken::Comma)) { 5553 trySkipToken(AsmToken::Comma); 5554 } 5555 } 5556 5557 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5558 return MatchOperand_NoMatch; 5559 5560 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5561 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5562 5563 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5564 return MatchOperand_Success; 5565 } 5566 5567 OperandMatchResultTy 5568 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5569 using namespace llvm::AMDGPU::MTBUFFormat; 5570 5571 int64_t Fmt = UFMT_UNDEF; 5572 5573 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5574 return MatchOperand_ParseFail; 5575 5576 if (Fmt == UFMT_UNDEF) 5577 return MatchOperand_NoMatch; 5578 5579 Format = Fmt; 5580 return MatchOperand_Success; 5581 } 5582 5583 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5584 int64_t &Nfmt, 5585 StringRef FormatStr, 5586 SMLoc Loc) { 5587 using namespace llvm::AMDGPU::MTBUFFormat; 5588 int64_t Format; 5589 5590 Format = getDfmt(FormatStr); 5591 if (Format != DFMT_UNDEF) { 5592 Dfmt = Format; 5593 return true; 5594 } 5595 5596 Format = getNfmt(FormatStr, getSTI()); 5597 if (Format != NFMT_UNDEF) { 5598 Nfmt = Format; 5599 return true; 5600 } 5601 5602 Error(Loc, "unsupported format"); 5603 return false; 5604 } 5605 5606 OperandMatchResultTy 5607 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5608 SMLoc FormatLoc, 5609 int64_t &Format) { 5610 using namespace llvm::AMDGPU::MTBUFFormat; 5611 5612 int64_t Dfmt = DFMT_UNDEF; 5613 int64_t Nfmt = NFMT_UNDEF; 5614 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5615 return MatchOperand_ParseFail; 5616 5617 if (trySkipToken(AsmToken::Comma)) { 5618 StringRef Str; 5619 SMLoc Loc = getLoc(); 5620 if (!parseId(Str, "expected a format string") || 5621 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5622 return MatchOperand_ParseFail; 5623 } 5624 if (Dfmt == DFMT_UNDEF) { 5625 Error(Loc, "duplicate numeric format"); 5626 return MatchOperand_ParseFail; 5627 } else if (Nfmt == NFMT_UNDEF) { 5628 Error(Loc, "duplicate data format"); 5629 return MatchOperand_ParseFail; 5630 } 5631 } 5632 5633 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5634 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5635 5636 if (isGFX10Plus()) { 5637 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5638 if (Ufmt == UFMT_UNDEF) { 5639 Error(FormatLoc, "unsupported format"); 5640 return MatchOperand_ParseFail; 5641 } 5642 Format = Ufmt; 5643 } else { 5644 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5645 } 5646 5647 return MatchOperand_Success; 5648 } 5649 5650 OperandMatchResultTy 5651 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5652 SMLoc Loc, 5653 int64_t &Format) { 5654 using namespace llvm::AMDGPU::MTBUFFormat; 5655 5656 auto Id = getUnifiedFormat(FormatStr); 5657 if (Id == UFMT_UNDEF) 5658 return MatchOperand_NoMatch; 5659 5660 if (!isGFX10Plus()) { 5661 Error(Loc, "unified format is not supported on this GPU"); 5662 return MatchOperand_ParseFail; 5663 } 5664 5665 Format = Id; 5666 return MatchOperand_Success; 5667 } 5668 5669 OperandMatchResultTy 5670 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5671 using namespace llvm::AMDGPU::MTBUFFormat; 5672 SMLoc Loc = getLoc(); 5673 5674 if (!parseExpr(Format)) 5675 return MatchOperand_ParseFail; 5676 if (!isValidFormatEncoding(Format, getSTI())) { 5677 Error(Loc, "out of range format"); 5678 return MatchOperand_ParseFail; 5679 } 5680 5681 return MatchOperand_Success; 5682 } 5683 5684 OperandMatchResultTy 5685 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5686 using namespace llvm::AMDGPU::MTBUFFormat; 5687 5688 if (!trySkipId("format", AsmToken::Colon)) 5689 return MatchOperand_NoMatch; 5690 5691 if (trySkipToken(AsmToken::LBrac)) { 5692 StringRef FormatStr; 5693 SMLoc Loc = getLoc(); 5694 if (!parseId(FormatStr, "expected a format string")) 5695 return MatchOperand_ParseFail; 5696 5697 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5698 if (Res == MatchOperand_NoMatch) 5699 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5700 if (Res != MatchOperand_Success) 5701 return Res; 5702 5703 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5704 return MatchOperand_ParseFail; 5705 5706 return MatchOperand_Success; 5707 } 5708 5709 return parseNumericFormat(Format); 5710 } 5711 5712 OperandMatchResultTy 5713 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5714 using namespace llvm::AMDGPU::MTBUFFormat; 5715 5716 int64_t Format = getDefaultFormatEncoding(getSTI()); 5717 OperandMatchResultTy Res; 5718 SMLoc Loc = getLoc(); 5719 5720 // Parse legacy format syntax. 5721 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5722 if (Res == MatchOperand_ParseFail) 5723 return Res; 5724 5725 bool FormatFound = (Res == MatchOperand_Success); 5726 5727 Operands.push_back( 5728 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5729 5730 if (FormatFound) 5731 trySkipToken(AsmToken::Comma); 5732 5733 if (isToken(AsmToken::EndOfStatement)) { 5734 // We are expecting an soffset operand, 5735 // but let matcher handle the error. 5736 return MatchOperand_Success; 5737 } 5738 5739 // Parse soffset. 
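  // Illustration (editor's addition, assumed syntax): the format may be written
  // either before or after soffset, e.g.
  //   tbuffer_load_format_x v0, off, s[0:3], dfmt:15, nfmt:2, s1
  //   tbuffer_load_format_x v0, off, s[0:3], s1 format:[BUF_FMT_32_FLOAT]   (GFX10+)
  // so if no format was found above, look for it again after soffset.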
5740 Res = parseRegOrImm(Operands); 5741 if (Res != MatchOperand_Success) 5742 return Res; 5743 5744 trySkipToken(AsmToken::Comma); 5745 5746 if (!FormatFound) { 5747 Res = parseSymbolicOrNumericFormat(Format); 5748 if (Res == MatchOperand_ParseFail) 5749 return Res; 5750 if (Res == MatchOperand_Success) { 5751 auto Size = Operands.size(); 5752 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5753 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5754 Op.setImm(Format); 5755 } 5756 return MatchOperand_Success; 5757 } 5758 5759 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5760 Error(getLoc(), "duplicate format"); 5761 return MatchOperand_ParseFail; 5762 } 5763 return MatchOperand_Success; 5764 } 5765 5766 //===----------------------------------------------------------------------===// 5767 // ds 5768 //===----------------------------------------------------------------------===// 5769 5770 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5771 const OperandVector &Operands) { 5772 OptionalImmIndexMap OptionalIdx; 5773 5774 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5775 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5776 5777 // Add the register arguments 5778 if (Op.isReg()) { 5779 Op.addRegOperands(Inst, 1); 5780 continue; 5781 } 5782 5783 // Handle optional arguments 5784 OptionalIdx[Op.getImmTy()] = i; 5785 } 5786 5787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5790 5791 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5792 } 5793 5794 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5795 bool IsGdsHardcoded) { 5796 OptionalImmIndexMap OptionalIdx; 5797 5798 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5799 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5800 5801 // Add the register arguments 5802 if (Op.isReg()) { 5803 Op.addRegOperands(Inst, 1); 5804 continue; 5805 } 5806 5807 if (Op.isToken() && Op.getToken() == "gds") { 5808 IsGdsHardcoded = true; 5809 continue; 5810 } 5811 5812 // Handle optional arguments 5813 OptionalIdx[Op.getImmTy()] = i; 5814 } 5815 5816 AMDGPUOperand::ImmTy OffsetType = 5817 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5818 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5819 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5820 AMDGPUOperand::ImmTyOffset; 5821 5822 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5823 5824 if (!IsGdsHardcoded) { 5825 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5826 } 5827 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5828 } 5829 5830 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5831 OptionalImmIndexMap OptionalIdx; 5832 5833 unsigned OperandIdx[4]; 5834 unsigned EnMask = 0; 5835 int SrcIdx = 0; 5836 5837 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5838 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5839 5840 // Add the register arguments 5841 if (Op.isReg()) { 5842 assert(SrcIdx < 4); 5843 OperandIdx[SrcIdx] = Inst.size(); 5844 Op.addRegOperands(Inst, 1); 5845 ++SrcIdx; 5846 continue; 5847 } 5848 5849 if (Op.isOff()) { 5850 assert(SrcIdx < 4); 5851 OperandIdx[SrcIdx] = Inst.size(); 5852 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5853 ++SrcIdx; 5854 continue; 5855 } 5856 5857 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5858 Op.addImmOperands(Inst, 1); 5859 continue; 5860 } 5861 5862 if (Op.isToken() && Op.getToken() == "done") 5863 continue; 5864 5865 // Handle optional arguments 5866 OptionalIdx[Op.getImmTy()] = i; 5867 } 5868 5869 assert(SrcIdx == 4); 5870 5871 bool Compr = false; 5872 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5873 Compr = true; 5874 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5875 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5876 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5877 } 5878 5879 for (auto i = 0; i < SrcIdx; ++i) { 5880 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5881 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5882 } 5883 } 5884 5885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5886 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5887 5888 Inst.addOperand(MCOperand::createImm(EnMask)); 5889 } 5890 5891 //===----------------------------------------------------------------------===// 5892 // s_waitcnt 5893 //===----------------------------------------------------------------------===// 5894 5895 static bool 5896 encodeCnt( 5897 const AMDGPU::IsaVersion ISA, 5898 int64_t &IntVal, 5899 int64_t CntVal, 5900 bool Saturate, 5901 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5902 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5903 { 5904 bool Failed = false; 5905 5906 IntVal = encode(ISA, IntVal, CntVal); 5907 if (CntVal != decode(ISA, IntVal)) { 5908 if (Saturate) { 5909 IntVal = encode(ISA, IntVal, -1); 5910 } else { 5911 Failed = true; 5912 } 5913 } 5914 return Failed; 5915 } 5916 5917 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5918 5919 SMLoc CntLoc = getLoc(); 5920 StringRef CntName = getTokenStr(); 5921 5922 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5923 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5924 return false; 5925 5926 int64_t CntVal; 5927 SMLoc ValLoc = getLoc(); 5928 if (!parseExpr(CntVal)) 5929 return false; 5930 5931 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5932 5933 bool Failed = true; 5934 bool Sat = CntName.endswith("_sat"); 5935 5936 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5937 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5938 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5939 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5940 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5941 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5942 } else { 5943 Error(CntLoc, "invalid counter name " + CntName); 5944 return false; 5945 } 5946 5947 if (Failed) { 5948 Error(ValLoc, "too large value for " + CntName); 5949 return false; 5950 } 5951 5952 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5953 return false; 5954 5955 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5956 if (isToken(AsmToken::EndOfStatement)) { 5957 Error(getLoc(), "expected a counter name"); 5958 return false; 5959 } 5960 } 5961 5962 return true; 5963 } 5964 5965 OperandMatchResultTy 5966 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5967 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5968 int64_t Waitcnt = getWaitcntBitMask(ISA); 5969 SMLoc S = getLoc(); 5970 5971 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5972 while (!isToken(AsmToken::EndOfStatement)) { 5973 if (!parseCnt(Waitcnt)) 5974 return MatchOperand_ParseFail; 5975 } 5976 } else { 5977 if (!parseExpr(Waitcnt)) 5978 return MatchOperand_ParseFail; 5979 } 5980 5981 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5982 return MatchOperand_Success; 5983 } 5984 5985 bool 5986 AMDGPUOperand::isSWaitCnt() const { 5987 return isImm(); 5988 } 5989 5990 //===----------------------------------------------------------------------===// 5991 // hwreg 5992 //===----------------------------------------------------------------------===// 5993 5994 bool 5995 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5996 OperandInfoTy &Offset, 5997 
OperandInfoTy &Width) { 5998 using namespace llvm::AMDGPU::Hwreg; 5999 6000 // The register may be specified by name or using a numeric code 6001 HwReg.Loc = getLoc(); 6002 if (isToken(AsmToken::Identifier) && 6003 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6004 HwReg.IsSymbolic = true; 6005 lex(); // skip register name 6006 } else if (!parseExpr(HwReg.Id, "a register name")) { 6007 return false; 6008 } 6009 6010 if (trySkipToken(AsmToken::RParen)) 6011 return true; 6012 6013 // parse optional params 6014 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6015 return false; 6016 6017 Offset.Loc = getLoc(); 6018 if (!parseExpr(Offset.Id)) 6019 return false; 6020 6021 if (!skipToken(AsmToken::Comma, "expected a comma")) 6022 return false; 6023 6024 Width.Loc = getLoc(); 6025 return parseExpr(Width.Id) && 6026 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6027 } 6028 6029 bool 6030 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6031 const OperandInfoTy &Offset, 6032 const OperandInfoTy &Width) { 6033 6034 using namespace llvm::AMDGPU::Hwreg; 6035 6036 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6037 Error(HwReg.Loc, 6038 "specified hardware register is not supported on this GPU"); 6039 return false; 6040 } 6041 if (!isValidHwreg(HwReg.Id)) { 6042 Error(HwReg.Loc, 6043 "invalid code of hardware register: only 6-bit values are legal"); 6044 return false; 6045 } 6046 if (!isValidHwregOffset(Offset.Id)) { 6047 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6048 return false; 6049 } 6050 if (!isValidHwregWidth(Width.Id)) { 6051 Error(Width.Loc, 6052 "invalid bitfield width: only values from 1 to 32 are legal"); 6053 return false; 6054 } 6055 return true; 6056 } 6057 6058 OperandMatchResultTy 6059 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6060 using namespace llvm::AMDGPU::Hwreg; 6061 6062 int64_t ImmVal = 0; 6063 SMLoc Loc = getLoc(); 6064 6065 if (trySkipId("hwreg", AsmToken::LParen)) { 6066 OperandInfoTy HwReg(ID_UNKNOWN_); 6067 OperandInfoTy Offset(OFFSET_DEFAULT_); 6068 OperandInfoTy Width(WIDTH_DEFAULT_); 6069 if (parseHwregBody(HwReg, Offset, Width) && 6070 validateHwreg(HwReg, Offset, Width)) { 6071 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6072 } else { 6073 return MatchOperand_ParseFail; 6074 } 6075 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6076 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6077 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6078 return MatchOperand_ParseFail; 6079 } 6080 } else { 6081 return MatchOperand_ParseFail; 6082 } 6083 6084 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6085 return MatchOperand_Success; 6086 } 6087 6088 bool AMDGPUOperand::isHwreg() const { 6089 return isImmTy(ImmTyHwreg); 6090 } 6091 6092 //===----------------------------------------------------------------------===// 6093 // sendmsg 6094 //===----------------------------------------------------------------------===// 6095 6096 bool 6097 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6098 OperandInfoTy &Op, 6099 OperandInfoTy &Stream) { 6100 using namespace llvm::AMDGPU::SendMsg; 6101 6102 Msg.Loc = getLoc(); 6103 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6104 Msg.IsSymbolic = true; 6105 lex(); // skip message name 6106 } else if (!parseExpr(Msg.Id, "a message name")) { 6107 return false; 6108 } 6109 6110 if (trySkipToken(AsmToken::Comma)) { 6111 Op.IsDefined = true; 
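    // Illustration (editor's addition, assumed syntax): an operation id, and
    // optionally a stream id, may follow the message name, e.g.
    //   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)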
6112 Op.Loc = getLoc(); 6113 if (isToken(AsmToken::Identifier) && 6114 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6115 lex(); // skip operation name 6116 } else if (!parseExpr(Op.Id, "an operation name")) { 6117 return false; 6118 } 6119 6120 if (trySkipToken(AsmToken::Comma)) { 6121 Stream.IsDefined = true; 6122 Stream.Loc = getLoc(); 6123 if (!parseExpr(Stream.Id)) 6124 return false; 6125 } 6126 } 6127 6128 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6129 } 6130 6131 bool 6132 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6133 const OperandInfoTy &Op, 6134 const OperandInfoTy &Stream) { 6135 using namespace llvm::AMDGPU::SendMsg; 6136 6137 // Validation strictness depends on whether the message is specified 6138 // in a symbolic or in a numeric form. In the latter case 6139 // only the encoding possibility is checked. 6140 bool Strict = Msg.IsSymbolic; 6141 6142 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6143 Error(Msg.Loc, "invalid message id"); 6144 return false; 6145 } 6146 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6147 if (Op.IsDefined) { 6148 Error(Op.Loc, "message does not support operations"); 6149 } else { 6150 Error(Msg.Loc, "missing message operation"); 6151 } 6152 return false; 6153 } 6154 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6155 Error(Op.Loc, "invalid operation id"); 6156 return false; 6157 } 6158 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6159 Error(Stream.Loc, "message operation does not support streams"); 6160 return false; 6161 } 6162 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6163 Error(Stream.Loc, "invalid message stream id"); 6164 return false; 6165 } 6166 return true; 6167 } 6168 6169 OperandMatchResultTy 6170 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6171 using namespace llvm::AMDGPU::SendMsg; 6172 6173 int64_t ImmVal = 0; 6174 SMLoc Loc = getLoc(); 6175 6176 if (trySkipId("sendmsg", AsmToken::LParen)) { 6177 OperandInfoTy Msg(ID_UNKNOWN_); 6178 OperandInfoTy Op(OP_NONE_); 6179 OperandInfoTy Stream(STREAM_ID_NONE_); 6180 if (parseSendMsgBody(Msg, Op, Stream) && 6181 validateSendMsg(Msg, Op, Stream)) { 6182 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6183 } else { 6184 return MatchOperand_ParseFail; 6185 } 6186 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6187 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6188 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6189 return MatchOperand_ParseFail; 6190 } 6191 } else { 6192 return MatchOperand_ParseFail; 6193 } 6194 6195 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6196 return MatchOperand_Success; 6197 } 6198 6199 bool AMDGPUOperand::isSendMsg() const { 6200 return isImmTy(ImmTySendMsg); 6201 } 6202 6203 //===----------------------------------------------------------------------===// 6204 // v_interp 6205 //===----------------------------------------------------------------------===// 6206 6207 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6208 StringRef Str; 6209 SMLoc S = getLoc(); 6210 6211 if (!parseId(Str)) 6212 return MatchOperand_NoMatch; 6213 6214 int Slot = StringSwitch<int>(Str) 6215 .Case("p10", 0) 6216 .Case("p20", 1) 6217 .Case("p0", 2) 6218 .Default(-1); 6219 6220 if (Slot == -1) { 6221 Error(S, "invalid interpolation slot"); 6222 return MatchOperand_ParseFail; 6223 } 6224 6225 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6226
AMDGPUOperand::ImmTyInterpSlot)); 6227 return MatchOperand_Success; 6228 } 6229 6230 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6231 StringRef Str; 6232 SMLoc S = getLoc(); 6233 6234 if (!parseId(Str)) 6235 return MatchOperand_NoMatch; 6236 6237 if (!Str.startswith("attr")) { 6238 Error(S, "invalid interpolation attribute"); 6239 return MatchOperand_ParseFail; 6240 } 6241 6242 StringRef Chan = Str.take_back(2); 6243 int AttrChan = StringSwitch<int>(Chan) 6244 .Case(".x", 0) 6245 .Case(".y", 1) 6246 .Case(".z", 2) 6247 .Case(".w", 3) 6248 .Default(-1); 6249 if (AttrChan == -1) { 6250 Error(S, "invalid or missing interpolation attribute channel"); 6251 return MatchOperand_ParseFail; 6252 } 6253 6254 Str = Str.drop_back(2).drop_front(4); 6255 6256 uint8_t Attr; 6257 if (Str.getAsInteger(10, Attr)) { 6258 Error(S, "invalid or missing interpolation attribute number"); 6259 return MatchOperand_ParseFail; 6260 } 6261 6262 if (Attr > 63) { 6263 Error(S, "out of bounds interpolation attribute number"); 6264 return MatchOperand_ParseFail; 6265 } 6266 6267 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6268 6269 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6270 AMDGPUOperand::ImmTyInterpAttr)); 6271 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6272 AMDGPUOperand::ImmTyAttrChan)); 6273 return MatchOperand_Success; 6274 } 6275 6276 //===----------------------------------------------------------------------===// 6277 // exp 6278 //===----------------------------------------------------------------------===// 6279 6280 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6281 using namespace llvm::AMDGPU::Exp; 6282 6283 StringRef Str; 6284 SMLoc S = getLoc(); 6285 6286 if (!parseId(Str)) 6287 return MatchOperand_NoMatch; 6288 6289 unsigned Id = getTgtId(Str); 6290 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6291 Error(S, (Id == ET_INVALID) ? 
6292 "invalid exp target" : 6293 "exp target is not supported on this GPU"); 6294 return MatchOperand_ParseFail; 6295 } 6296 6297 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6298 AMDGPUOperand::ImmTyExpTgt)); 6299 return MatchOperand_Success; 6300 } 6301 6302 //===----------------------------------------------------------------------===// 6303 // parser helpers 6304 //===----------------------------------------------------------------------===// 6305 6306 bool 6307 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6308 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6309 } 6310 6311 bool 6312 AMDGPUAsmParser::isId(const StringRef Id) const { 6313 return isId(getToken(), Id); 6314 } 6315 6316 bool 6317 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6318 return getTokenKind() == Kind; 6319 } 6320 6321 bool 6322 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6323 if (isId(Id)) { 6324 lex(); 6325 return true; 6326 } 6327 return false; 6328 } 6329 6330 bool 6331 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6332 if (isToken(AsmToken::Identifier)) { 6333 StringRef Tok = getTokenStr(); 6334 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6335 lex(); 6336 return true; 6337 } 6338 } 6339 return false; 6340 } 6341 6342 bool 6343 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6344 if (isId(Id) && peekToken().is(Kind)) { 6345 lex(); 6346 lex(); 6347 return true; 6348 } 6349 return false; 6350 } 6351 6352 bool 6353 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6354 if (isToken(Kind)) { 6355 lex(); 6356 return true; 6357 } 6358 return false; 6359 } 6360 6361 bool 6362 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6363 const StringRef ErrMsg) { 6364 if (!trySkipToken(Kind)) { 6365 Error(getLoc(), ErrMsg); 6366 return false; 6367 } 6368 return true; 6369 } 6370 6371 bool 6372 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6373 SMLoc S = getLoc(); 6374 6375 const MCExpr *Expr; 6376 if (Parser.parseExpression(Expr)) 6377 return false; 6378 6379 if (Expr->evaluateAsAbsolute(Imm)) 6380 return true; 6381 6382 if (Expected.empty()) { 6383 Error(S, "expected absolute expression"); 6384 } else { 6385 Error(S, Twine("expected ", Expected) + 6386 Twine(" or an absolute expression")); 6387 } 6388 return false; 6389 } 6390 6391 bool 6392 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6393 SMLoc S = getLoc(); 6394 6395 const MCExpr *Expr; 6396 if (Parser.parseExpression(Expr)) 6397 return false; 6398 6399 int64_t IntVal; 6400 if (Expr->evaluateAsAbsolute(IntVal)) { 6401 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6402 } else { 6403 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6404 } 6405 return true; 6406 } 6407 6408 bool 6409 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6410 if (isToken(AsmToken::String)) { 6411 Val = getToken().getStringContents(); 6412 lex(); 6413 return true; 6414 } else { 6415 Error(getLoc(), ErrMsg); 6416 return false; 6417 } 6418 } 6419 6420 bool 6421 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6422 if (isToken(AsmToken::Identifier)) { 6423 Val = getTokenStr(); 6424 lex(); 6425 return true; 6426 } else { 6427 if (!ErrMsg.empty()) 6428 Error(getLoc(), ErrMsg); 6429 return false; 6430 } 6431 } 6432 6433 AsmToken 6434 AMDGPUAsmParser::getToken() const { 6435 return Parser.getTok(); 6436 } 6437 6438 AsmToken 6439 
AMDGPUAsmParser::peekToken() { 6440 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6441 } 6442 6443 void 6444 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6445 auto TokCount = getLexer().peekTokens(Tokens); 6446 6447 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6448 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6449 } 6450 6451 AsmToken::TokenKind 6452 AMDGPUAsmParser::getTokenKind() const { 6453 return getLexer().getKind(); 6454 } 6455 6456 SMLoc 6457 AMDGPUAsmParser::getLoc() const { 6458 return getToken().getLoc(); 6459 } 6460 6461 StringRef 6462 AMDGPUAsmParser::getTokenStr() const { 6463 return getToken().getString(); 6464 } 6465 6466 void 6467 AMDGPUAsmParser::lex() { 6468 Parser.Lex(); 6469 } 6470 6471 SMLoc 6472 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6473 const OperandVector &Operands) const { 6474 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6475 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6476 if (Test(Op)) 6477 return Op.getStartLoc(); 6478 } 6479 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6480 } 6481 6482 SMLoc 6483 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6484 const OperandVector &Operands) const { 6485 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6486 return getOperandLoc(Test, Operands); 6487 } 6488 6489 SMLoc 6490 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6491 const OperandVector &Operands) const { 6492 auto Test = [=](const AMDGPUOperand& Op) { 6493 return Op.isRegKind() && Op.getReg() == Reg; 6494 }; 6495 return getOperandLoc(Test, Operands); 6496 } 6497 6498 SMLoc 6499 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6500 auto Test = [](const AMDGPUOperand& Op) { 6501 return Op.IsImmKindLiteral() || Op.isExpr(); 6502 }; 6503 return getOperandLoc(Test, Operands); 6504 } 6505 6506 SMLoc 6507 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6508 auto Test = [](const AMDGPUOperand& Op) { 6509 return Op.isImmKindConst(); 6510 }; 6511 return getOperandLoc(Test, Operands); 6512 } 6513 6514 //===----------------------------------------------------------------------===// 6515 // swizzle 6516 //===----------------------------------------------------------------------===// 6517 6518 LLVM_READNONE 6519 static unsigned 6520 encodeBitmaskPerm(const unsigned AndMask, 6521 const unsigned OrMask, 6522 const unsigned XorMask) { 6523 using namespace llvm::AMDGPU::Swizzle; 6524 6525 return BITMASK_PERM_ENC | 6526 (AndMask << BITMASK_AND_SHIFT) | 6527 (OrMask << BITMASK_OR_SHIFT) | 6528 (XorMask << BITMASK_XOR_SHIFT); 6529 } 6530 6531 bool 6532 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6533 const unsigned MinVal, 6534 const unsigned MaxVal, 6535 const StringRef ErrMsg, 6536 SMLoc &Loc) { 6537 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6538 return false; 6539 } 6540 Loc = getLoc(); 6541 if (!parseExpr(Op)) { 6542 return false; 6543 } 6544 if (Op < MinVal || Op > MaxVal) { 6545 Error(Loc, ErrMsg); 6546 return false; 6547 } 6548 6549 return true; 6550 } 6551 6552 bool 6553 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6554 const unsigned MinVal, 6555 const unsigned MaxVal, 6556 const StringRef ErrMsg) { 6557 SMLoc Loc; 6558 for (unsigned i = 0; i < OpNum; ++i) { 6559 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6560 return false; 6561 } 6562 6563 return true; 6564 } 6565 6566 bool 6567 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6568 using namespace llvm::AMDGPU::Swizzle; 6569 6570 int64_t Lane[LANE_NUM]; 6571 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6572 "expected a 2-bit lane id")) { 6573 Imm = QUAD_PERM_ENC; 6574 for (unsigned I = 0; I < LANE_NUM; ++I) { 6575 Imm |= Lane[I] << (LANE_SHIFT * I); 6576 } 6577 return true; 6578 } 6579 return false; 6580 } 6581 6582 bool 6583 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6584 using namespace llvm::AMDGPU::Swizzle; 6585 6586 SMLoc Loc; 6587 int64_t GroupSize; 6588 int64_t LaneIdx; 6589 6590 if (!parseSwizzleOperand(GroupSize, 6591 2, 32, 6592 "group size must be in the interval [2,32]", 6593 Loc)) { 6594 return false; 6595 } 6596 if (!isPowerOf2_64(GroupSize)) { 6597 Error(Loc, "group size must be a power of two"); 6598 return false; 6599 } 6600 if (parseSwizzleOperand(LaneIdx, 6601 0, GroupSize - 1, 6602 "lane id must be in the interval [0,group size - 1]", 6603 Loc)) { 6604 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6605 return true; 6606 } 6607 return false; 6608 } 6609 6610 bool 6611 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6612 using namespace llvm::AMDGPU::Swizzle; 6613 6614 SMLoc Loc; 6615 int64_t GroupSize; 6616 6617 if (!parseSwizzleOperand(GroupSize, 6618 2, 32, 6619 "group size must be in the interval [2,32]", 6620 Loc)) { 6621 return false; 6622 } 6623 if (!isPowerOf2_64(GroupSize)) { 6624 Error(Loc, "group size must be a power of two"); 6625 return false; 6626 } 6627 6628 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6629 return true; 6630 } 6631 6632 bool 6633 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6634 using namespace llvm::AMDGPU::Swizzle; 6635 6636 SMLoc Loc; 6637 int64_t GroupSize; 6638 6639 if (!parseSwizzleOperand(GroupSize, 6640 1, 16, 6641 "group size must be in the interval [1,16]", 6642 Loc)) { 6643 return false; 6644 } 6645 if (!isPowerOf2_64(GroupSize)) { 6646 Error(Loc, "group size must be a power of two"); 6647 return false; 6648 } 6649 6650 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6651 return true; 6652 } 6653 6654 bool 6655 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6656 using namespace llvm::AMDGPU::Swizzle; 6657 6658 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6659 return false; 6660 } 6661 6662 StringRef Ctl; 6663 SMLoc StrLoc = getLoc(); 6664 if (!parseString(Ctl)) { 6665 return false; 6666 } 6667 if (Ctl.size() != BITMASK_WIDTH) { 6668 Error(StrLoc, "expected a 5-character mask"); 6669 return false; 6670 } 6671 6672 unsigned AndMask = 0; 6673 unsigned OrMask = 0; 6674 unsigned XorMask = 0; 6675 6676 for (size_t i = 0; i < Ctl.size(); ++i) { 6677 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6678 switch(Ctl[i]) { 6679 default: 6680 Error(StrLoc, "invalid mask"); 6681 return false; 6682 case '0': 6683 break; 6684 case '1': 6685 OrMask |= Mask; 6686 break; 6687 case 'p': 6688 AndMask |= Mask; 6689 break; 6690 case 'i': 6691 AndMask |= Mask; 6692 XorMask |= Mask; 6693 break; 6694 } 6695 } 6696 6697 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6698 return true; 6699 } 6700 6701 bool 6702 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6703 6704 SMLoc OffsetLoc = getLoc(); 6705 6706 if (!parseExpr(Imm, "a swizzle macro")) { 6707 return false; 6708 } 6709 if (!isUInt<16>(Imm)) { 6710 Error(OffsetLoc, "expected a 16-bit offset"); 6711 return false; 6712 } 6713 return true; 6714 } 6715 6716 bool 6717 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6718 using namespace llvm::AMDGPU::Swizzle; 6719 6720 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6721 6722 SMLoc ModeLoc = getLoc(); 6723 bool Ok = false; 6724 6725 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6726 Ok = parseSwizzleQuadPerm(Imm); 6727 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6728 Ok = parseSwizzleBitmaskPerm(Imm); 6729 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6730 Ok = parseSwizzleBroadcast(Imm); 6731 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6732 Ok = parseSwizzleSwap(Imm); 6733 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6734 Ok = parseSwizzleReverse(Imm); 6735 } else { 6736 Error(ModeLoc, "expected a swizzle mode"); 6737 } 6738 6739 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6740 } 6741 6742 return false; 6743 } 6744 6745 OperandMatchResultTy 6746 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6747 SMLoc S = getLoc(); 6748 int64_t Imm = 0; 6749 6750 if (trySkipId("offset")) { 6751 6752 bool Ok = false; 6753 if (skipToken(AsmToken::Colon, "expected a colon")) { 6754 if (trySkipId("swizzle")) { 6755 Ok = parseSwizzleMacro(Imm); 6756 } else { 6757 Ok = parseSwizzleOffset(Imm); 6758 } 6759 } 6760 6761 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6762 6763 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6764 } else { 6765 // Swizzle "offset" operand is optional. 6766 // If it is omitted, try parsing other optional operands. 6767 return parseOptionalOpr(Operands); 6768 } 6769 } 6770 6771 bool 6772 AMDGPUOperand::isSwizzle() const { 6773 return isImmTy(ImmTySwizzle); 6774 } 6775 6776 //===----------------------------------------------------------------------===// 6777 // VGPR Index Mode 6778 //===----------------------------------------------------------------------===// 6779 6780 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6781 6782 using namespace llvm::AMDGPU::VGPRIndexMode; 6783 6784 if (trySkipToken(AsmToken::RParen)) { 6785 return OFF; 6786 } 6787 6788 int64_t Imm = 0; 6789 6790 while (true) { 6791 unsigned Mode = 0; 6792 SMLoc S = getLoc(); 6793 6794 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6795 if (trySkipId(IdSymbolic[ModeId])) { 6796 Mode = 1 << ModeId; 6797 break; 6798 } 6799 } 6800 6801 if (Mode == 0) { 6802 Error(S, (Imm == 0)? 
6803 "expected a VGPR index mode or a closing parenthesis" : 6804 "expected a VGPR index mode"); 6805 return UNDEF; 6806 } 6807 6808 if (Imm & Mode) { 6809 Error(S, "duplicate VGPR index mode"); 6810 return UNDEF; 6811 } 6812 Imm |= Mode; 6813 6814 if (trySkipToken(AsmToken::RParen)) 6815 break; 6816 if (!skipToken(AsmToken::Comma, 6817 "expected a comma or a closing parenthesis")) 6818 return UNDEF; 6819 } 6820 6821 return Imm; 6822 } 6823 6824 OperandMatchResultTy 6825 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6826 6827 using namespace llvm::AMDGPU::VGPRIndexMode; 6828 6829 int64_t Imm = 0; 6830 SMLoc S = getLoc(); 6831 6832 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6833 Imm = parseGPRIdxMacro(); 6834 if (Imm == UNDEF) 6835 return MatchOperand_ParseFail; 6836 } else { 6837 if (getParser().parseAbsoluteExpression(Imm)) 6838 return MatchOperand_ParseFail; 6839 if (Imm < 0 || !isUInt<4>(Imm)) { 6840 Error(S, "invalid immediate: only 4-bit values are legal"); 6841 return MatchOperand_ParseFail; 6842 } 6843 } 6844 6845 Operands.push_back( 6846 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6847 return MatchOperand_Success; 6848 } 6849 6850 bool AMDGPUOperand::isGPRIdxMode() const { 6851 return isImmTy(ImmTyGprIdxMode); 6852 } 6853 6854 //===----------------------------------------------------------------------===// 6855 // sopp branch targets 6856 //===----------------------------------------------------------------------===// 6857 6858 OperandMatchResultTy 6859 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6860 6861 // Make sure we are not parsing something 6862 // that looks like a label or an expression but is not. 6863 // This will improve error messages. 6864 if (isRegister() || isModifier()) 6865 return MatchOperand_NoMatch; 6866 6867 if (!parseExpr(Operands)) 6868 return MatchOperand_ParseFail; 6869 6870 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6871 assert(Opr.isImm() || Opr.isExpr()); 6872 SMLoc Loc = Opr.getStartLoc(); 6873 6874 // Currently we do not support arbitrary expressions as branch targets. 6875 // Only labels and absolute expressions are accepted. 
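  // Illustration (editor's addition): accepted forms are a label, e.g.
  // "s_branch loop_end" (where loop_end is any symbol), or an absolute
  // expression that fits into a signed 16-bit offset, e.g. "s_branch 0x3c".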
6876 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6877 Error(Loc, "expected an absolute expression or a label"); 6878 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6879 Error(Loc, "expected a 16-bit signed jump offset"); 6880 } 6881 6882 return MatchOperand_Success; 6883 } 6884 6885 //===----------------------------------------------------------------------===// 6886 // Boolean holding registers 6887 //===----------------------------------------------------------------------===// 6888 6889 OperandMatchResultTy 6890 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6891 return parseReg(Operands); 6892 } 6893 6894 //===----------------------------------------------------------------------===// 6895 // mubuf 6896 //===----------------------------------------------------------------------===// 6897 6898 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6899 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6900 } 6901 6902 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6903 const OperandVector &Operands, 6904 bool IsAtomic, 6905 bool IsLds) { 6906 bool IsLdsOpcode = IsLds; 6907 bool HasLdsModifier = false; 6908 OptionalImmIndexMap OptionalIdx; 6909 unsigned FirstOperandIdx = 1; 6910 bool IsAtomicReturn = false; 6911 6912 if (IsAtomic) { 6913 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6914 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6915 if (!Op.isCPol()) 6916 continue; 6917 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 6918 break; 6919 } 6920 6921 if (!IsAtomicReturn) { 6922 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 6923 if (NewOpc != -1) 6924 Inst.setOpcode(NewOpc); 6925 } 6926 6927 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 6928 SIInstrFlags::IsAtomicRet; 6929 } 6930 6931 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6932 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6933 6934 // Add the register arguments 6935 if (Op.isReg()) { 6936 Op.addRegOperands(Inst, 1); 6937 // Insert a tied src for atomic return dst. 6938 // This cannot be postponed as subsequent calls to 6939 // addImmOperands rely on correct number of MC operands. 6940 if (IsAtomicReturn && i == FirstOperandIdx) 6941 Op.addRegOperands(Inst, 1); 6942 continue; 6943 } 6944 6945 // Handle the case where soffset is an immediate 6946 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6947 Op.addImmOperands(Inst, 1); 6948 continue; 6949 } 6950 6951 HasLdsModifier |= Op.isLDS(); 6952 6953 // Handle tokens like 'offen' which are sometimes hard-coded into the 6954 // asm string. There are no MCInst operands for these. 6955 if (Op.isToken()) { 6956 continue; 6957 } 6958 assert(Op.isImm()); 6959 6960 // Handle optional arguments 6961 OptionalIdx[Op.getImmTy()] = i; 6962 } 6963 6964 // This is a workaround for an llvm quirk which may result in an 6965 // incorrect instruction selection. Lds and non-lds versions of 6966 // MUBUF instructions are identical except that lds versions 6967 // have mandatory 'lds' modifier. However this modifier follows 6968 // optional modifiers and llvm asm matcher regards this 'lds' 6969 // modifier as an optional one. As a result, an lds version 6970 // of opcode may be selected even if it has no 'lds' modifier. 6971 if (IsLdsOpcode && !HasLdsModifier) { 6972 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6973 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6974 Inst.setOpcode(NoLdsOpcode); 6975 IsLdsOpcode = false; 6976 } 6977 } 6978 6979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6980 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 6981 6982 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6983 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6984 } 6985 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6986 } 6987 6988 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6989 OptionalImmIndexMap OptionalIdx; 6990 6991 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6992 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6993 6994 // Add the register arguments 6995 if (Op.isReg()) { 6996 Op.addRegOperands(Inst, 1); 6997 continue; 6998 } 6999 7000 // Handle the case where soffset is an immediate 7001 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7002 Op.addImmOperands(Inst, 1); 7003 continue; 7004 } 7005 7006 // Handle tokens like 'offen' which are sometimes hard-coded into the 7007 // asm string. There are no MCInst operands for these. 7008 if (Op.isToken()) { 7009 continue; 7010 } 7011 assert(Op.isImm()); 7012 7013 // Handle optional arguments 7014 OptionalIdx[Op.getImmTy()] = i; 7015 } 7016 7017 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7018 AMDGPUOperand::ImmTyOffset); 7019 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7020 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7021 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7022 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7023 } 7024 7025 //===----------------------------------------------------------------------===// 7026 // mimg 7027 //===----------------------------------------------------------------------===// 7028 7029 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7030 bool IsAtomic) { 7031 unsigned I = 1; 7032 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7033 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7034 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7035 } 7036 7037 if (IsAtomic) { 7038 // Add src, same as dst 7039 assert(Desc.getNumDefs() == 1); 7040 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7041 } 7042 7043 OptionalImmIndexMap OptionalIdx; 7044 7045 for (unsigned E = Operands.size(); I != E; ++I) { 7046 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7047 7048 // Add the register arguments 7049 if (Op.isReg()) { 7050 Op.addRegOperands(Inst, 1); 7051 } else if (Op.isImmModifier()) { 7052 OptionalIdx[Op.getImmTy()] = I; 7053 } else if (!Op.isToken()) { 7054 llvm_unreachable("unexpected operand type"); 7055 } 7056 } 7057 7058 bool IsGFX10Plus = isGFX10Plus(); 7059 7060 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7061 if (IsGFX10Plus) 7062 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7063 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7064 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7065 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7066 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7067 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7068 if (IsGFX10Plus) 
7069 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7070 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7071 if (!IsGFX10Plus) 7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7073 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7074 } 7075 7076 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7077 cvtMIMG(Inst, Operands, true); 7078 } 7079 7080 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7081 OptionalImmIndexMap OptionalIdx; 7082 bool IsAtomicReturn = false; 7083 7084 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7085 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7086 if (!Op.isCPol()) 7087 continue; 7088 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7089 break; 7090 } 7091 7092 if (!IsAtomicReturn) { 7093 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7094 if (NewOpc != -1) 7095 Inst.setOpcode(NewOpc); 7096 } 7097 7098 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7099 SIInstrFlags::IsAtomicRet; 7100 7101 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7102 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7103 7104 // Add the register arguments 7105 if (Op.isReg()) { 7106 Op.addRegOperands(Inst, 1); 7107 if (IsAtomicReturn && i == 1) 7108 Op.addRegOperands(Inst, 1); 7109 continue; 7110 } 7111 7112 // Handle the case where soffset is an immediate 7113 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7114 Op.addImmOperands(Inst, 1); 7115 continue; 7116 } 7117 7118 // Handle tokens like 'offen' which are sometimes hard-coded into the 7119 // asm string. There are no MCInst operands for these. 7120 if (Op.isToken()) { 7121 continue; 7122 } 7123 assert(Op.isImm()); 7124 7125 // Handle optional arguments 7126 OptionalIdx[Op.getImmTy()] = i; 7127 } 7128 7129 if ((int)Inst.getNumOperands() <= 7130 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7132 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7133 } 7134 7135 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7136 const OperandVector &Operands) { 7137 for (unsigned I = 1; I < Operands.size(); ++I) { 7138 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7139 if (Operand.isReg()) 7140 Operand.addRegOperands(Inst, 1); 7141 } 7142 7143 Inst.addOperand(MCOperand::createImm(1)); // a16 7144 } 7145 7146 //===----------------------------------------------------------------------===// 7147 // smrd 7148 //===----------------------------------------------------------------------===// 7149 7150 bool AMDGPUOperand::isSMRDOffset8() const { 7151 return isImm() && isUInt<8>(getImm()); 7152 } 7153 7154 bool AMDGPUOperand::isSMEMOffset() const { 7155 return isImm(); // Offset range is checked later by validator. 7156 } 7157 7158 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7159 // 32-bit literals are only supported on CI and we only want to use them 7160 // when the offset is > 8-bits. 
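  // Illustration (editor's addition, assumed syntax): e.g.
  // "s_load_dword s5, s[2:3], 0xfff00" needs the CI 32-bit literal offset
  // encoding, since the offset does not fit in 8 bits.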
7161 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7162 } 7163 7164 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7165 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7166 } 7167 7168 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7169 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7170 } 7171 7172 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7173 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7174 } 7175 7176 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7177 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7178 } 7179 7180 //===----------------------------------------------------------------------===// 7181 // vop3 7182 //===----------------------------------------------------------------------===// 7183 7184 static bool ConvertOmodMul(int64_t &Mul) { 7185 if (Mul != 1 && Mul != 2 && Mul != 4) 7186 return false; 7187 7188 Mul >>= 1; 7189 return true; 7190 } 7191 7192 static bool ConvertOmodDiv(int64_t &Div) { 7193 if (Div == 1) { 7194 Div = 0; 7195 return true; 7196 } 7197 7198 if (Div == 2) { 7199 Div = 3; 7200 return true; 7201 } 7202 7203 return false; 7204 } 7205 7206 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7207 // This is intentional and ensures compatibility with sp3. 7208 // See bug 35397 for details. 7209 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7210 if (BoundCtrl == 0 || BoundCtrl == 1) { 7211 BoundCtrl = 1; 7212 return true; 7213 } 7214 return false; 7215 } 7216 7217 // Note: the order in this table matches the order of operands in AsmString. 7218 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7219 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7220 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7221 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7222 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7223 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7224 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7225 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7226 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7227 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7228 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7229 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7230 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7231 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7232 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7233 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7234 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7235 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7236 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7237 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7238 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7239 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7240 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7241 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7242 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7243 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7244 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7245 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7246 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7247 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7248 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7249 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7250 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7251 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr}, 7252 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7253 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7254 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7255 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7256 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7257 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7258 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7259 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7260 }; 7261 7262 void AMDGPUAsmParser::onBeginOfFile() { 7263 if (!getParser().getStreamer().getTargetStreamer() || 7264 getSTI().getTargetTriple().getArch() == Triple::r600) 7265 return; 7266 7267 if (!getTargetStreamer().getTargetID()) 7268 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7269 7270 if (isHsaAbiVersion3Or4(&getSTI())) 7271 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7272 } 7273 7274 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7275 7276 OperandMatchResultTy res = parseOptionalOpr(Operands); 7277 7278 // This is a hack to enable hardcoded mandatory operands which follow 7279 // optional operands. 7280 // 7281 // The current design assumes that all operands after the first optional operand 7282 // are also optional. However, the implementation of some instructions violates 7283 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands). 7284 // 7285 // To alleviate this problem, we have to (implicitly) parse extra operands 7286 // to make sure the autogenerated parser of custom operands never hits 7287 // hardcoded mandatory operands.
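// For example (illustrative), a return atomic like
// 'flat_atomic_swap v0, v[1:2], v3 glc' has the 'glc' hardcoded in its
// AsmString as a mandatory token even though it follows operands that are
// otherwise optional.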
7288 7289 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7290 if (res != MatchOperand_Success || 7291 isToken(AsmToken::EndOfStatement)) 7292 break; 7293 7294 trySkipToken(AsmToken::Comma); 7295 res = parseOptionalOpr(Operands); 7296 } 7297 7298 return res; 7299 } 7300 7301 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7302 OperandMatchResultTy res; 7303 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7304 // try to parse any optional operand here 7305 if (Op.IsBit) { 7306 res = parseNamedBit(Op.Name, Operands, Op.Type); 7307 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7308 res = parseOModOperand(Operands); 7309 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7310 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7311 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7312 res = parseSDWASel(Operands, Op.Name, Op.Type); 7313 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7314 res = parseSDWADstUnused(Operands); 7315 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7316 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7317 Op.Type == AMDGPUOperand::ImmTyNegLo || 7318 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7319 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7320 Op.ConvertResult); 7321 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7322 res = parseDim(Operands); 7323 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7324 res = parseCPol(Operands); 7325 } else { 7326 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7327 } 7328 if (res != MatchOperand_NoMatch) { 7329 return res; 7330 } 7331 } 7332 return MatchOperand_NoMatch; 7333 } 7334 7335 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7336 StringRef Name = getTokenStr(); 7337 if (Name == "mul") { 7338 return parseIntWithPrefix("mul", Operands, 7339 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7340 } 7341 7342 if (Name == "div") { 7343 return parseIntWithPrefix("div", Operands, 7344 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7345 } 7346 7347 return MatchOperand_NoMatch; 7348 } 7349 7350 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7351 cvtVOP3P(Inst, Operands); 7352 7353 int Opc = Inst.getOpcode(); 7354 7355 int SrcNum; 7356 const int Ops[] = { AMDGPU::OpName::src0, 7357 AMDGPU::OpName::src1, 7358 AMDGPU::OpName::src2 }; 7359 for (SrcNum = 0; 7360 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7361 ++SrcNum); 7362 assert(SrcNum > 0); 7363 7364 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7365 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7366 7367 if ((OpSel & (1 << SrcNum)) != 0) { 7368 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7369 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7370 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7371 } 7372 } 7373 7374 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7375 // 1. This operand is input modifiers 7376 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7377 // 2. This is not last operand 7378 && Desc.NumOperands > (OpNum + 1) 7379 // 3. Next operand is register class 7380 && Desc.OpInfo[OpNum + 1].RegClass != -1 7381 // 4. 
Next register is not tied to any other operand 7382 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7383 } 7384 7385 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7386 { 7387 OptionalImmIndexMap OptionalIdx; 7388 unsigned Opc = Inst.getOpcode(); 7389 7390 unsigned I = 1; 7391 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7392 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7393 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7394 } 7395 7396 for (unsigned E = Operands.size(); I != E; ++I) { 7397 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7398 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7399 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7400 } else if (Op.isInterpSlot() || 7401 Op.isInterpAttr() || 7402 Op.isAttrChan()) { 7403 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7404 } else if (Op.isImmModifier()) { 7405 OptionalIdx[Op.getImmTy()] = I; 7406 } else { 7407 llvm_unreachable("unhandled operand type"); 7408 } 7409 } 7410 7411 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7412 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7413 } 7414 7415 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7417 } 7418 7419 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7420 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7421 } 7422 } 7423 7424 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7425 OptionalImmIndexMap &OptionalIdx) { 7426 unsigned Opc = Inst.getOpcode(); 7427 7428 unsigned I = 1; 7429 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7430 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7431 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7432 } 7433 7434 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7435 // This instruction has src modifiers 7436 for (unsigned E = Operands.size(); I != E; ++I) { 7437 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7438 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7439 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7440 } else if (Op.isImmModifier()) { 7441 OptionalIdx[Op.getImmTy()] = I; 7442 } else if (Op.isRegOrImm()) { 7443 Op.addRegOrImmOperands(Inst, 1); 7444 } else { 7445 llvm_unreachable("unhandled operand type"); 7446 } 7447 } 7448 } else { 7449 // No src modifiers 7450 for (unsigned E = Operands.size(); I != E; ++I) { 7451 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7452 if (Op.isMod()) { 7453 OptionalIdx[Op.getImmTy()] = I; 7454 } else { 7455 Op.addRegOrImmOperands(Inst, 1); 7456 } 7457 } 7458 } 7459 7460 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7461 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7462 } 7463 7464 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7465 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7466 } 7467 7468 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7469 // it has src2 register operand that is tied to dst operand 7470 // we don't allow modifiers for this operand in assembler so src2_modifiers 7471 // should be 0. 
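// For example (illustrative), for 'v_mac_f32_e64 v0, v1, v2' the code below
// inserts src2_modifiers = 0 and then a copy of the dst register operand (v0)
// as the tied src2, since neither appears in the assembly text.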
7472 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7473 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7474 Opc == AMDGPU::V_MAC_F32_e64_vi || 7475 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7476 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7477 Opc == AMDGPU::V_MAC_F16_e64_vi || 7478 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7479 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7480 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7481 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7482 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7483 auto it = Inst.begin(); 7484 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7485 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7486 ++it; 7487 // Copy the operand to ensure it's not invalidated when Inst grows. 7488 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7489 } 7490 } 7491 7492 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7493 OptionalImmIndexMap OptionalIdx; 7494 cvtVOP3(Inst, Operands, OptionalIdx); 7495 } 7496 7497 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7498 OptionalImmIndexMap &OptIdx) { 7499 const int Opc = Inst.getOpcode(); 7500 const MCInstrDesc &Desc = MII.get(Opc); 7501 7502 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7503 7504 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7505 assert(!IsPacked); 7506 Inst.addOperand(Inst.getOperand(0)); 7507 } 7508 7509 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7510 // instruction, and then figure out where to actually put the modifiers 7511 7512 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7513 if (OpSelIdx != -1) { 7514 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7515 } 7516 7517 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7518 if (OpSelHiIdx != -1) { 7519 int DefaultVal = IsPacked ? 
-1 : 0; 7520 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7521 DefaultVal); 7522 } 7523 7524 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7525 if (NegLoIdx != -1) { 7526 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7527 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7528 } 7529 7530 const int Ops[] = { AMDGPU::OpName::src0, 7531 AMDGPU::OpName::src1, 7532 AMDGPU::OpName::src2 }; 7533 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7534 AMDGPU::OpName::src1_modifiers, 7535 AMDGPU::OpName::src2_modifiers }; 7536 7537 unsigned OpSel = 0; 7538 unsigned OpSelHi = 0; 7539 unsigned NegLo = 0; 7540 unsigned NegHi = 0; 7541 7542 if (OpSelIdx != -1) 7543 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7544 7545 if (OpSelHiIdx != -1) 7546 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7547 7548 if (NegLoIdx != -1) { 7549 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7550 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7551 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7552 } 7553 7554 for (int J = 0; J < 3; ++J) { 7555 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7556 if (OpIdx == -1) 7557 break; 7558 7559 uint32_t ModVal = 0; 7560 7561 if ((OpSel & (1 << J)) != 0) 7562 ModVal |= SISrcMods::OP_SEL_0; 7563 7564 if ((OpSelHi & (1 << J)) != 0) 7565 ModVal |= SISrcMods::OP_SEL_1; 7566 7567 if ((NegLo & (1 << J)) != 0) 7568 ModVal |= SISrcMods::NEG; 7569 7570 if ((NegHi & (1 << J)) != 0) 7571 ModVal |= SISrcMods::NEG_HI; 7572 7573 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7574 7575 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7576 } 7577 } 7578 7579 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7580 OptionalImmIndexMap OptIdx; 7581 cvtVOP3(Inst, Operands, OptIdx); 7582 cvtVOP3P(Inst, Operands, OptIdx); 7583 } 7584 7585 //===----------------------------------------------------------------------===// 7586 // dpp 7587 //===----------------------------------------------------------------------===// 7588 7589 bool AMDGPUOperand::isDPP8() const { 7590 return isImmTy(ImmTyDPP8); 7591 } 7592 7593 bool AMDGPUOperand::isDPPCtrl() const { 7594 using namespace AMDGPU::DPP; 7595 7596 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7597 if (result) { 7598 int64_t Imm = getImm(); 7599 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7600 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7601 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7602 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7603 (Imm == DppCtrl::WAVE_SHL1) || 7604 (Imm == DppCtrl::WAVE_ROL1) || 7605 (Imm == DppCtrl::WAVE_SHR1) || 7606 (Imm == DppCtrl::WAVE_ROR1) || 7607 (Imm == DppCtrl::ROW_MIRROR) || 7608 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7609 (Imm == DppCtrl::BCAST15) || 7610 (Imm == DppCtrl::BCAST31) || 7611 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7612 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7613 } 7614 return false; 7615 } 7616 7617 //===----------------------------------------------------------------------===// 7618 // mAI 7619 //===----------------------------------------------------------------------===// 7620 7621 bool AMDGPUOperand::isBLGP() const { 7622 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7623 } 7624 7625 bool 
AMDGPUOperand::isCBSZ() const { 7626 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7627 } 7628 7629 bool AMDGPUOperand::isABID() const { 7630 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7631 } 7632 7633 bool AMDGPUOperand::isS16Imm() const { 7634 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7635 } 7636 7637 bool AMDGPUOperand::isU16Imm() const { 7638 return isImm() && isUInt<16>(getImm()); 7639 } 7640 7641 //===----------------------------------------------------------------------===// 7642 // dim 7643 //===----------------------------------------------------------------------===// 7644 7645 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7646 // We want to allow "dim:1D" etc., 7647 // but the initial 1 is tokenized as an integer. 7648 std::string Token; 7649 if (isToken(AsmToken::Integer)) { 7650 SMLoc Loc = getToken().getEndLoc(); 7651 Token = std::string(getTokenStr()); 7652 lex(); 7653 if (getLoc() != Loc) 7654 return false; 7655 } 7656 7657 StringRef Suffix; 7658 if (!parseId(Suffix)) 7659 return false; 7660 Token += Suffix; 7661 7662 StringRef DimId = Token; 7663 if (DimId.startswith("SQ_RSRC_IMG_")) 7664 DimId = DimId.drop_front(12); 7665 7666 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7667 if (!DimInfo) 7668 return false; 7669 7670 Encoding = DimInfo->Encoding; 7671 return true; 7672 } 7673 7674 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7675 if (!isGFX10Plus()) 7676 return MatchOperand_NoMatch; 7677 7678 SMLoc S = getLoc(); 7679 7680 if (!trySkipId("dim", AsmToken::Colon)) 7681 return MatchOperand_NoMatch; 7682 7683 unsigned Encoding; 7684 SMLoc Loc = getLoc(); 7685 if (!parseDimId(Encoding)) { 7686 Error(Loc, "invalid dim value"); 7687 return MatchOperand_ParseFail; 7688 } 7689 7690 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7691 AMDGPUOperand::ImmTyDim)); 7692 return MatchOperand_Success; 7693 } 7694 7695 //===----------------------------------------------------------------------===// 7696 // dpp 7697 //===----------------------------------------------------------------------===// 7698 7699 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7700 SMLoc S = getLoc(); 7701 7702 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7703 return MatchOperand_NoMatch; 7704 7705 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7706 7707 int64_t Sels[8]; 7708 7709 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7710 return MatchOperand_ParseFail; 7711 7712 for (size_t i = 0; i < 8; ++i) { 7713 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7714 return MatchOperand_ParseFail; 7715 7716 SMLoc Loc = getLoc(); 7717 if (getParser().parseAbsoluteExpression(Sels[i])) 7718 return MatchOperand_ParseFail; 7719 if (0 > Sels[i] || 7 < Sels[i]) { 7720 Error(Loc, "expected a 3-bit value"); 7721 return MatchOperand_ParseFail; 7722 } 7723 } 7724 7725 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7726 return MatchOperand_ParseFail; 7727 7728 unsigned DPP8 = 0; 7729 for (size_t i = 0; i < 8; ++i) 7730 DPP8 |= (Sels[i] << (i * 3)); 7731 7732 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7733 return MatchOperand_Success; 7734 } 7735 7736 bool 7737 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7738 const OperandVector &Operands) { 7739 if (Ctrl == "row_newbcast") 7740 return isGFX90A(); 7741 7742 // DPP64 is supported for row_newbcast 
only. 7743 const MCRegisterInfo *MRI = getMRI(); 7744 if (Operands.size() > 2 && Operands[1]->isReg() && 7745 MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1)) 7746 return false; 7747 7748 if (Ctrl == "row_share" || 7749 Ctrl == "row_xmask") 7750 return isGFX10Plus(); 7751 7752 if (Ctrl == "wave_shl" || 7753 Ctrl == "wave_shr" || 7754 Ctrl == "wave_rol" || 7755 Ctrl == "wave_ror" || 7756 Ctrl == "row_bcast") 7757 return isVI() || isGFX9(); 7758 7759 return Ctrl == "row_mirror" || 7760 Ctrl == "row_half_mirror" || 7761 Ctrl == "quad_perm" || 7762 Ctrl == "row_shl" || 7763 Ctrl == "row_shr" || 7764 Ctrl == "row_ror"; 7765 } 7766 7767 int64_t 7768 AMDGPUAsmParser::parseDPPCtrlPerm() { 7769 // quad_perm:[%d,%d,%d,%d] 7770 7771 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7772 return -1; 7773 7774 int64_t Val = 0; 7775 for (int i = 0; i < 4; ++i) { 7776 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7777 return -1; 7778 7779 int64_t Temp; 7780 SMLoc Loc = getLoc(); 7781 if (getParser().parseAbsoluteExpression(Temp)) 7782 return -1; 7783 if (Temp < 0 || Temp > 3) { 7784 Error(Loc, "expected a 2-bit value"); 7785 return -1; 7786 } 7787 7788 Val += (Temp << i * 2); 7789 } 7790 7791 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7792 return -1; 7793 7794 return Val; 7795 } 7796 7797 int64_t 7798 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7799 using namespace AMDGPU::DPP; 7800 7801 // sel:%d 7802 7803 int64_t Val; 7804 SMLoc Loc = getLoc(); 7805 7806 if (getParser().parseAbsoluteExpression(Val)) 7807 return -1; 7808 7809 struct DppCtrlCheck { 7810 int64_t Ctrl; 7811 int Lo; 7812 int Hi; 7813 }; 7814 7815 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7816 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7817 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7818 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7819 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7820 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7821 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7822 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7823 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7824 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7825 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7826 .Default({-1, 0, 0}); 7827 7828 bool Valid; 7829 if (Check.Ctrl == -1) { 7830 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7831 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7832 } else { 7833 Valid = Check.Lo <= Val && Val <= Check.Hi; 7834 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7835 } 7836 7837 if (!Valid) { 7838 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7839 return -1; 7840 } 7841 7842 return Val; 7843 } 7844 7845 OperandMatchResultTy 7846 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7847 using namespace AMDGPU::DPP; 7848 7849 if (!isToken(AsmToken::Identifier) || 7850 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7851 return MatchOperand_NoMatch; 7852 7853 SMLoc S = getLoc(); 7854 int64_t Val = -1; 7855 StringRef Ctrl; 7856 7857 parseId(Ctrl); 7858 7859 if (Ctrl == "row_mirror") { 7860 Val = DppCtrl::ROW_MIRROR; 7861 } else if (Ctrl == "row_half_mirror") { 7862 Val = DppCtrl::ROW_HALF_MIRROR; 7863 } else { 7864 if (skipToken(AsmToken::Colon, "expected a colon")) { 7865 if (Ctrl == "quad_perm") { 7866 Val = parseDPPCtrlPerm(); 7867 } else { 7868 Val = parseDPPCtrlSel(Ctrl); 7869 } 7870 } 7871 } 7872 7873 if (Val == -1) 7874 return MatchOperand_ParseFail; 7875 7876 Operands.push_back( 7877 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7878 return MatchOperand_Success; 7879 } 7880 7881 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7882 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7883 } 7884 7885 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7886 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7887 } 7888 7889 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7890 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7891 } 7892 7893 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7894 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7895 } 7896 7897 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7898 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7899 } 7900 7901 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7902 OptionalImmIndexMap OptionalIdx; 7903 7904 unsigned I = 1; 7905 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7906 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7907 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7908 } 7909 7910 int Fi = 0; 7911 for (unsigned E = Operands.size(); I != E; ++I) { 7912 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7913 MCOI::TIED_TO); 7914 if (TiedTo != -1) { 7915 assert((unsigned)TiedTo < Inst.getNumOperands()); 7916 // handle tied old or src2 for MAC instructions 7917 Inst.addOperand(Inst.getOperand(TiedTo)); 7918 } 7919 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7920 // Add the register arguments 7921 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7922 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7923 // Skip it. 
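// (Illustrative) e.g. on VI, 'v_add_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3]
// row_mask:0xf bank_mask:0xf' spells out 'vcc', but the DPP MCInst has no
// explicit operand for it, so it is dropped here.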
7924 continue; 7925 } 7926 7927 if (IsDPP8) { 7928 if (Op.isDPP8()) { 7929 Op.addImmOperands(Inst, 1); 7930 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7931 Op.addRegWithFPInputModsOperands(Inst, 2); 7932 } else if (Op.isFI()) { 7933 Fi = Op.getImm(); 7934 } else if (Op.isReg()) { 7935 Op.addRegOperands(Inst, 1); 7936 } else { 7937 llvm_unreachable("Invalid operand type"); 7938 } 7939 } else { 7940 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7941 Op.addRegWithFPInputModsOperands(Inst, 2); 7942 } else if (Op.isDPPCtrl()) { 7943 Op.addImmOperands(Inst, 1); 7944 } else if (Op.isImm()) { 7945 // Handle optional arguments 7946 OptionalIdx[Op.getImmTy()] = I; 7947 } else { 7948 llvm_unreachable("Invalid operand type"); 7949 } 7950 } 7951 } 7952 7953 if (IsDPP8) { 7954 using namespace llvm::AMDGPU::DPP; 7955 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7956 } else { 7957 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7958 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7959 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7960 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7961 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7962 } 7963 } 7964 } 7965 7966 //===----------------------------------------------------------------------===// 7967 // sdwa 7968 //===----------------------------------------------------------------------===// 7969 7970 OperandMatchResultTy 7971 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7972 AMDGPUOperand::ImmTy Type) { 7973 using namespace llvm::AMDGPU::SDWA; 7974 7975 SMLoc S = getLoc(); 7976 StringRef Value; 7977 OperandMatchResultTy res; 7978 7979 SMLoc StringLoc; 7980 res = parseStringWithPrefix(Prefix, Value, StringLoc); 7981 if (res != MatchOperand_Success) { 7982 return res; 7983 } 7984 7985 int64_t Int; 7986 Int = StringSwitch<int64_t>(Value) 7987 .Case("BYTE_0", SdwaSel::BYTE_0) 7988 .Case("BYTE_1", SdwaSel::BYTE_1) 7989 .Case("BYTE_2", SdwaSel::BYTE_2) 7990 .Case("BYTE_3", SdwaSel::BYTE_3) 7991 .Case("WORD_0", SdwaSel::WORD_0) 7992 .Case("WORD_1", SdwaSel::WORD_1) 7993 .Case("DWORD", SdwaSel::DWORD) 7994 .Default(0xffffffff); 7995 7996 if (Int == 0xffffffff) { 7997 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 7998 return MatchOperand_ParseFail; 7999 } 8000 8001 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8002 return MatchOperand_Success; 8003 } 8004 8005 OperandMatchResultTy 8006 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8007 using namespace llvm::AMDGPU::SDWA; 8008 8009 SMLoc S = getLoc(); 8010 StringRef Value; 8011 OperandMatchResultTy res; 8012 8013 SMLoc StringLoc; 8014 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8015 if (res != MatchOperand_Success) { 8016 return res; 8017 } 8018 8019 int64_t Int; 8020 Int = StringSwitch<int64_t>(Value) 8021 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8022 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8023 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8024 .Default(0xffffffff); 8025 8026 if (Int == 0xffffffff) { 8027 Error(StringLoc, "invalid dst_unused value"); 8028 return MatchOperand_ParseFail; 8029 } 8030 8031 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8032 return MatchOperand_Success; 8033 } 8034 8035 void 
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8036 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8037 } 8038 8039 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8040 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8041 } 8042 8043 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8044 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8045 } 8046 8047 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8048 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8049 } 8050 8051 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8052 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8053 } 8054 8055 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8056 uint64_t BasicInstType, 8057 bool SkipDstVcc, 8058 bool SkipSrcVcc) { 8059 using namespace llvm::AMDGPU::SDWA; 8060 8061 OptionalImmIndexMap OptionalIdx; 8062 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8063 bool SkippedVcc = false; 8064 8065 unsigned I = 1; 8066 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8067 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8068 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8069 } 8070 8071 for (unsigned E = Operands.size(); I != E; ++I) { 8072 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8073 if (SkipVcc && !SkippedVcc && Op.isReg() && 8074 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8075 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8076 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8077 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8078 // Skip VCC only if we didn't skip it on previous iteration. 8079 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
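// With that layout, a count of 1 here means this 'vcc' is the carry-out
// destination (only vdst has been added so far), and a count of 5 means it is
// the carry-in source (vdst plus two modifier/register pairs).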
8080 if (BasicInstType == SIInstrFlags::VOP2 && 8081 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8082 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8083 SkippedVcc = true; 8084 continue; 8085 } else if (BasicInstType == SIInstrFlags::VOPC && 8086 Inst.getNumOperands() == 0) { 8087 SkippedVcc = true; 8088 continue; 8089 } 8090 } 8091 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8092 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8093 } else if (Op.isImm()) { 8094 // Handle optional arguments 8095 OptionalIdx[Op.getImmTy()] = I; 8096 } else { 8097 llvm_unreachable("Invalid operand type"); 8098 } 8099 SkippedVcc = false; 8100 } 8101 8102 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8103 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8104 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8105 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments 8106 switch (BasicInstType) { 8107 case SIInstrFlags::VOP1: 8108 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8109 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8111 } 8112 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8113 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8115 break; 8116 8117 case SIInstrFlags::VOP2: 8118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8119 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8121 } 8122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8125 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8126 break; 8127 8128 case SIInstrFlags::VOPC: 8129 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8130 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8132 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8133 break; 8134 8135 default: 8136 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8137 } 8138 } 8139 8140 // Special case v_mac_{f16, f32}: 8141 // it has a src2 register operand that is tied to the dst operand. 8142 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8143 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8144 auto it = Inst.begin(); 8145 std::advance( 8146 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8147 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8148 } 8149 } 8150 8151 //===----------------------------------------------------------------------===// 8152 // mAI 8153 //===----------------------------------------------------------------------===// 8154 8155 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8156 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8157 } 8158 8159 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8160 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8161 } 8162 8163 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8164 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8165 } 8166 8167 /// Force static initialization. 8168 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8169 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8170 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8171 } 8172 8173 #define GET_REGISTER_MATCHER 8174 #define GET_MATCHER_IMPLEMENTATION 8175 #define GET_MNEMONIC_SPELL_CHECKER 8176 #define GET_MNEMONIC_CHECKER 8177 #include "AMDGPUGenAsmMatcher.inc" 8178 8179 // This function should be defined after the auto-generated include so that we 8180 // have the MatchClassKind enum defined. 8181 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8182 unsigned Kind) { 8183 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8184 // But MatchInstructionImpl() expects a token and fails to validate the 8185 // operand. This method checks if we are given an immediate operand but expect 8186 // to get the corresponding token. 8187 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8188 switch (Kind) { 8189 case MCK_addr64: 8190 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8191 case MCK_gds: 8192 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8193 case MCK_lds: 8194 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8195 case MCK_idxen: 8196 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8197 case MCK_offen: 8198 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8199 case MCK_SSrcB32: 8200 // When operands have expression values, they will return true for isToken, 8201 // because it is not possible to distinguish between a token and an 8202 // expression at parse time. MatchInstructionImpl() will always try to 8203 // match an operand as a token, when isToken returns true, and when the 8204 // name of the expression is not a valid token, the match will fail, 8205 // so we need to handle it here. 8206 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8207 case MCK_SSrcF32: 8208 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8209 case MCK_SoppBrTarget: 8210 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8211 case MCK_VReg32OrOff: 8212 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8213 case MCK_InterpSlot: 8214 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8215 case MCK_Attr: 8216 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8217 case MCK_AttrChan: 8218 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8219 case MCK_ImmSMEMOffset: 8220 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8221 case MCK_SReg_64: 8222 case MCK_SReg_64_XEXEC: 8223 // Null is defined as a 32-bit register but 8224 // it should also be enabled with 64-bit operands. 8225 // The following code enables it for SReg_64 operands 8226 // used as source and destination. Remaining source 8227 // operands are handled in isInlinableImm. 8228 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8229 default: 8230 return Match_InvalidOperand; 8231 } 8232 } 8233 8234 //===----------------------------------------------------------------------===// 8235 // endpgm 8236 //===----------------------------------------------------------------------===// 8237 8238 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8239 SMLoc S = getLoc(); 8240 int64_t Imm = 0; 8241 8242 if (!parseExpr(Imm)) { 8243 // The operand is optional, if not present default to 0 8244 Imm = 0; 8245 } 8246 8247 if (!isUInt<16>(Imm)) { 8248 Error(S, "expected a 16-bit value"); 8249 return MatchOperand_ParseFail; 8250 } 8251 8252 Operands.push_back( 8253 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8254 return MatchOperand_Success; 8255 } 8256 8257 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8258