//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
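    // For example, 'gds' may come back from the generic parser as a symbol
    // reference rather than a token; getToken() below then falls back to the
    // referenced symbol's name, so the operand can still be matched against
    // the 'gds' keyword.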
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 // TODO: Possibly make subtargetHasRegister const. 1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1217 bool ParseDirectiveAMDGPUHsaKernel(); 1218 1219 bool ParseDirectiveISAVersion(); 1220 bool ParseDirectiveHSAMetadata(); 1221 bool ParseDirectivePALMetadataBegin(); 1222 bool ParseDirectivePALMetadata(); 1223 bool ParseDirectiveAMDGPULDS(); 1224 1225 /// Common code to parse out a block of text (typically YAML) between start and 1226 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // There is currently no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
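      //
      // Illustrative usage (not taken from this file): assembly sources can
      // consult these pre-defined symbols in absolute expressions, e.g.
      //   .if .amdgcn.gfx_generation_number >= 9
      //   .endif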
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool isSI() const { 1333 return AMDGPU::isSI(getSTI()); 1334 } 1335 1336 bool isCI() const { 1337 return AMDGPU::isCI(getSTI()); 1338 } 1339 1340 bool isVI() const { 1341 return AMDGPU::isVI(getSTI()); 1342 } 1343 1344 bool isGFX9() const { 1345 return AMDGPU::isGFX9(getSTI()); 1346 } 1347 1348 bool isGFX90A() const { 1349 return AMDGPU::isGFX90A(getSTI()); 1350 } 1351 1352 bool isGFX9Plus() const { 1353 return AMDGPU::isGFX9Plus(getSTI()); 1354 } 1355 1356 bool isGFX10() const { 1357 return AMDGPU::isGFX10(getSTI()); 1358 } 1359 1360 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1361 1362 bool isGFX10_BEncoding() const { 1363 return AMDGPU::isGFX10_BEncoding(getSTI()); 1364 } 1365 1366 bool hasInv2PiInlineImm() const { 1367 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1368 } 1369 1370 bool hasFlatOffsets() const { 1371 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1372 } 1373 1374 bool hasSGPR102_SGPR103() const { 1375 return !isVI() && !isGFX9(); 1376 } 1377 1378 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1379 1380 bool hasIntClamp() const { 1381 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1382 } 1383 1384 AMDGPUTargetStreamer &getTargetStreamer() { 1385 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1386 return static_cast<AMDGPUTargetStreamer &>(TS); 1387 } 1388 1389 const MCRegisterInfo *getMRI() const { 1390 // We need this const_cast because for some reason getContext() is not const 1391 // in MCAsmParser. 
1392 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1393 } 1394 1395 const MCInstrInfo *getMII() const { 1396 return &MII; 1397 } 1398 1399 const FeatureBitset &getFeatureBits() const { 1400 return getSTI().getFeatureBits(); 1401 } 1402 1403 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1404 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1405 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1406 1407 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1408 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1409 bool isForcedDPP() const { return ForcedDPP; } 1410 bool isForcedSDWA() const { return ForcedSDWA; } 1411 ArrayRef<unsigned> getMatchedVariants() const; 1412 StringRef getMatchedVariantName() const; 1413 1414 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1415 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1416 bool RestoreOnFailure); 1417 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1418 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1419 SMLoc &EndLoc) override; 1420 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1421 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1422 unsigned Kind) override; 1423 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1424 OperandVector &Operands, MCStreamer &Out, 1425 uint64_t &ErrorInfo, 1426 bool MatchingInlineAsm) override; 1427 bool ParseDirective(AsmToken DirectiveID) override; 1428 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1429 OperandMode Mode = OperandMode_Default); 1430 StringRef parseMnemonicSuffix(StringRef Name); 1431 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1432 SMLoc NameLoc, OperandVector &Operands) override; 1433 //bool ProcessInstruction(MCInst &Inst); 1434 1435 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1436 1437 OperandMatchResultTy 1438 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1439 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1440 bool (*ConvertResult)(int64_t &) = nullptr); 1441 1442 OperandMatchResultTy 1443 parseOperandArrayWithPrefix(const char *Prefix, 1444 OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t&) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseNamedBit(StringRef Name, OperandVector &Operands, 1450 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1451 OperandMatchResultTy parseCPol(OperandVector &Operands); 1452 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1453 StringRef &Value, 1454 SMLoc &StringLoc); 1455 1456 bool isModifier(); 1457 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1458 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1459 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1460 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1461 bool parseSP3NegModifier(); 1462 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1463 OperandMatchResultTy parseReg(OperandVector &Operands); 1464 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1465 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1466 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1467 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1468 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1469 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1470 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1471 OperandMatchResultTy parseUfmt(int64_t &Format); 1472 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1473 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1474 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1475 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1476 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1477 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1478 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1479 1480 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1481 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1482 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1483 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1484 1485 bool parseCnt(int64_t &IntVal); 1486 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1487 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1488 1489 private: 1490 struct OperandInfoTy { 1491 SMLoc Loc; 1492 int64_t Id; 1493 bool IsSymbolic = false; 1494 bool IsDefined = false; 1495 1496 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1497 }; 1498 1499 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1500 bool validateSendMsg(const OperandInfoTy &Msg, 1501 const OperandInfoTy &Op, 1502 const OperandInfoTy &Stream); 1503 1504 bool parseHwregBody(OperandInfoTy &HwReg, 1505 OperandInfoTy &Offset, 1506 OperandInfoTy &Width); 1507 bool validateHwreg(const OperandInfoTy &HwReg, 1508 const OperandInfoTy &Offset, 1509 const OperandInfoTy &Width); 1510 1511 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1512 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1513 1514 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1515 const OperandVector &Operands) const; 1516 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1517 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1518 SMLoc getLitLoc(const OperandVector &Operands) const; 1519 SMLoc getConstLoc(const OperandVector &Operands) const; 1520 1521 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1522 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1523 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1524 bool validateSOPLiteral(const MCInst &Inst) const; 1525 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1526 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1527 bool validateIntClampSupported(const MCInst &Inst); 1528 bool validateMIMGAtomicDMask(const MCInst &Inst); 1529 bool validateMIMGGatherDMask(const MCInst &Inst); 1530 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1531 bool validateMIMGDataSize(const MCInst &Inst); 1532 bool validateMIMGAddrSize(const 
MCInst &Inst); 1533 bool validateMIMGD16(const MCInst &Inst); 1534 bool validateMIMGDim(const MCInst &Inst); 1535 bool validateMIMGMSAA(const MCInst &Inst); 1536 bool validateOpSel(const MCInst &Inst); 1537 bool validateVccOperand(unsigned Reg) const; 1538 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1539 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1540 bool validateAGPRLdSt(const MCInst &Inst) const; 1541 bool validateVGPRAlign(const MCInst &Inst) const; 1542 bool validateDivScale(const MCInst &Inst); 1543 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1544 const SMLoc &IDLoc); 1545 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1546 unsigned getConstantBusLimit(unsigned Opcode) const; 1547 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1548 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1549 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1550 1551 bool isSupportedMnemo(StringRef Mnemo, 1552 const FeatureBitset &FBS); 1553 bool isSupportedMnemo(StringRef Mnemo, 1554 const FeatureBitset &FBS, 1555 ArrayRef<unsigned> Variants); 1556 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1557 1558 bool isId(const StringRef Id) const; 1559 bool isId(const AsmToken &Token, const StringRef Id) const; 1560 bool isToken(const AsmToken::TokenKind Kind) const; 1561 bool trySkipId(const StringRef Id); 1562 bool trySkipId(const StringRef Pref, const StringRef Id); 1563 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1564 bool trySkipToken(const AsmToken::TokenKind Kind); 1565 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1566 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1567 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1568 1569 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1570 AsmToken::TokenKind getTokenKind() const; 1571 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1572 bool parseExpr(OperandVector &Operands); 1573 StringRef getTokenStr() const; 1574 AsmToken peekToken(); 1575 AsmToken getToken() const; 1576 SMLoc getLoc() const; 1577 void lex(); 1578 1579 public: 1580 void onBeginOfFile() override; 1581 1582 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1583 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1584 1585 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1586 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1587 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1588 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1589 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1590 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1591 1592 bool parseSwizzleOperand(int64_t &Op, 1593 const unsigned MinVal, 1594 const unsigned MaxVal, 1595 const StringRef ErrMsg, 1596 SMLoc &Loc); 1597 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1598 const unsigned MinVal, 1599 const unsigned MaxVal, 1600 const StringRef ErrMsg); 1601 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1602 bool parseSwizzleOffset(int64_t &Imm); 1603 bool parseSwizzleMacro(int64_t &Imm); 1604 bool parseSwizzleQuadPerm(int64_t &Imm); 1605 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1606 bool parseSwizzleBroadcast(int64_t &Imm); 1607 bool parseSwizzleSwap(int64_t &Imm); 1608 bool parseSwizzleReverse(int64_t &Imm); 1609 1610 
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1611 int64_t parseGPRIdxMacro(); 1612 1613 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1614 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1615 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1616 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1617 1618 AMDGPUOperand::Ptr defaultCPol() const; 1619 1620 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1621 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1622 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1623 AMDGPUOperand::Ptr defaultFlatOffset() const; 1624 1625 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1626 1627 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1628 OptionalImmIndexMap &OptionalIdx); 1629 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1630 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1631 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1632 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1633 OptionalImmIndexMap &OptionalIdx); 1634 1635 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1636 1637 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1638 bool IsAtomic = false); 1639 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1640 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1641 1642 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1643 1644 bool parseDimId(unsigned &Encoding); 1645 OperandMatchResultTy parseDim(OperandVector &Operands); 1646 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1647 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1648 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1649 int64_t parseDPPCtrlSel(StringRef Ctrl); 1650 int64_t parseDPPCtrlPerm(); 1651 AMDGPUOperand::Ptr defaultRowMask() const; 1652 AMDGPUOperand::Ptr defaultBankMask() const; 1653 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1654 AMDGPUOperand::Ptr defaultFI() const; 1655 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1656 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1657 1658 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1659 AMDGPUOperand::ImmTy Type); 1660 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1661 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1662 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1663 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1664 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1665 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1666 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1667 uint64_t BasicInstType, 1668 bool SkipDstVcc = false, 1669 bool SkipSrcVcc = false); 1670 1671 AMDGPUOperand::Ptr defaultBLGP() const; 1672 AMDGPUOperand::Ptr defaultCBSZ() const; 1673 AMDGPUOperand::Ptr defaultABID() const; 1674 1675 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1676 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1677 }; 1678 1679 struct OptionalOperand { 1680 const char *Name; 1681 AMDGPUOperand::ImmTy Type; 1682 bool IsBit; 1683 bool (*ConvertResult)(int64_t&); 1684 }; 1685 1686 } // end anonymous namespace 

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the operand type's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
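  // For instance, a source operand written as 'src_shared_base' is accepted
  // here for both 32-bit and 64-bit uses, even though the underlying value is
  // 32-bit.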
1785 if (isInlineValue()) { 1786 return true; 1787 } 1788 1789 if (!isImmTy(ImmTyNone)) { 1790 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1791 return false; 1792 } 1793 // TODO: We should avoid using host float here. It would be better to 1794 // check the float bit values which is what a few other places do. 1795 // We've had bot failures before due to weird NaN support on mips hosts. 1796 1797 APInt Literal(64, Imm.Val); 1798 1799 if (Imm.IsFPImm) { // We got fp literal token 1800 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1801 return AMDGPU::isInlinableLiteral64(Imm.Val, 1802 AsmParser->hasInv2PiInlineImm()); 1803 } 1804 1805 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1806 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1807 return false; 1808 1809 if (type.getScalarSizeInBits() == 16) { 1810 return isInlineableLiteralOp16( 1811 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1812 type, AsmParser->hasInv2PiInlineImm()); 1813 } 1814 1815 // Check if single precision literal is inlinable 1816 return AMDGPU::isInlinableLiteral32( 1817 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1818 AsmParser->hasInv2PiInlineImm()); 1819 } 1820 1821 // We got int literal token. 1822 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1823 return AMDGPU::isInlinableLiteral64(Imm.Val, 1824 AsmParser->hasInv2PiInlineImm()); 1825 } 1826 1827 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1828 return false; 1829 } 1830 1831 if (type.getScalarSizeInBits() == 16) { 1832 return isInlineableLiteralOp16( 1833 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1834 type, AsmParser->hasInv2PiInlineImm()); 1835 } 1836 1837 return AMDGPU::isInlinableLiteral32( 1838 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1839 AsmParser->hasInv2PiInlineImm()); 1840 } 1841 1842 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1843 // Check that this immediate can be added as literal 1844 if (!isImmTy(ImmTyNone)) { 1845 return false; 1846 } 1847 1848 if (!Imm.IsFPImm) { 1849 // We got int literal token. 1850 1851 if (type == MVT::f64 && hasFPModifiers()) { 1852 // Cannot apply fp modifiers to int literals preserving the same semantics 1853 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1854 // disable these cases. 1855 return false; 1856 } 1857 1858 unsigned Size = type.getSizeInBits(); 1859 if (Size == 64) 1860 Size = 32; 1861 1862 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1863 // types. 1864 return isSafeTruncation(Imm.Val, Size); 1865 } 1866 1867 // We got fp literal token 1868 if (type == MVT::f64) { // Expected 64-bit fp operand 1869 // We would set low 64-bits of literal to zeroes but we accept this literals 1870 return true; 1871 } 1872 1873 if (type == MVT::i64) { // Expected 64-bit int operand 1874 // We don't allow fp literals in 64-bit integer instructions. It is 1875 // unclear how we should encode them. 1876 return false; 1877 } 1878 1879 // We allow fp literals with f16x2 operands assuming that the specified 1880 // literal goes into the lower half and the upper half is zero. We also 1881 // require that the literal may be losslesly converted to f16. 1882 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1883 (type == MVT::v2i16)? MVT::i16 : 1884 (type == MVT::v2f32)? 
MVT::f32 : type; 1885 1886 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1887 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1888 } 1889 1890 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1891 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1892 } 1893 1894 bool AMDGPUOperand::isVRegWithInputMods() const { 1895 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1896 // GFX90A allows DPP on 64-bit operands. 1897 (isRegClass(AMDGPU::VReg_64RegClassID) && 1898 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1899 } 1900 1901 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1902 if (AsmParser->isVI()) 1903 return isVReg32(); 1904 else if (AsmParser->isGFX9Plus()) 1905 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1906 else 1907 return false; 1908 } 1909 1910 bool AMDGPUOperand::isSDWAFP16Operand() const { 1911 return isSDWAOperand(MVT::f16); 1912 } 1913 1914 bool AMDGPUOperand::isSDWAFP32Operand() const { 1915 return isSDWAOperand(MVT::f32); 1916 } 1917 1918 bool AMDGPUOperand::isSDWAInt16Operand() const { 1919 return isSDWAOperand(MVT::i16); 1920 } 1921 1922 bool AMDGPUOperand::isSDWAInt32Operand() const { 1923 return isSDWAOperand(MVT::i32); 1924 } 1925 1926 bool AMDGPUOperand::isBoolReg() const { 1927 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1928 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1929 } 1930 1931 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1932 { 1933 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1934 assert(Size == 2 || Size == 4 || Size == 8); 1935 1936 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1937 1938 if (Imm.Mods.Abs) { 1939 Val &= ~FpSignMask; 1940 } 1941 if (Imm.Mods.Neg) { 1942 Val ^= FpSignMask; 1943 } 1944 1945 return Val; 1946 } 1947 1948 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1949 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1950 Inst.getNumOperands())) { 1951 addLiteralImmOperand(Inst, Imm.Val, 1952 ApplyModifiers & 1953 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1954 } else { 1955 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1956 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1957 setImmKindNone(); 1958 } 1959 } 1960 1961 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1962 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1963 auto OpNum = Inst.getNumOperands(); 1964 // Check that this operand accepts literals 1965 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1966 1967 if (ApplyModifiers) { 1968 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1969 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1970 Val = applyInputFPModifiers(Val, Size); 1971 } 1972 1973 APInt Literal(64, Val); 1974 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1975 1976 if (Imm.IsFPImm) { // We got fp literal token 1977 switch (OpTy) { 1978 case AMDGPU::OPERAND_REG_IMM_INT64: 1979 case AMDGPU::OPERAND_REG_IMM_FP64: 1980 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1981 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1982 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1983 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1984 AsmParser->hasInv2PiInlineImm())) { 1985 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1986 setImmKindConst(); 1987 return; 1988 } 1989 1990 // Non-inlineable 1991 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1992 // For fp operands we check if low 32 bits are zeros 1993 if (Literal.getLoBits(32) != 0) { 1994 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1995 "Can't encode literal as exact 64-bit floating-point operand. " 1996 "Low 32-bits will be set to zero"); 1997 } 1998 1999 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2000 setImmKindLiteral(); 2001 return; 2002 } 2003 2004 // We don't allow fp literals in 64-bit integer instructions. It is 2005 // unclear how we should encode them. This case should be checked earlier 2006 // in predicate methods (isLiteralImm()) 2007 llvm_unreachable("fp literal in 64-bit integer instruction."); 2008 2009 case AMDGPU::OPERAND_REG_IMM_INT32: 2010 case AMDGPU::OPERAND_REG_IMM_FP32: 2011 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2012 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2013 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2014 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2015 case AMDGPU::OPERAND_REG_IMM_INT16: 2016 case AMDGPU::OPERAND_REG_IMM_FP16: 2017 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2018 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2019 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2020 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2021 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2023 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2024 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2025 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2026 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2027 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2028 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2029 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2030 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2031 bool lost; 2032 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2033 // Convert literal to single precision 2034 FPLiteral.convert(*getOpFltSemantics(OpTy), 2035 APFloat::rmNearestTiesToEven, &lost); 2036 // We allow precision lost but not overflow or underflow. This should be 2037 // checked earlier in isLiteralImm() 2038 2039 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2040 Inst.addOperand(MCOperand::createImm(ImmVal)); 2041 setImmKindLiteral(); 2042 return; 2043 } 2044 default: 2045 llvm_unreachable("invalid operand size"); 2046 } 2047 2048 return; 2049 } 2050 2051 // We got int literal token. 2052 // Only sign extend inline immediates. 
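  // For example, with a 32-bit operand an integer token of -1 fits the
  // inline-constant range (-16..64) and is emitted as-is, while 0x12345
  // does not and is emitted as a 32-bit literal (Val & 0xffffffff).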
2053 switch (OpTy) { 2054 case AMDGPU::OPERAND_REG_IMM_INT32: 2055 case AMDGPU::OPERAND_REG_IMM_FP32: 2056 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2057 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2058 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2059 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2060 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2061 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2062 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2063 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2064 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2065 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2066 if (isSafeTruncation(Val, 32) && 2067 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2068 AsmParser->hasInv2PiInlineImm())) { 2069 Inst.addOperand(MCOperand::createImm(Val)); 2070 setImmKindConst(); 2071 return; 2072 } 2073 2074 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2075 setImmKindLiteral(); 2076 return; 2077 2078 case AMDGPU::OPERAND_REG_IMM_INT64: 2079 case AMDGPU::OPERAND_REG_IMM_FP64: 2080 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2081 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2082 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2083 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2084 Inst.addOperand(MCOperand::createImm(Val)); 2085 setImmKindConst(); 2086 return; 2087 } 2088 2089 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2090 setImmKindLiteral(); 2091 return; 2092 2093 case AMDGPU::OPERAND_REG_IMM_INT16: 2094 case AMDGPU::OPERAND_REG_IMM_FP16: 2095 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2096 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2097 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2098 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2099 if (isSafeTruncation(Val, 16) && 2100 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2101 AsmParser->hasInv2PiInlineImm())) { 2102 Inst.addOperand(MCOperand::createImm(Val)); 2103 setImmKindConst(); 2104 return; 2105 } 2106 2107 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2108 setImmKindLiteral(); 2109 return; 2110 2111 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2112 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2114 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2115 assert(isSafeTruncation(Val, 16)); 2116 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2117 AsmParser->hasInv2PiInlineImm())); 2118 2119 Inst.addOperand(MCOperand::createImm(Val)); 2120 return; 2121 } 2122 default: 2123 llvm_unreachable("invalid operand size"); 2124 } 2125 } 2126 2127 template <unsigned Bitwidth> 2128 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2129 APInt Literal(64, Imm.Val); 2130 setImmKindNone(); 2131 2132 if (!Imm.IsFPImm) { 2133 // We got int literal token. 
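  // An integer token is encoded by truncating it to the low Bitwidth bits
  // of the k-imm field.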
2134 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2135 return; 2136 } 2137 2138 bool Lost; 2139 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2140 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2141 APFloat::rmNearestTiesToEven, &Lost); 2142 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2143 } 2144 2145 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2146 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2147 } 2148 2149 static bool isInlineValue(unsigned Reg) { 2150 switch (Reg) { 2151 case AMDGPU::SRC_SHARED_BASE: 2152 case AMDGPU::SRC_SHARED_LIMIT: 2153 case AMDGPU::SRC_PRIVATE_BASE: 2154 case AMDGPU::SRC_PRIVATE_LIMIT: 2155 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2156 return true; 2157 case AMDGPU::SRC_VCCZ: 2158 case AMDGPU::SRC_EXECZ: 2159 case AMDGPU::SRC_SCC: 2160 return true; 2161 case AMDGPU::SGPR_NULL: 2162 return true; 2163 default: 2164 return false; 2165 } 2166 } 2167 2168 bool AMDGPUOperand::isInlineValue() const { 2169 return isRegKind() && ::isInlineValue(getReg()); 2170 } 2171 2172 //===----------------------------------------------------------------------===// 2173 // AsmParser 2174 //===----------------------------------------------------------------------===// 2175 2176 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2177 if (Is == IS_VGPR) { 2178 switch (RegWidth) { 2179 default: return -1; 2180 case 1: return AMDGPU::VGPR_32RegClassID; 2181 case 2: return AMDGPU::VReg_64RegClassID; 2182 case 3: return AMDGPU::VReg_96RegClassID; 2183 case 4: return AMDGPU::VReg_128RegClassID; 2184 case 5: return AMDGPU::VReg_160RegClassID; 2185 case 6: return AMDGPU::VReg_192RegClassID; 2186 case 8: return AMDGPU::VReg_256RegClassID; 2187 case 16: return AMDGPU::VReg_512RegClassID; 2188 case 32: return AMDGPU::VReg_1024RegClassID; 2189 } 2190 } else if (Is == IS_TTMP) { 2191 switch (RegWidth) { 2192 default: return -1; 2193 case 1: return AMDGPU::TTMP_32RegClassID; 2194 case 2: return AMDGPU::TTMP_64RegClassID; 2195 case 4: return AMDGPU::TTMP_128RegClassID; 2196 case 8: return AMDGPU::TTMP_256RegClassID; 2197 case 16: return AMDGPU::TTMP_512RegClassID; 2198 } 2199 } else if (Is == IS_SGPR) { 2200 switch (RegWidth) { 2201 default: return -1; 2202 case 1: return AMDGPU::SGPR_32RegClassID; 2203 case 2: return AMDGPU::SGPR_64RegClassID; 2204 case 3: return AMDGPU::SGPR_96RegClassID; 2205 case 4: return AMDGPU::SGPR_128RegClassID; 2206 case 5: return AMDGPU::SGPR_160RegClassID; 2207 case 6: return AMDGPU::SGPR_192RegClassID; 2208 case 8: return AMDGPU::SGPR_256RegClassID; 2209 case 16: return AMDGPU::SGPR_512RegClassID; 2210 } 2211 } else if (Is == IS_AGPR) { 2212 switch (RegWidth) { 2213 default: return -1; 2214 case 1: return AMDGPU::AGPR_32RegClassID; 2215 case 2: return AMDGPU::AReg_64RegClassID; 2216 case 3: return AMDGPU::AReg_96RegClassID; 2217 case 4: return AMDGPU::AReg_128RegClassID; 2218 case 5: return AMDGPU::AReg_160RegClassID; 2219 case 6: return AMDGPU::AReg_192RegClassID; 2220 case 8: return AMDGPU::AReg_256RegClassID; 2221 case 16: return AMDGPU::AReg_512RegClassID; 2222 case 32: return AMDGPU::AReg_1024RegClassID; 2223 } 2224 } 2225 return -1; 2226 } 2227 2228 static unsigned getSpecialRegForName(StringRef RegName) { 2229 return StringSwitch<unsigned>(RegName) 2230 .Case("exec", AMDGPU::EXEC) 2231 .Case("vcc", AMDGPU::VCC) 2232 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2233 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2234 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2235 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2236 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2237 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2238 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2239 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2240 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2241 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2242 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2243 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2244 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2245 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2246 .Case("m0", AMDGPU::M0) 2247 .Case("vccz", AMDGPU::SRC_VCCZ) 2248 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2249 .Case("execz", AMDGPU::SRC_EXECZ) 2250 .Case("src_execz", AMDGPU::SRC_EXECZ) 2251 .Case("scc", AMDGPU::SRC_SCC) 2252 .Case("src_scc", AMDGPU::SRC_SCC) 2253 .Case("tba", AMDGPU::TBA) 2254 .Case("tma", AMDGPU::TMA) 2255 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2256 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2257 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2258 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2259 .Case("vcc_lo", AMDGPU::VCC_LO) 2260 .Case("vcc_hi", AMDGPU::VCC_HI) 2261 .Case("exec_lo", AMDGPU::EXEC_LO) 2262 .Case("exec_hi", AMDGPU::EXEC_HI) 2263 .Case("tma_lo", AMDGPU::TMA_LO) 2264 .Case("tma_hi", AMDGPU::TMA_HI) 2265 .Case("tba_lo", AMDGPU::TBA_LO) 2266 .Case("tba_hi", AMDGPU::TBA_HI) 2267 .Case("pc", AMDGPU::PC_REG) 2268 .Case("null", AMDGPU::SGPR_NULL) 2269 .Default(AMDGPU::NoRegister); 2270 } 2271 2272 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2273 SMLoc &EndLoc, bool RestoreOnFailure) { 2274 auto R = parseRegister(); 2275 if (!R) return true; 2276 assert(R->isReg()); 2277 RegNo = R->getReg(); 2278 StartLoc = R->getStartLoc(); 2279 EndLoc = R->getEndLoc(); 2280 return false; 2281 } 2282 2283 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2284 SMLoc &EndLoc) { 2285 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2286 } 2287 2288 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2289 SMLoc &StartLoc, 2290 SMLoc &EndLoc) { 2291 bool Result = 2292 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2293 bool PendingErrors = getParser().hasPendingError(); 2294 getParser().clearPendingErrors(); 2295 if (PendingErrors) 2296 return MatchOperand_ParseFail; 2297 if (Result) 2298 return MatchOperand_NoMatch; 2299 return MatchOperand_Success; 2300 } 2301 2302 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2303 RegisterKind RegKind, unsigned Reg1, 2304 SMLoc Loc) { 2305 switch (RegKind) { 2306 case IS_SPECIAL: 2307 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2308 Reg = AMDGPU::EXEC; 2309 RegWidth = 2; 2310 return true; 2311 } 2312 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2313 Reg = AMDGPU::FLAT_SCR; 2314 RegWidth = 2; 2315 return true; 2316 } 2317 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2318 Reg = AMDGPU::XNACK_MASK; 2319 RegWidth = 2; 2320 return true; 2321 } 2322 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2323 Reg = AMDGPU::VCC; 2324 RegWidth = 2; 2325 return true; 2326 } 2327 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2328 Reg = AMDGPU::TBA; 2329 RegWidth = 2; 2330 return true; 2331 } 2332 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2333 Reg = AMDGPU::TMA; 2334 
RegWidth = 2; 2335 return true; 2336 } 2337 Error(Loc, "register does not fit in the list"); 2338 return false; 2339 case IS_VGPR: 2340 case IS_SGPR: 2341 case IS_AGPR: 2342 case IS_TTMP: 2343 if (Reg1 != Reg + RegWidth) { 2344 Error(Loc, "registers in a list must have consecutive indices"); 2345 return false; 2346 } 2347 RegWidth++; 2348 return true; 2349 default: 2350 llvm_unreachable("unexpected register kind"); 2351 } 2352 } 2353 2354 struct RegInfo { 2355 StringLiteral Name; 2356 RegisterKind Kind; 2357 }; 2358 2359 static constexpr RegInfo RegularRegisters[] = { 2360 {{"v"}, IS_VGPR}, 2361 {{"s"}, IS_SGPR}, 2362 {{"ttmp"}, IS_TTMP}, 2363 {{"acc"}, IS_AGPR}, 2364 {{"a"}, IS_AGPR}, 2365 }; 2366 2367 static bool isRegularReg(RegisterKind Kind) { 2368 return Kind == IS_VGPR || 2369 Kind == IS_SGPR || 2370 Kind == IS_TTMP || 2371 Kind == IS_AGPR; 2372 } 2373 2374 static const RegInfo* getRegularRegInfo(StringRef Str) { 2375 for (const RegInfo &Reg : RegularRegisters) 2376 if (Str.startswith(Reg.Name)) 2377 return &Reg; 2378 return nullptr; 2379 } 2380 2381 static bool getRegNum(StringRef Str, unsigned& Num) { 2382 return !Str.getAsInteger(10, Num); 2383 } 2384 2385 bool 2386 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2387 const AsmToken &NextToken) const { 2388 2389 // A list of consecutive registers: [s0,s1,s2,s3] 2390 if (Token.is(AsmToken::LBrac)) 2391 return true; 2392 2393 if (!Token.is(AsmToken::Identifier)) 2394 return false; 2395 2396 // A single register like s0 or a range of registers like s[0:1] 2397 2398 StringRef Str = Token.getString(); 2399 const RegInfo *Reg = getRegularRegInfo(Str); 2400 if (Reg) { 2401 StringRef RegName = Reg->Name; 2402 StringRef RegSuffix = Str.substr(RegName.size()); 2403 if (!RegSuffix.empty()) { 2404 unsigned Num; 2405 // A single register with an index: rXX 2406 if (getRegNum(RegSuffix, Num)) 2407 return true; 2408 } else { 2409 // A range of registers: r[XX:YY]. 2410 if (NextToken.is(AsmToken::LBrac)) 2411 return true; 2412 } 2413 } 2414 2415 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2416 } 2417 2418 bool 2419 AMDGPUAsmParser::isRegister() 2420 { 2421 return isRegister(getToken(), peekToken()); 2422 } 2423 2424 unsigned 2425 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2426 unsigned RegNum, 2427 unsigned RegWidth, 2428 SMLoc Loc) { 2429 2430 assert(isRegularReg(RegKind)); 2431 2432 unsigned AlignSize = 1; 2433 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2434 // SGPR and TTMP registers must be aligned. 2435 // Max required alignment is 4 dwords. 
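  // E.g. s[2:3] is accepted (start index 2 is a multiple of the 2-dword
  // alignment), while s[1:2] is rejected with "invalid register alignment".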
2436 AlignSize = std::min(RegWidth, 4u); 2437 } 2438 2439 if (RegNum % AlignSize != 0) { 2440 Error(Loc, "invalid register alignment"); 2441 return AMDGPU::NoRegister; 2442 } 2443 2444 unsigned RegIdx = RegNum / AlignSize; 2445 int RCID = getRegClass(RegKind, RegWidth); 2446 if (RCID == -1) { 2447 Error(Loc, "invalid or unsupported register size"); 2448 return AMDGPU::NoRegister; 2449 } 2450 2451 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2452 const MCRegisterClass RC = TRI->getRegClass(RCID); 2453 if (RegIdx >= RC.getNumRegs()) { 2454 Error(Loc, "register index is out of range"); 2455 return AMDGPU::NoRegister; 2456 } 2457 2458 return RC.getRegister(RegIdx); 2459 } 2460 2461 bool 2462 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2463 int64_t RegLo, RegHi; 2464 if (!skipToken(AsmToken::LBrac, "missing register index")) 2465 return false; 2466 2467 SMLoc FirstIdxLoc = getLoc(); 2468 SMLoc SecondIdxLoc; 2469 2470 if (!parseExpr(RegLo)) 2471 return false; 2472 2473 if (trySkipToken(AsmToken::Colon)) { 2474 SecondIdxLoc = getLoc(); 2475 if (!parseExpr(RegHi)) 2476 return false; 2477 } else { 2478 RegHi = RegLo; 2479 } 2480 2481 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2482 return false; 2483 2484 if (!isUInt<32>(RegLo)) { 2485 Error(FirstIdxLoc, "invalid register index"); 2486 return false; 2487 } 2488 2489 if (!isUInt<32>(RegHi)) { 2490 Error(SecondIdxLoc, "invalid register index"); 2491 return false; 2492 } 2493 2494 if (RegLo > RegHi) { 2495 Error(FirstIdxLoc, "first register index should not exceed second index"); 2496 return false; 2497 } 2498 2499 Num = static_cast<unsigned>(RegLo); 2500 Width = (RegHi - RegLo) + 1; 2501 return true; 2502 } 2503 2504 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2505 unsigned &RegNum, unsigned &RegWidth, 2506 SmallVectorImpl<AsmToken> &Tokens) { 2507 assert(isToken(AsmToken::Identifier)); 2508 unsigned Reg = getSpecialRegForName(getTokenStr()); 2509 if (Reg) { 2510 RegNum = 0; 2511 RegWidth = 1; 2512 RegKind = IS_SPECIAL; 2513 Tokens.push_back(getToken()); 2514 lex(); // skip register name 2515 } 2516 return Reg; 2517 } 2518 2519 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2520 unsigned &RegNum, unsigned &RegWidth, 2521 SmallVectorImpl<AsmToken> &Tokens) { 2522 assert(isToken(AsmToken::Identifier)); 2523 StringRef RegName = getTokenStr(); 2524 auto Loc = getLoc(); 2525 2526 const RegInfo *RI = getRegularRegInfo(RegName); 2527 if (!RI) { 2528 Error(Loc, "invalid register name"); 2529 return AMDGPU::NoRegister; 2530 } 2531 2532 Tokens.push_back(getToken()); 2533 lex(); // skip register name 2534 2535 RegKind = RI->Kind; 2536 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2537 if (!RegSuffix.empty()) { 2538 // Single 32-bit register: vXX. 2539 if (!getRegNum(RegSuffix, RegNum)) { 2540 Error(Loc, "invalid register index"); 2541 return AMDGPU::NoRegister; 2542 } 2543 RegWidth = 1; 2544 } else { 2545 // Range of registers: v[XX:YY]. ":YY" is optional. 
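    // E.g. "v[8:11]" gives RegNum = 8 and RegWidth = 4, while "v[8]"
    // (no ":YY") gives RegWidth = 1.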
2546 if (!ParseRegRange(RegNum, RegWidth)) 2547 return AMDGPU::NoRegister; 2548 } 2549 2550 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2551 } 2552 2553 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2554 unsigned &RegWidth, 2555 SmallVectorImpl<AsmToken> &Tokens) { 2556 unsigned Reg = AMDGPU::NoRegister; 2557 auto ListLoc = getLoc(); 2558 2559 if (!skipToken(AsmToken::LBrac, 2560 "expected a register or a list of registers")) { 2561 return AMDGPU::NoRegister; 2562 } 2563 2564 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2565 2566 auto Loc = getLoc(); 2567 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2568 return AMDGPU::NoRegister; 2569 if (RegWidth != 1) { 2570 Error(Loc, "expected a single 32-bit register"); 2571 return AMDGPU::NoRegister; 2572 } 2573 2574 for (; trySkipToken(AsmToken::Comma); ) { 2575 RegisterKind NextRegKind; 2576 unsigned NextReg, NextRegNum, NextRegWidth; 2577 Loc = getLoc(); 2578 2579 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2580 NextRegNum, NextRegWidth, 2581 Tokens)) { 2582 return AMDGPU::NoRegister; 2583 } 2584 if (NextRegWidth != 1) { 2585 Error(Loc, "expected a single 32-bit register"); 2586 return AMDGPU::NoRegister; 2587 } 2588 if (NextRegKind != RegKind) { 2589 Error(Loc, "registers in a list must be of the same kind"); 2590 return AMDGPU::NoRegister; 2591 } 2592 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2593 return AMDGPU::NoRegister; 2594 } 2595 2596 if (!skipToken(AsmToken::RBrac, 2597 "expected a comma or a closing square bracket")) { 2598 return AMDGPU::NoRegister; 2599 } 2600 2601 if (isRegularReg(RegKind)) 2602 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2603 2604 return Reg; 2605 } 2606 2607 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2608 unsigned &RegNum, unsigned &RegWidth, 2609 SmallVectorImpl<AsmToken> &Tokens) { 2610 auto Loc = getLoc(); 2611 Reg = AMDGPU::NoRegister; 2612 2613 if (isToken(AsmToken::Identifier)) { 2614 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2615 if (Reg == AMDGPU::NoRegister) 2616 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2617 } else { 2618 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2619 } 2620 2621 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2622 if (Reg == AMDGPU::NoRegister) { 2623 assert(Parser.hasPendingError()); 2624 return false; 2625 } 2626 2627 if (!subtargetHasRegister(*TRI, Reg)) { 2628 if (Reg == AMDGPU::SGPR_NULL) { 2629 Error(Loc, "'null' operand is not supported on this GPU"); 2630 } else { 2631 Error(Loc, "register not available on this GPU"); 2632 } 2633 return false; 2634 } 2635 2636 return true; 2637 } 2638 2639 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2640 unsigned &RegNum, unsigned &RegWidth, 2641 bool RestoreOnFailure /*=false*/) { 2642 Reg = AMDGPU::NoRegister; 2643 2644 SmallVector<AsmToken, 1> Tokens; 2645 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2646 if (RestoreOnFailure) { 2647 while (!Tokens.empty()) { 2648 getLexer().UnLex(Tokens.pop_back_val()); 2649 } 2650 } 2651 return true; 2652 } 2653 return false; 2654 } 2655 2656 Optional<StringRef> 2657 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2658 switch (RegKind) { 2659 case IS_VGPR: 2660 return StringRef(".amdgcn.next_free_vgpr"); 2661 case IS_SGPR: 2662 return StringRef(".amdgcn.next_free_sgpr"); 2663 default: 2664 return None; 2665 } 2666 } 2667 2668 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2669 auto SymbolName = getGprCountSymbolName(RegKind); 2670 assert(SymbolName && "initializing invalid register kind"); 2671 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2672 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2673 } 2674 2675 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2676 unsigned DwordRegIndex, 2677 unsigned RegWidth) { 2678 // Symbols are only defined for GCN targets 2679 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2680 return true; 2681 2682 auto SymbolName = getGprCountSymbolName(RegKind); 2683 if (!SymbolName) 2684 return true; 2685 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2686 2687 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2688 int64_t OldCount; 2689 2690 if (!Sym->isVariable()) 2691 return !Error(getLoc(), 2692 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2693 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2694 return !Error( 2695 getLoc(), 2696 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2697 2698 if (OldCount <= NewMax) 2699 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2700 2701 return true; 2702 } 2703 2704 std::unique_ptr<AMDGPUOperand> 2705 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2706 const auto &Tok = getToken(); 2707 SMLoc StartLoc = Tok.getLoc(); 2708 SMLoc EndLoc = Tok.getEndLoc(); 2709 RegisterKind RegKind; 2710 unsigned Reg, RegNum, RegWidth; 2711 2712 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2713 return nullptr; 2714 } 2715 if (isHsaAbiVersion3Or4(&getSTI())) { 2716 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2717 return nullptr; 2718 } else 2719 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2720 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2721 } 2722 2723 OperandMatchResultTy 2724 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2725 // TODO: add syntactic sugar for 1/(2*PI) 2726 2727 assert(!isRegister()); 2728 assert(!isModifier()); 2729 2730 const auto& Tok = getToken(); 2731 const auto& NextTok = peekToken(); 2732 bool IsReal = Tok.is(AsmToken::Real); 2733 SMLoc S = getLoc(); 2734 bool Negate = false; 2735 2736 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2737 lex(); 2738 IsReal = true; 2739 Negate = true; 2740 } 2741 2742 if (IsReal) { 2743 // Floating-point expressions are not supported. 2744 // Can only allow floating-point literals with an 2745 // optional sign. 2746 2747 StringRef Num = getTokenStr(); 2748 lex(); 2749 2750 APFloat RealVal(APFloat::IEEEdouble()); 2751 auto roundMode = APFloat::rmNearestTiesToEven; 2752 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2753 return MatchOperand_ParseFail; 2754 } 2755 if (Negate) 2756 RealVal.changeSign(); 2757 2758 Operands.push_back( 2759 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2760 AMDGPUOperand::ImmTyNone, true)); 2761 2762 return MatchOperand_Success; 2763 2764 } else { 2765 int64_t IntVal; 2766 const MCExpr *Expr; 2767 SMLoc S = getLoc(); 2768 2769 if (HasSP3AbsModifier) { 2770 // This is a workaround for handling expressions 2771 // as arguments of SP3 'abs' modifier, for example: 2772 // |1.0| 2773 // |-1| 2774 // |1+x| 2775 // This syntax is not compatible with syntax of standard 2776 // MC expressions (due to the trailing '|'). 
2777 SMLoc EndLoc; 2778 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2779 return MatchOperand_ParseFail; 2780 } else { 2781 if (Parser.parseExpression(Expr)) 2782 return MatchOperand_ParseFail; 2783 } 2784 2785 if (Expr->evaluateAsAbsolute(IntVal)) { 2786 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2787 } else { 2788 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2789 } 2790 2791 return MatchOperand_Success; 2792 } 2793 2794 return MatchOperand_NoMatch; 2795 } 2796 2797 OperandMatchResultTy 2798 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2799 if (!isRegister()) 2800 return MatchOperand_NoMatch; 2801 2802 if (auto R = parseRegister()) { 2803 assert(R->isReg()); 2804 Operands.push_back(std::move(R)); 2805 return MatchOperand_Success; 2806 } 2807 return MatchOperand_ParseFail; 2808 } 2809 2810 OperandMatchResultTy 2811 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2812 auto res = parseReg(Operands); 2813 if (res != MatchOperand_NoMatch) { 2814 return res; 2815 } else if (isModifier()) { 2816 return MatchOperand_NoMatch; 2817 } else { 2818 return parseImm(Operands, HasSP3AbsMod); 2819 } 2820 } 2821 2822 bool 2823 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2824 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2825 const auto &str = Token.getString(); 2826 return str == "abs" || str == "neg" || str == "sext"; 2827 } 2828 return false; 2829 } 2830 2831 bool 2832 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2833 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2834 } 2835 2836 bool 2837 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2838 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2839 } 2840 2841 bool 2842 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2843 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2844 } 2845 2846 // Check if this is an operand modifier or an opcode modifier 2847 // which may look like an expression but it is not. We should 2848 // avoid parsing these modifiers as expressions. Currently 2849 // recognized sequences are: 2850 // |...| 2851 // abs(...) 2852 // neg(...) 2853 // sext(...) 2854 // -reg 2855 // -|...| 2856 // -abs(...) 2857 // name:... 2858 // Note that simple opcode modifiers like 'gds' may be parsed as 2859 // expressions; this is a special case. See getExpressionAsToken. 2860 // 2861 bool 2862 AMDGPUAsmParser::isModifier() { 2863 2864 AsmToken Tok = getToken(); 2865 AsmToken NextToken[2]; 2866 peekTokens(NextToken); 2867 2868 return isOperandModifier(Tok, NextToken[0]) || 2869 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2870 isOpcodeModifierWithVal(Tok, NextToken[0]); 2871 } 2872 2873 // Check if the current token is an SP3 'neg' modifier. 2874 // Currently this modifier is allowed in the following context: 2875 // 2876 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2877 // 2. Before an 'abs' modifier: -abs(...) 2878 // 3. Before an SP3 'abs' modifier: -|...| 2879 // 2880 // In all other cases "-" is handled as a part 2881 // of an expression that follows the sign. 
2882 //
2883 // Note: When "-" is followed by an integer literal,
2884 // this is interpreted as integer negation rather
2885 // than a floating-point NEG modifier applied to N.
2886 // Besides being counter-intuitive, such use of a floating-point
2887 // NEG modifier would have resulted in different meanings
2888 // of integer literals used with VOP1/2/C and VOP3,
2889 // for example:
2890 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2891 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2892 // Negative fp literals with a preceding "-" are
2893 // handled likewise for uniformity.
2894 //
2895 bool
2896 AMDGPUAsmParser::parseSP3NegModifier() {
2897 
2898   AsmToken NextToken[2];
2899   peekTokens(NextToken);
2900 
2901   if (isToken(AsmToken::Minus) &&
2902       (isRegister(NextToken[0], NextToken[1]) ||
2903        NextToken[0].is(AsmToken::Pipe) ||
2904        isId(NextToken[0], "abs"))) {
2905     lex();
2906     return true;
2907   }
2908 
2909   return false;
2910 }
2911 
2912 OperandMatchResultTy
2913 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2914                                               bool AllowImm) {
2915   bool Neg, SP3Neg;
2916   bool Abs, SP3Abs;
2917   SMLoc Loc;
2918 
2919   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2920   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2921     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2922     return MatchOperand_ParseFail;
2923   }
2924 
2925   SP3Neg = parseSP3NegModifier();
2926 
2927   Loc = getLoc();
2928   Neg = trySkipId("neg");
2929   if (Neg && SP3Neg) {
2930     Error(Loc, "expected register or immediate");
2931     return MatchOperand_ParseFail;
2932   }
2933   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2934     return MatchOperand_ParseFail;
2935 
2936   Abs = trySkipId("abs");
2937   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2938     return MatchOperand_ParseFail;
2939 
2940   Loc = getLoc();
2941   SP3Abs = trySkipToken(AsmToken::Pipe);
2942   if (Abs && SP3Abs) {
2943     Error(Loc, "expected register or immediate");
2944     return MatchOperand_ParseFail;
2945   }
2946 
2947   OperandMatchResultTy Res;
2948   if (AllowImm) {
2949     Res = parseRegOrImm(Operands, SP3Abs);
2950   } else {
2951     Res = parseReg(Operands);
2952   }
2953   if (Res != MatchOperand_Success) {
2954     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2955 } 2956 2957 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2958 return MatchOperand_ParseFail; 2959 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2960 return MatchOperand_ParseFail; 2961 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2962 return MatchOperand_ParseFail; 2963 2964 AMDGPUOperand::Modifiers Mods; 2965 Mods.Abs = Abs || SP3Abs; 2966 Mods.Neg = Neg || SP3Neg; 2967 2968 if (Mods.hasFPModifiers()) { 2969 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2970 if (Op.isExpr()) { 2971 Error(Op.getStartLoc(), "expected an absolute expression"); 2972 return MatchOperand_ParseFail; 2973 } 2974 Op.setModifiers(Mods); 2975 } 2976 return MatchOperand_Success; 2977 } 2978 2979 OperandMatchResultTy 2980 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2981 bool AllowImm) { 2982 bool Sext = trySkipId("sext"); 2983 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2984 return MatchOperand_ParseFail; 2985 2986 OperandMatchResultTy Res; 2987 if (AllowImm) { 2988 Res = parseRegOrImm(Operands); 2989 } else { 2990 Res = parseReg(Operands); 2991 } 2992 if (Res != MatchOperand_Success) { 2993 return Sext? MatchOperand_ParseFail : Res; 2994 } 2995 2996 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2997 return MatchOperand_ParseFail; 2998 2999 AMDGPUOperand::Modifiers Mods; 3000 Mods.Sext = Sext; 3001 3002 if (Mods.hasIntModifiers()) { 3003 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3004 if (Op.isExpr()) { 3005 Error(Op.getStartLoc(), "expected an absolute expression"); 3006 return MatchOperand_ParseFail; 3007 } 3008 Op.setModifiers(Mods); 3009 } 3010 3011 return MatchOperand_Success; 3012 } 3013 3014 OperandMatchResultTy 3015 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3016 return parseRegOrImmWithFPInputMods(Operands, false); 3017 } 3018 3019 OperandMatchResultTy 3020 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3021 return parseRegOrImmWithIntInputMods(Operands, false); 3022 } 3023 3024 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3025 auto Loc = getLoc(); 3026 if (trySkipId("off")) { 3027 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3028 AMDGPUOperand::ImmTyOff, false)); 3029 return MatchOperand_Success; 3030 } 3031 3032 if (!isRegister()) 3033 return MatchOperand_NoMatch; 3034 3035 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3036 if (Reg) { 3037 Operands.push_back(std::move(Reg)); 3038 return MatchOperand_Success; 3039 } 3040 3041 return MatchOperand_ParseFail; 3042 3043 } 3044 3045 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3046 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3047 3048 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3049 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3050 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3051 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3052 return Match_InvalidOperand; 3053 3054 if ((TSFlags & SIInstrFlags::VOP3) && 3055 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3056 getForcedEncodingSize() != 64) 3057 return Match_PreferE32; 3058 3059 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3060 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3061 // v_mac_f32/16 allow only dst_sel == DWORD; 3062 auto OpNum = 3063 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3064 const auto &Op = Inst.getOperand(OpNum); 3065 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3066 return Match_InvalidOperand; 3067 } 3068 } 3069 3070 return Match_Success; 3071 } 3072 3073 static ArrayRef<unsigned> getAllVariants() { 3074 static const unsigned Variants[] = { 3075 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3076 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3077 }; 3078 3079 return makeArrayRef(Variants); 3080 } 3081 3082 // What asm variants we should check 3083 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3084 if (getForcedEncodingSize() == 32) { 3085 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3086 return makeArrayRef(Variants); 3087 } 3088 3089 if (isForcedVOP3()) { 3090 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3091 return makeArrayRef(Variants); 3092 } 3093 3094 if (isForcedSDWA()) { 3095 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3096 AMDGPUAsmVariants::SDWA9}; 3097 return makeArrayRef(Variants); 3098 } 3099 3100 if (isForcedDPP()) { 3101 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3102 return makeArrayRef(Variants); 3103 } 3104 3105 return getAllVariants(); 3106 } 3107 3108 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3109 if (getForcedEncodingSize() == 32) 3110 return "e32"; 3111 3112 if (isForcedVOP3()) 3113 return "e64"; 3114 3115 if (isForcedSDWA()) 3116 return "sdwa"; 3117 3118 if (isForcedDPP()) 3119 return "dpp"; 3120 3121 return ""; 3122 } 3123 3124 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3125 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3126 const unsigned Num = Desc.getNumImplicitUses(); 3127 for (unsigned i = 0; i < Num; ++i) { 3128 unsigned Reg = Desc.ImplicitUses[i]; 3129 switch (Reg) { 3130 case AMDGPU::FLAT_SCR: 3131 case AMDGPU::VCC: 3132 case AMDGPU::VCC_LO: 3133 case AMDGPU::VCC_HI: 3134 case AMDGPU::M0: 3135 return Reg; 3136 default: 3137 break; 3138 } 3139 } 3140 return AMDGPU::NoRegister; 3141 } 3142 3143 // NB: This code is correct only when used to check constant 3144 // bus limitations because GFX7 support no f16 inline constants. 3145 // Note that there are no cases when a GFX7 opcode violates 3146 // constant bus limitations due to the use of an f16 constant. 
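// For reference, the 32-bit inline constants are the integers -16..64 and
// the floating-point values +-0.5, +-1.0, +-2.0, +-4.0 (plus 1/(2*pi) when
// hasInv2PiInlineImm() is true); see the AMDGPU::isInlinableLiteral* helpers.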
3147 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3148 unsigned OpIdx) const { 3149 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3150 3151 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3152 return false; 3153 } 3154 3155 const MCOperand &MO = Inst.getOperand(OpIdx); 3156 3157 int64_t Val = MO.getImm(); 3158 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3159 3160 switch (OpSize) { // expected operand size 3161 case 8: 3162 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3163 case 4: 3164 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3165 case 2: { 3166 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3167 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3168 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3169 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3170 return AMDGPU::isInlinableIntLiteral(Val); 3171 3172 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3173 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3174 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3175 return AMDGPU::isInlinableIntLiteralV216(Val); 3176 3177 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3178 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3179 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3180 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3181 3182 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3183 } 3184 default: 3185 llvm_unreachable("invalid operand size"); 3186 } 3187 } 3188 3189 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3190 if (!isGFX10Plus()) 3191 return 1; 3192 3193 switch (Opcode) { 3194 // 64-bit shift instructions can use only one scalar value input 3195 case AMDGPU::V_LSHLREV_B64_e64: 3196 case AMDGPU::V_LSHLREV_B64_gfx10: 3197 case AMDGPU::V_LSHRREV_B64_e64: 3198 case AMDGPU::V_LSHRREV_B64_gfx10: 3199 case AMDGPU::V_ASHRREV_I64_e64: 3200 case AMDGPU::V_ASHRREV_I64_gfx10: 3201 case AMDGPU::V_LSHL_B64_e64: 3202 case AMDGPU::V_LSHR_B64_e64: 3203 case AMDGPU::V_ASHR_I64_e64: 3204 return 1; 3205 default: 3206 return 2; 3207 } 3208 } 3209 3210 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3211 const MCOperand &MO = Inst.getOperand(OpIdx); 3212 if (MO.isImm()) { 3213 return !isInlineConstant(Inst, OpIdx); 3214 } else if (MO.isReg()) { 3215 auto Reg = MO.getReg(); 3216 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3217 auto PReg = mc2PseudoReg(Reg); 3218 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3219 } else { 3220 return true; 3221 } 3222 } 3223 3224 bool 3225 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3226 const OperandVector &Operands) { 3227 const unsigned Opcode = Inst.getOpcode(); 3228 const MCInstrDesc &Desc = MII.get(Opcode); 3229 unsigned LastSGPR = AMDGPU::NoRegister; 3230 unsigned ConstantBusUseCount = 0; 3231 unsigned NumLiterals = 0; 3232 unsigned LiteralSize; 3233 3234 if (Desc.TSFlags & 3235 (SIInstrFlags::VOPC | 3236 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3237 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3238 SIInstrFlags::SDWA)) { 3239 // Check special imm operands (used by madmk, etc) 3240 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3241 ++ConstantBusUseCount; 3242 } 3243 3244 SmallDenseSet<unsigned> SGPRsUsed; 3245 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3246 if (SGPRUsed != AMDGPU::NoRegister) { 3247 SGPRsUsed.insert(SGPRUsed); 3248 ++ConstantBusUseCount; 3249 } 3250 3251 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3252 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3253 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3254 3255 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3256 3257 for (int OpIdx : OpIndices) { 3258 if (OpIdx == -1) break; 3259 3260 const MCOperand &MO = Inst.getOperand(OpIdx); 3261 if (usesConstantBus(Inst, OpIdx)) { 3262 if (MO.isReg()) { 3263 LastSGPR = mc2PseudoReg(MO.getReg()); 3264 // Pairs of registers with a partial intersections like these 3265 // s0, s[0:1] 3266 // flat_scratch_lo, flat_scratch 3267 // flat_scratch_lo, flat_scratch_hi 3268 // are theoretically valid but they are disabled anyway. 3269 // Note that this code mimics SIInstrInfo::verifyInstruction 3270 if (!SGPRsUsed.count(LastSGPR)) { 3271 SGPRsUsed.insert(LastSGPR); 3272 ++ConstantBusUseCount; 3273 } 3274 } else { // Expression or a literal 3275 3276 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3277 continue; // special operand like VINTERP attr_chan 3278 3279 // An instruction may use only one literal. 3280 // This has been validated on the previous step. 3281 // See validateVOP3Literal. 3282 // This literal may be used as more than one operand. 3283 // If all these operands are of the same size, 3284 // this literal counts as one scalar value. 3285 // Otherwise it counts as 2 scalar values. 3286 // See "GFX10 Shader Programming", section 3.6.2.3. 3287 3288 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3289 if (Size < 4) Size = 4; 3290 3291 if (NumLiterals == 0) { 3292 NumLiterals = 1; 3293 LiteralSize = Size; 3294 } else if (LiteralSize != Size) { 3295 NumLiterals = 2; 3296 } 3297 } 3298 } 3299 } 3300 } 3301 ConstantBusUseCount += NumLiterals; 3302 3303 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3304 return true; 3305 3306 SMLoc LitLoc = getLitLoc(Operands); 3307 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3308 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3309 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3310 return false; 3311 } 3312 3313 bool 3314 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3315 const OperandVector &Operands) { 3316 const unsigned Opcode = Inst.getOpcode(); 3317 const MCInstrDesc &Desc = MII.get(Opcode); 3318 3319 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3320 if (DstIdx == -1 || 3321 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3322 return true; 3323 } 3324 3325 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3326 3327 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3328 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3329 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3330 3331 assert(DstIdx != -1); 3332 const MCOperand &Dst = Inst.getOperand(DstIdx); 3333 assert(Dst.isReg()); 3334 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3335 3336 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3337 3338 for (int SrcIdx : SrcIndices) { 3339 if (SrcIdx == -1) break; 3340 const MCOperand &Src = Inst.getOperand(SrcIdx); 3341 if (Src.isReg()) { 3342 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3343 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3344 Error(getRegLoc(SrcReg, Operands), 3345 "destination must be different than all sources"); 3346 return false; 3347 } 3348 } 3349 } 3350 3351 return true; 3352 } 3353 3354 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3355 3356 const unsigned Opc = Inst.getOpcode(); 3357 const MCInstrDesc &Desc = MII.get(Opc); 3358 3359 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3360 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3361 assert(ClampIdx != -1); 3362 return Inst.getOperand(ClampIdx).getImm() == 0; 3363 } 3364 3365 return true; 3366 } 3367 3368 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3369 3370 const unsigned Opc = Inst.getOpcode(); 3371 const MCInstrDesc &Desc = MII.get(Opc); 3372 3373 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3374 return true; 3375 3376 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3377 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3378 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3379 3380 assert(VDataIdx != -1); 3381 3382 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3383 return true; 3384 3385 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3386 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3387 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3388 if (DMask == 0) 3389 DMask = 1; 3390 3391 unsigned DataSize = 3392 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3393 if (hasPackedD16()) { 3394 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3395 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3396 DataSize = (DataSize + 1) / 2; 3397 } 3398 3399 return (VDataSize / 4) == DataSize + TFESize; 3400 } 3401 3402 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3403 const unsigned Opc = Inst.getOpcode(); 3404 const MCInstrDesc &Desc = MII.get(Opc); 3405 3406 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3407 return true; 3408 3409 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3410 3411 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3412 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3413 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3414 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3415 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3416 3417 assert(VAddr0Idx != -1); 3418 assert(SrsrcIdx != -1); 3419 assert(SrsrcIdx > VAddr0Idx); 3420 3421 if (DimIdx == -1) 3422 return true; // intersect_ray 3423 3424 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3425 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3426 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3427 unsigned VAddrSize = 3428 IsNSA ? SrsrcIdx - VAddr0Idx 3429 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3430 3431 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3432 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3433 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3434 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3435 if (!IsNSA) { 3436 if (AddrSize > 8) 3437 AddrSize = 16; 3438 else if (AddrSize > 4) 3439 AddrSize = 8; 3440 } 3441 3442 return VAddrSize == AddrSize; 3443 } 3444 3445 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3446 3447 const unsigned Opc = Inst.getOpcode(); 3448 const MCInstrDesc &Desc = MII.get(Opc); 3449 3450 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3451 return true; 3452 if (!Desc.mayLoad() || !Desc.mayStore()) 3453 return true; // Not atomic 3454 3455 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3456 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3457 3458 // This is an incomplete check because image_atomic_cmpswap 3459 // may only use 0x3 and 0xf while other atomic operations 3460 // may use 0x1 and 0x3. However these limitations are 3461 // verified when we check that dmask matches dst size. 3462 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3463 } 3464 3465 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3466 3467 const unsigned Opc = Inst.getOpcode(); 3468 const MCInstrDesc &Desc = MII.get(Opc); 3469 3470 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3471 return true; 3472 3473 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3474 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3475 3476 // GATHER4 instructions use dmask in a different fashion compared to 3477 // other MIMG instructions. The only useful DMASK values are 3478 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3479 // (red,red,red,red) etc.) The ISA document doesn't mention 3480 // this. 
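  // E.g. dmask:0x4 (blue only) is accepted, while dmask:0x3 is rejected.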
3481 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3482 } 3483 3484 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3485 const unsigned Opc = Inst.getOpcode(); 3486 const MCInstrDesc &Desc = MII.get(Opc); 3487 3488 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3489 return true; 3490 3491 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3492 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3493 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3494 3495 if (!BaseOpcode->MSAA) 3496 return true; 3497 3498 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3499 assert(DimIdx != -1); 3500 3501 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3502 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3503 3504 return DimInfo->MSAA; 3505 } 3506 3507 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3508 { 3509 switch (Opcode) { 3510 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3511 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3512 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3513 return true; 3514 default: 3515 return false; 3516 } 3517 } 3518 3519 // movrels* opcodes should only allow VGPRS as src0. 3520 // This is specified in .td description for vop1/vop3, 3521 // but sdwa is handled differently. See isSDWAOperand. 3522 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3523 const OperandVector &Operands) { 3524 3525 const unsigned Opc = Inst.getOpcode(); 3526 const MCInstrDesc &Desc = MII.get(Opc); 3527 3528 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3529 return true; 3530 3531 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3532 assert(Src0Idx != -1); 3533 3534 SMLoc ErrLoc; 3535 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3536 if (Src0.isReg()) { 3537 auto Reg = mc2PseudoReg(Src0.getReg()); 3538 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3539 if (!isSGPR(Reg, TRI)) 3540 return true; 3541 ErrLoc = getRegLoc(Reg, Operands); 3542 } else { 3543 ErrLoc = getConstLoc(Operands); 3544 } 3545 3546 Error(ErrLoc, "source operand must be a VGPR"); 3547 return false; 3548 } 3549 3550 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3551 const OperandVector &Operands) { 3552 3553 const unsigned Opc = Inst.getOpcode(); 3554 3555 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3556 return true; 3557 3558 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3559 assert(Src0Idx != -1); 3560 3561 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3562 if (!Src0.isReg()) 3563 return true; 3564 3565 auto Reg = mc2PseudoReg(Src0.getReg()); 3566 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3567 if (isSGPR(Reg, TRI)) { 3568 Error(getRegLoc(Reg, Operands), 3569 "source operand must be either a VGPR or an inline constant"); 3570 return false; 3571 } 3572 3573 return true; 3574 } 3575 3576 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3577 switch (Inst.getOpcode()) { 3578 default: 3579 return true; 3580 case V_DIV_SCALE_F32_gfx6_gfx7: 3581 case V_DIV_SCALE_F32_vi: 3582 case V_DIV_SCALE_F32_gfx10: 3583 case V_DIV_SCALE_F64_gfx6_gfx7: 3584 case V_DIV_SCALE_F64_vi: 3585 case V_DIV_SCALE_F64_gfx10: 3586 break; 3587 } 3588 3589 // TODO: Check that src0 = src1 or src2. 
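  // Reject the ABS (|...|) source modifier on any source operand.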
3590 3591 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3592 AMDGPU::OpName::src1_modifiers, 3593 AMDGPU::OpName::src2_modifiers}) { 3594 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3595 .getImm() & 3596 SISrcMods::ABS) { 3597 return false; 3598 } 3599 } 3600 3601 return true; 3602 } 3603 3604 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3605 3606 const unsigned Opc = Inst.getOpcode(); 3607 const MCInstrDesc &Desc = MII.get(Opc); 3608 3609 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3610 return true; 3611 3612 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3613 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3614 if (isCI() || isSI()) 3615 return false; 3616 } 3617 3618 return true; 3619 } 3620 3621 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3622 const unsigned Opc = Inst.getOpcode(); 3623 const MCInstrDesc &Desc = MII.get(Opc); 3624 3625 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3626 return true; 3627 3628 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3629 if (DimIdx < 0) 3630 return true; 3631 3632 long Imm = Inst.getOperand(DimIdx).getImm(); 3633 if (Imm < 0 || Imm >= 8) 3634 return false; 3635 3636 return true; 3637 } 3638 3639 static bool IsRevOpcode(const unsigned Opcode) 3640 { 3641 switch (Opcode) { 3642 case AMDGPU::V_SUBREV_F32_e32: 3643 case AMDGPU::V_SUBREV_F32_e64: 3644 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3645 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3646 case AMDGPU::V_SUBREV_F32_e32_vi: 3647 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3648 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3649 case AMDGPU::V_SUBREV_F32_e64_vi: 3650 3651 case AMDGPU::V_SUBREV_CO_U32_e32: 3652 case AMDGPU::V_SUBREV_CO_U32_e64: 3653 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3654 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3655 3656 case AMDGPU::V_SUBBREV_U32_e32: 3657 case AMDGPU::V_SUBBREV_U32_e64: 3658 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3659 case AMDGPU::V_SUBBREV_U32_e32_vi: 3660 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3661 case AMDGPU::V_SUBBREV_U32_e64_vi: 3662 3663 case AMDGPU::V_SUBREV_U32_e32: 3664 case AMDGPU::V_SUBREV_U32_e64: 3665 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3666 case AMDGPU::V_SUBREV_U32_e32_vi: 3667 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3668 case AMDGPU::V_SUBREV_U32_e64_vi: 3669 3670 case AMDGPU::V_SUBREV_F16_e32: 3671 case AMDGPU::V_SUBREV_F16_e64: 3672 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3673 case AMDGPU::V_SUBREV_F16_e32_vi: 3674 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3675 case AMDGPU::V_SUBREV_F16_e64_vi: 3676 3677 case AMDGPU::V_SUBREV_U16_e32: 3678 case AMDGPU::V_SUBREV_U16_e64: 3679 case AMDGPU::V_SUBREV_U16_e32_vi: 3680 case AMDGPU::V_SUBREV_U16_e64_vi: 3681 3682 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3683 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3684 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3685 3686 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3687 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3688 3689 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3690 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3691 3692 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3693 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3694 3695 case AMDGPU::V_LSHRREV_B32_e32: 3696 case AMDGPU::V_LSHRREV_B32_e64: 3697 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3698 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3699 case AMDGPU::V_LSHRREV_B32_e32_vi: 3700 case AMDGPU::V_LSHRREV_B32_e64_vi: 3701 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3702 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3703 3704 case AMDGPU::V_ASHRREV_I32_e32: 3705 case
AMDGPU::V_ASHRREV_I32_e64: 3706 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3707 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3708 case AMDGPU::V_ASHRREV_I32_e32_vi: 3709 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3710 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3711 case AMDGPU::V_ASHRREV_I32_e64_vi: 3712 3713 case AMDGPU::V_LSHLREV_B32_e32: 3714 case AMDGPU::V_LSHLREV_B32_e64: 3715 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3716 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3717 case AMDGPU::V_LSHLREV_B32_e32_vi: 3718 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3719 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3720 case AMDGPU::V_LSHLREV_B32_e64_vi: 3721 3722 case AMDGPU::V_LSHLREV_B16_e32: 3723 case AMDGPU::V_LSHLREV_B16_e64: 3724 case AMDGPU::V_LSHLREV_B16_e32_vi: 3725 case AMDGPU::V_LSHLREV_B16_e64_vi: 3726 case AMDGPU::V_LSHLREV_B16_gfx10: 3727 3728 case AMDGPU::V_LSHRREV_B16_e32: 3729 case AMDGPU::V_LSHRREV_B16_e64: 3730 case AMDGPU::V_LSHRREV_B16_e32_vi: 3731 case AMDGPU::V_LSHRREV_B16_e64_vi: 3732 case AMDGPU::V_LSHRREV_B16_gfx10: 3733 3734 case AMDGPU::V_ASHRREV_I16_e32: 3735 case AMDGPU::V_ASHRREV_I16_e64: 3736 case AMDGPU::V_ASHRREV_I16_e32_vi: 3737 case AMDGPU::V_ASHRREV_I16_e64_vi: 3738 case AMDGPU::V_ASHRREV_I16_gfx10: 3739 3740 case AMDGPU::V_LSHLREV_B64_e64: 3741 case AMDGPU::V_LSHLREV_B64_gfx10: 3742 case AMDGPU::V_LSHLREV_B64_vi: 3743 3744 case AMDGPU::V_LSHRREV_B64_e64: 3745 case AMDGPU::V_LSHRREV_B64_gfx10: 3746 case AMDGPU::V_LSHRREV_B64_vi: 3747 3748 case AMDGPU::V_ASHRREV_I64_e64: 3749 case AMDGPU::V_ASHRREV_I64_gfx10: 3750 case AMDGPU::V_ASHRREV_I64_vi: 3751 3752 case AMDGPU::V_PK_LSHLREV_B16: 3753 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3754 case AMDGPU::V_PK_LSHLREV_B16_vi: 3755 3756 case AMDGPU::V_PK_LSHRREV_B16: 3757 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3758 case AMDGPU::V_PK_LSHRREV_B16_vi: 3759 case AMDGPU::V_PK_ASHRREV_I16: 3760 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3761 case AMDGPU::V_PK_ASHRREV_I16_vi: 3762 return true; 3763 default: 3764 return false; 3765 } 3766 } 3767 3768 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3769 3770 using namespace SIInstrFlags; 3771 const unsigned Opcode = Inst.getOpcode(); 3772 const MCInstrDesc &Desc = MII.get(Opcode); 3773 3774 // lds_direct register is defined so that it can be used 3775 // with 9-bit operands only. Ignore encodings which do not accept these. 
3776 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3777 if ((Desc.TSFlags & Enc) == 0) 3778 return None; 3779 3780 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3781 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3782 if (SrcIdx == -1) 3783 break; 3784 const auto &Src = Inst.getOperand(SrcIdx); 3785 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3786 3787 if (isGFX90A()) 3788 return StringRef("lds_direct is not supported on this GPU"); 3789 3790 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3791 return StringRef("lds_direct cannot be used with this instruction"); 3792 3793 if (SrcName != OpName::src0) 3794 return StringRef("lds_direct may be used as src0 only"); 3795 } 3796 } 3797 3798 return None; 3799 } 3800 3801 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3802 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3803 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3804 if (Op.isFlatOffset()) 3805 return Op.getStartLoc(); 3806 } 3807 return getLoc(); 3808 } 3809 3810 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3811 const OperandVector &Operands) { 3812 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3813 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3814 return true; 3815 3816 auto Opcode = Inst.getOpcode(); 3817 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3818 assert(OpNum != -1); 3819 3820 const auto &Op = Inst.getOperand(OpNum); 3821 if (!hasFlatOffsets() && Op.getImm() != 0) { 3822 Error(getFlatOffsetLoc(Operands), 3823 "flat offset modifier is not supported on this GPU"); 3824 return false; 3825 } 3826 3827 // For FLAT segment the offset must be positive; 3828 // MSB is ignored and forced to zero. 3829 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3830 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3831 if (!isIntN(OffsetSize, Op.getImm())) { 3832 Error(getFlatOffsetLoc(Operands), 3833 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3834 return false; 3835 } 3836 } else { 3837 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3838 if (!isUIntN(OffsetSize, Op.getImm())) { 3839 Error(getFlatOffsetLoc(Operands), 3840 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3841 return false; 3842 } 3843 } 3844 3845 return true; 3846 } 3847 3848 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3849 // Start with second operand because SMEM Offset cannot be dst or src0. 
3850 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3851 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3852 if (Op.isSMEMOffset()) 3853 return Op.getStartLoc(); 3854 } 3855 return getLoc(); 3856 } 3857 3858 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3859 const OperandVector &Operands) { 3860 if (isCI() || isSI()) 3861 return true; 3862 3863 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3864 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3865 return true; 3866 3867 auto Opcode = Inst.getOpcode(); 3868 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3869 if (OpNum == -1) 3870 return true; 3871 3872 const auto &Op = Inst.getOperand(OpNum); 3873 if (!Op.isImm()) 3874 return true; 3875 3876 uint64_t Offset = Op.getImm(); 3877 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3878 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3879 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3880 return true; 3881 3882 Error(getSMEMOffsetLoc(Operands), 3883 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3884 "expected a 21-bit signed offset"); 3885 3886 return false; 3887 } 3888 3889 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3890 unsigned Opcode = Inst.getOpcode(); 3891 const MCInstrDesc &Desc = MII.get(Opcode); 3892 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3893 return true; 3894 3895 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3896 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3897 3898 const int OpIndices[] = { Src0Idx, Src1Idx }; 3899 3900 unsigned NumExprs = 0; 3901 unsigned NumLiterals = 0; 3902 uint32_t LiteralValue; 3903 3904 for (int OpIdx : OpIndices) { 3905 if (OpIdx == -1) break; 3906 3907 const MCOperand &MO = Inst.getOperand(OpIdx); 3908 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3909 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3910 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3911 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3912 if (NumLiterals == 0 || LiteralValue != Value) { 3913 LiteralValue = Value; 3914 ++NumLiterals; 3915 } 3916 } else if (MO.isExpr()) { 3917 ++NumExprs; 3918 } 3919 } 3920 } 3921 3922 return NumLiterals + NumExprs <= 1; 3923 } 3924 3925 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3926 const unsigned Opc = Inst.getOpcode(); 3927 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3928 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3929 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3930 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3931 3932 if (OpSel & ~3) 3933 return false; 3934 } 3935 return true; 3936 } 3937 3938 // Check if VCC register matches wavefront size 3939 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3940 auto FB = getFeatureBits(); 3941 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3942 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3943 } 3944 3945 // VOP3 literal is only allowed in GFX10+ and only one can be used 3946 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3947 const OperandVector &Operands) { 3948 unsigned Opcode = Inst.getOpcode(); 3949 const MCInstrDesc &Desc = MII.get(Opcode); 3950 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3951 return true; 3952 3953 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3954 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3955 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3956 3957 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3958 3959 unsigned NumExprs = 0; 3960 unsigned NumLiterals = 0; 3961 uint32_t LiteralValue; 3962 3963 for (int OpIdx : OpIndices) { 3964 if (OpIdx == -1) break; 3965 3966 const MCOperand &MO = Inst.getOperand(OpIdx); 3967 if (!MO.isImm() && !MO.isExpr()) 3968 continue; 3969 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3970 continue; 3971 3972 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3973 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3974 Error(getConstLoc(Operands), 3975 "inline constants are not allowed for this operand"); 3976 return false; 3977 } 3978 3979 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3980 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3981 if (NumLiterals == 0 || LiteralValue != Value) { 3982 LiteralValue = Value; 3983 ++NumLiterals; 3984 } 3985 } else if (MO.isExpr()) { 3986 ++NumExprs; 3987 } 3988 } 3989 NumLiterals += NumExprs; 3990 3991 if (!NumLiterals) 3992 return true; 3993 3994 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3995 Error(getLitLoc(Operands), "literal operands are not supported"); 3996 return false; 3997 } 3998 3999 if (NumLiterals > 1) { 4000 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4001 return false; 4002 } 4003 4004 return true; 4005 } 4006 4007 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4008 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4009 const MCRegisterInfo *MRI) { 4010 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4011 if (OpIdx < 0) 4012 return -1; 4013 4014 const MCOperand &Op = Inst.getOperand(OpIdx); 4015 if (!Op.isReg()) 4016 return -1; 4017 4018 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4019 auto Reg = Sub ? Sub : Op.getReg(); 4020 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4021 return AGRP32.contains(Reg) ? 1 : 0; 4022 } 4023 4024 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4025 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4026 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4027 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4028 SIInstrFlags::DS)) == 0) 4029 return true; 4030 4031 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4032 : AMDGPU::OpName::vdata; 4033 4034 const MCRegisterInfo *MRI = getMRI(); 4035 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4036 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4037 4038 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4039 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4040 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4041 return false; 4042 } 4043 4044 auto FB = getFeatureBits(); 4045 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4046 if (DataAreg < 0 || DstAreg < 0) 4047 return true; 4048 return DstAreg == DataAreg; 4049 } 4050 4051 return DstAreg < 1 && DataAreg < 1; 4052 } 4053 4054 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4055 auto FB = getFeatureBits(); 4056 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4057 return true; 4058 4059 const MCRegisterInfo *MRI = getMRI(); 4060 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4061 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4062 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4063 const MCOperand &Op = Inst.getOperand(I); 4064 if (!Op.isReg()) 4065 continue; 4066 4067 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4068 if (!Sub) 4069 continue; 4070 4071 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4072 return false; 4073 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4074 return false; 4075 } 4076 4077 return true; 4078 } 4079 4080 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4081 const OperandVector &Operands, 4082 const SMLoc &IDLoc) { 4083 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4084 AMDGPU::OpName::cpol); 4085 if (CPolPos == -1) 4086 return true; 4087 4088 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4089 4090 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4091 if ((TSFlags & (SIInstrFlags::SMRD)) && 4092 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4093 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4094 return false; 4095 } 4096 4097 if (isGFX90A() && (CPol & CPol::SCC)) { 4098 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4099 StringRef CStr(S.getPointer()); 4100 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4101 Error(S, "scc is not supported on this GPU"); 4102 return false; 4103 } 4104 4105 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4106 return true; 4107 4108 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4109 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4110 Error(IDLoc, "instruction must use glc"); 4111 return false; 4112 } 4113 } else { 4114 if (CPol & CPol::GLC) { 4115 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4116 StringRef CStr(S.getPointer()); 4117 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4118 Error(S, "instruction must not use glc"); 4119 return false; 4120 } 4121 } 4122 4123 return true; 4124 } 4125 4126 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4127 const SMLoc &IDLoc, 4128 const OperandVector &Operands) { 4129 if (auto ErrMsg = validateLdsDirect(Inst)) { 4130 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4131 return false; 4132 } 4133 if (!validateSOPLiteral(Inst)) { 4134 Error(getLitLoc(Operands), 4135 "only one literal operand is allowed"); 4136 return false; 4137 } 4138 if (!validateVOP3Literal(Inst, Operands)) { 4139 return false; 4140 } 4141 if (!validateConstantBusLimitations(Inst, Operands)) { 4142 return false; 
4143 } 4144 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4145 return false; 4146 } 4147 if (!validateIntClampSupported(Inst)) { 4148 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4149 "integer clamping is not supported on this GPU"); 4150 return false; 4151 } 4152 if (!validateOpSel(Inst)) { 4153 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4154 "invalid op_sel operand"); 4155 return false; 4156 } 4157 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4158 if (!validateMIMGD16(Inst)) { 4159 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4160 "d16 modifier is not supported on this GPU"); 4161 return false; 4162 } 4163 if (!validateMIMGDim(Inst)) { 4164 Error(IDLoc, "dim modifier is required on this GPU"); 4165 return false; 4166 } 4167 if (!validateMIMGMSAA(Inst)) { 4168 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4169 "invalid dim; must be MSAA type"); 4170 return false; 4171 } 4172 if (!validateMIMGDataSize(Inst)) { 4173 Error(IDLoc, 4174 "image data size does not match dmask and tfe"); 4175 return false; 4176 } 4177 if (!validateMIMGAddrSize(Inst)) { 4178 Error(IDLoc, 4179 "image address size does not match dim and a16"); 4180 return false; 4181 } 4182 if (!validateMIMGAtomicDMask(Inst)) { 4183 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4184 "invalid atomic image dmask"); 4185 return false; 4186 } 4187 if (!validateMIMGGatherDMask(Inst)) { 4188 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4189 "invalid image_gather dmask: only one bit must be set"); 4190 return false; 4191 } 4192 if (!validateMovrels(Inst, Operands)) { 4193 return false; 4194 } 4195 if (!validateFlatOffset(Inst, Operands)) { 4196 return false; 4197 } 4198 if (!validateSMEMOffset(Inst, Operands)) { 4199 return false; 4200 } 4201 if (!validateMAIAccWrite(Inst, Operands)) { 4202 return false; 4203 } 4204 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4205 return false; 4206 } 4207 4208 if (!validateAGPRLdSt(Inst)) { 4209 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4210 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4211 : "invalid register class: agpr loads and stores not supported on this GPU" 4212 ); 4213 return false; 4214 } 4215 if (!validateVGPRAlign(Inst)) { 4216 Error(IDLoc, 4217 "invalid register class: vgpr tuples must be 64 bit aligned"); 4218 return false; 4219 } 4220 4221 if (!validateDivScale(Inst)) { 4222 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4223 return false; 4224 } 4225 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4226 return false; 4227 } 4228 4229 return true; 4230 } 4231 4232 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4233 const FeatureBitset &FBS, 4234 unsigned VariantID = 0); 4235 4236 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4237 const FeatureBitset &AvailableFeatures, 4238 unsigned VariantID); 4239 4240 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4241 const FeatureBitset &FBS) { 4242 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4243 } 4244 4245 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4246 const FeatureBitset &FBS, 4247 ArrayRef<unsigned> Variants) { 4248 for (auto Variant : Variants) { 4249 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4250 return true; 4251 } 4252 4253 return false; 4254 } 4255 4256 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4257 const SMLoc &IDLoc) { 4258 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4259 4260 // Check if requested instruction variant is supported. 4261 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4262 return false; 4263 4264 // This instruction is not supported. 4265 // Clear any other pending errors because they are no longer relevant. 4266 getParser().clearPendingErrors(); 4267 4268 // Requested instruction variant is not supported. 4269 // Check if any other variants are supported. 4270 StringRef VariantName = getMatchedVariantName(); 4271 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4272 return Error(IDLoc, 4273 Twine(VariantName, 4274 " variant of this instruction is not supported")); 4275 } 4276 4277 // Finally check if this instruction is supported on any other GPU. 4278 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4279 return Error(IDLoc, "instruction not supported on this GPU"); 4280 } 4281 4282 // Instruction not supported on any GPU. Probably a typo. 4283 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4284 return Error(IDLoc, "invalid instruction" + Suggestion); 4285 } 4286 4287 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4288 OperandVector &Operands, 4289 MCStreamer &Out, 4290 uint64_t &ErrorInfo, 4291 bool MatchingInlineAsm) { 4292 MCInst Inst; 4293 unsigned Result = Match_Success; 4294 for (auto Variant : getMatchedVariants()) { 4295 uint64_t EI; 4296 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4297 Variant); 4298 // We order match statuses from least to most specific. 
We use most specific 4299 // status as resulting 4300 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4301 if ((R == Match_Success) || 4302 (R == Match_PreferE32) || 4303 (R == Match_MissingFeature && Result != Match_PreferE32) || 4304 (R == Match_InvalidOperand && Result != Match_MissingFeature 4305 && Result != Match_PreferE32) || 4306 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4307 && Result != Match_MissingFeature 4308 && Result != Match_PreferE32)) { 4309 Result = R; 4310 ErrorInfo = EI; 4311 } 4312 if (R == Match_Success) 4313 break; 4314 } 4315 4316 if (Result == Match_Success) { 4317 if (!validateInstruction(Inst, IDLoc, Operands)) { 4318 return true; 4319 } 4320 Inst.setLoc(IDLoc); 4321 Out.emitInstruction(Inst, getSTI()); 4322 return false; 4323 } 4324 4325 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4326 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4327 return true; 4328 } 4329 4330 switch (Result) { 4331 default: break; 4332 case Match_MissingFeature: 4333 // It has been verified that the specified instruction 4334 // mnemonic is valid. A match was found but it requires 4335 // features which are not supported on this GPU. 4336 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4337 4338 case Match_InvalidOperand: { 4339 SMLoc ErrorLoc = IDLoc; 4340 if (ErrorInfo != ~0ULL) { 4341 if (ErrorInfo >= Operands.size()) { 4342 return Error(IDLoc, "too few operands for instruction"); 4343 } 4344 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4345 if (ErrorLoc == SMLoc()) 4346 ErrorLoc = IDLoc; 4347 } 4348 return Error(ErrorLoc, "invalid operand for instruction"); 4349 } 4350 4351 case Match_PreferE32: 4352 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4353 "should be encoded as e32"); 4354 case Match_MnemonicFail: 4355 llvm_unreachable("Invalid instructions should have been handled already"); 4356 } 4357 llvm_unreachable("Implement any new match types added!"); 4358 } 4359 4360 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4361 int64_t Tmp = -1; 4362 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4363 return true; 4364 } 4365 if (getParser().parseAbsoluteExpression(Tmp)) { 4366 return true; 4367 } 4368 Ret = static_cast<uint32_t>(Tmp); 4369 return false; 4370 } 4371 4372 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4373 uint32_t &Minor) { 4374 if (ParseAsAbsoluteExpression(Major)) 4375 return TokError("invalid major version"); 4376 4377 if (!trySkipToken(AsmToken::Comma)) 4378 return TokError("minor version number required, comma expected"); 4379 4380 if (ParseAsAbsoluteExpression(Minor)) 4381 return TokError("invalid minor version"); 4382 4383 return false; 4384 } 4385 4386 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4387 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4388 return TokError("directive only supported for amdgcn architecture"); 4389 4390 std::string TargetIDDirective; 4391 SMLoc TargetStart = getTok().getLoc(); 4392 if (getParser().parseEscapedString(TargetIDDirective)) 4393 return true; 4394 4395 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4396 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4397 return getParser().Error(TargetRange.Start, 4398 (Twine(".amdgcn_target directive's target id ") + 4399 Twine(TargetIDDirective) + 4400 Twine(" does not match the specified target id ") + 4401 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4402 4403 return false; 4404 } 4405 4406 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4407 return Error(Range.Start, "value out of range", Range); 4408 } 4409 4410 bool AMDGPUAsmParser::calculateGPRBlocks( 4411 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4412 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4413 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4414 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4415 // TODO(scott.linder): These calculations are duplicated from 4416 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4417 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4418 4419 unsigned NumVGPRs = NextFreeVGPR; 4420 unsigned NumSGPRs = NextFreeSGPR; 4421 4422 if (Version.Major >= 10) 4423 NumSGPRs = 0; 4424 else { 4425 unsigned MaxAddressableNumSGPRs = 4426 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4427 4428 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4429 NumSGPRs > MaxAddressableNumSGPRs) 4430 return OutOfRangeError(SGPRRange); 4431 4432 NumSGPRs += 4433 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4434 4435 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4436 NumSGPRs > MaxAddressableNumSGPRs) 4437 return OutOfRangeError(SGPRRange); 4438 4439 if (Features.test(FeatureSGPRInitBug)) 4440 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4441 } 4442 4443 VGPRBlocks = 4444 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4445 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4446 4447 return false; 4448 } 4449 4450 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4451 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4452 return TokError("directive only supported for amdgcn architecture"); 4453 4454 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4455 return TokError("directive only supported for amdhsa OS"); 4456 4457 StringRef KernelName; 4458 if (getParser().parseIdentifier(KernelName)) 4459 return true; 4460 4461 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4462 4463 StringSet<> Seen; 4464 4465 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4466 4467 SMRange VGPRRange; 4468 uint64_t NextFreeVGPR = 0; 4469 uint64_t AccumOffset = 0; 4470 SMRange SGPRRange; 4471 uint64_t NextFreeSGPR = 0; 4472 unsigned UserSGPRCount = 0; 4473 bool ReserveVCC = true; 4474 bool ReserveFlatScr = true; 4475 Optional<bool> EnableWavefrontSize32; 4476 4477 while (true) { 4478 while (trySkipToken(AsmToken::EndOfStatement)); 4479 4480 StringRef ID; 4481 SMRange IDRange = getTok().getLocRange(); 4482 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4483 return true; 4484 4485 if (ID == ".end_amdhsa_kernel") 4486 break; 4487 4488 if (Seen.find(ID) != Seen.end()) 4489 return TokError(".amdhsa_ directives cannot be repeated"); 4490 Seen.insert(ID); 4491 4492 SMLoc ValStart = getLoc(); 4493 int64_t IVal; 4494 if (getParser().parseAbsoluteExpression(IVal)) 4495 return true; 4496 SMLoc ValEnd = getLoc(); 4497 SMRange ValRange = SMRange(ValStart, ValEnd); 4498 4499 if (IVal < 0) 4500 return OutOfRangeError(ValRange); 4501 4502 uint64_t Val = IVal; 4503 4504 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4505 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4506 return OutOfRangeError(RANGE); \ 4507 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4508 4509 if (ID == ".amdhsa_group_segment_fixed_size") { 
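      // The value must fit in the width of the corresponding kernel descriptor field.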
4510 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4511 return OutOfRangeError(ValRange); 4512 KD.group_segment_fixed_size = Val; 4513 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4514 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4515 return OutOfRangeError(ValRange); 4516 KD.private_segment_fixed_size = Val; 4517 } else if (ID == ".amdhsa_kernarg_size") { 4518 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4519 return OutOfRangeError(ValRange); 4520 KD.kernarg_size = Val; 4521 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4522 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4523 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4524 Val, ValRange); 4525 if (Val) 4526 UserSGPRCount += 4; 4527 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4528 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4529 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4530 ValRange); 4531 if (Val) 4532 UserSGPRCount += 2; 4533 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4534 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4535 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4536 ValRange); 4537 if (Val) 4538 UserSGPRCount += 2; 4539 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4540 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4541 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4542 Val, ValRange); 4543 if (Val) 4544 UserSGPRCount += 2; 4545 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4546 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4547 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4548 ValRange); 4549 if (Val) 4550 UserSGPRCount += 2; 4551 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4552 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4553 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4554 ValRange); 4555 if (Val) 4556 UserSGPRCount += 2; 4557 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4558 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4559 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4560 Val, ValRange); 4561 if (Val) 4562 UserSGPRCount += 1; 4563 } else if (ID == ".amdhsa_wavefront_size32") { 4564 if (IVersion.Major < 10) 4565 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4566 EnableWavefrontSize32 = Val; 4567 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4568 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4569 Val, ValRange); 4570 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4571 PARSE_BITS_ENTRY( 4572 KD.compute_pgm_rsrc2, 4573 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4574 ValRange); 4575 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4576 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4577 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4578 ValRange); 4579 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4580 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4581 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4582 ValRange); 4583 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4584 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4585 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4586 ValRange); 4587 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4588 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4589 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4590 ValRange); 4591 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4592 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4593 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4594 ValRange); 4595 } else if (ID == 
".amdhsa_next_free_vgpr") { 4596 VGPRRange = ValRange; 4597 NextFreeVGPR = Val; 4598 } else if (ID == ".amdhsa_next_free_sgpr") { 4599 SGPRRange = ValRange; 4600 NextFreeSGPR = Val; 4601 } else if (ID == ".amdhsa_accum_offset") { 4602 if (!isGFX90A()) 4603 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4604 AccumOffset = Val; 4605 } else if (ID == ".amdhsa_reserve_vcc") { 4606 if (!isUInt<1>(Val)) 4607 return OutOfRangeError(ValRange); 4608 ReserveVCC = Val; 4609 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4610 if (IVersion.Major < 7) 4611 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4612 if (!isUInt<1>(Val)) 4613 return OutOfRangeError(ValRange); 4614 ReserveFlatScr = Val; 4615 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4616 if (IVersion.Major < 8) 4617 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4618 if (!isUInt<1>(Val)) 4619 return OutOfRangeError(ValRange); 4620 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4621 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4622 IDRange); 4623 } else if (ID == ".amdhsa_float_round_mode_32") { 4624 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4625 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4626 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4627 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4628 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4629 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4630 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4631 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4632 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4633 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4634 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4635 ValRange); 4636 } else if (ID == ".amdhsa_dx10_clamp") { 4637 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4638 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4639 } else if (ID == ".amdhsa_ieee_mode") { 4640 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4641 Val, ValRange); 4642 } else if (ID == ".amdhsa_fp16_overflow") { 4643 if (IVersion.Major < 9) 4644 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4645 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4646 ValRange); 4647 } else if (ID == ".amdhsa_tg_split") { 4648 if (!isGFX90A()) 4649 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4650 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4651 ValRange); 4652 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4653 if (IVersion.Major < 10) 4654 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4655 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4656 ValRange); 4657 } else if (ID == ".amdhsa_memory_ordered") { 4658 if (IVersion.Major < 10) 4659 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4660 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4661 ValRange); 4662 } else if (ID == ".amdhsa_forward_progress") { 4663 if (IVersion.Major < 10) 4664 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4666 ValRange); 4667 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4668 PARSE_BITS_ENTRY( 4669 KD.compute_pgm_rsrc2, 4670 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4671 ValRange); 4672 } else if (ID == 
".amdhsa_exception_fp_denorm_src") { 4673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4674 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4675 Val, ValRange); 4676 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4677 PARSE_BITS_ENTRY( 4678 KD.compute_pgm_rsrc2, 4679 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4680 ValRange); 4681 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4682 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4683 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4684 Val, ValRange); 4685 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4686 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4687 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4688 Val, ValRange); 4689 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4691 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4692 Val, ValRange); 4693 } else if (ID == ".amdhsa_exception_int_div_zero") { 4694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4695 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4696 Val, ValRange); 4697 } else { 4698 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4699 } 4700 4701 #undef PARSE_BITS_ENTRY 4702 } 4703 4704 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4705 return TokError(".amdhsa_next_free_vgpr directive is required"); 4706 4707 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4708 return TokError(".amdhsa_next_free_sgpr directive is required"); 4709 4710 unsigned VGPRBlocks; 4711 unsigned SGPRBlocks; 4712 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4713 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4714 EnableWavefrontSize32, NextFreeVGPR, 4715 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4716 SGPRBlocks)) 4717 return true; 4718 4719 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4720 VGPRBlocks)) 4721 return OutOfRangeError(VGPRRange); 4722 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4723 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4724 4725 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4726 SGPRBlocks)) 4727 return OutOfRangeError(SGPRRange); 4728 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4729 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4730 SGPRBlocks); 4731 4732 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4733 return TokError("too many user SGPRs enabled"); 4734 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4735 UserSGPRCount); 4736 4737 if (isGFX90A()) { 4738 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4739 return TokError(".amdhsa_accum_offset directive is required"); 4740 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4741 return TokError("accum_offset should be in range [4..256] in " 4742 "increments of 4"); 4743 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4744 return TokError("accum_offset exceeds total VGPR allocation"); 4745 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4746 (AccumOffset / 4 - 1)); 4747 } 4748 4749 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4750 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4751 ReserveFlatScr); 4752 return false; 4753 } 4754 4755 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4756 uint32_t Major; 4757 uint32_t Minor; 4758 4759 if (ParseDirectiveMajorMinor(Major, Minor)) 4760 return true; 4761 4762 
getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4763 return false; 4764 } 4765 4766 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4767 uint32_t Major; 4768 uint32_t Minor; 4769 uint32_t Stepping; 4770 StringRef VendorName; 4771 StringRef ArchName; 4772 4773 // If this directive has no arguments, then use the ISA version for the 4774 // targeted GPU. 4775 if (isToken(AsmToken::EndOfStatement)) { 4776 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4777 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4778 ISA.Stepping, 4779 "AMD", "AMDGPU"); 4780 return false; 4781 } 4782 4783 if (ParseDirectiveMajorMinor(Major, Minor)) 4784 return true; 4785 4786 if (!trySkipToken(AsmToken::Comma)) 4787 return TokError("stepping version number required, comma expected"); 4788 4789 if (ParseAsAbsoluteExpression(Stepping)) 4790 return TokError("invalid stepping version"); 4791 4792 if (!trySkipToken(AsmToken::Comma)) 4793 return TokError("vendor name required, comma expected"); 4794 4795 if (!parseString(VendorName, "invalid vendor name")) 4796 return true; 4797 4798 if (!trySkipToken(AsmToken::Comma)) 4799 return TokError("arch name required, comma expected"); 4800 4801 if (!parseString(ArchName, "invalid arch name")) 4802 return true; 4803 4804 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4805 VendorName, ArchName); 4806 return false; 4807 } 4808 4809 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4810 amd_kernel_code_t &Header) { 4811 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4812 // assembly for backwards compatibility. 4813 if (ID == "max_scratch_backing_memory_byte_size") { 4814 Parser.eatToEndOfStatement(); 4815 return false; 4816 } 4817 4818 SmallString<40> ErrStr; 4819 raw_svector_ostream Err(ErrStr); 4820 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4821 return TokError(Err.str()); 4822 } 4823 Lex(); 4824 4825 if (ID == "enable_wavefront_size32") { 4826 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4827 if (!isGFX10Plus()) 4828 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4829 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4830 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4831 } else { 4832 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4833 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4834 } 4835 } 4836 4837 if (ID == "wavefront_size") { 4838 if (Header.wavefront_size == 5) { 4839 if (!isGFX10Plus()) 4840 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4841 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4842 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4843 } else if (Header.wavefront_size == 6) { 4844 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4845 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4846 } 4847 } 4848 4849 if (ID == "enable_wgp_mode") { 4850 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4851 !isGFX10Plus()) 4852 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4853 } 4854 4855 if (ID == "enable_mem_ordered") { 4856 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4857 !isGFX10Plus()) 4858 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4859 } 4860 4861 if (ID == "enable_fwd_progress") { 4862 if 
(G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4863 !isGFX10Plus()) 4864 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4865 } 4866 4867 return false; 4868 } 4869 4870 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4871 amd_kernel_code_t Header; 4872 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4873 4874 while (true) { 4875 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4876 // will set the current token to EndOfStatement. 4877 while(trySkipToken(AsmToken::EndOfStatement)); 4878 4879 StringRef ID; 4880 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4881 return true; 4882 4883 if (ID == ".end_amd_kernel_code_t") 4884 break; 4885 4886 if (ParseAMDKernelCodeTValue(ID, Header)) 4887 return true; 4888 } 4889 4890 getTargetStreamer().EmitAMDKernelCodeT(Header); 4891 4892 return false; 4893 } 4894 4895 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4896 StringRef KernelName; 4897 if (!parseId(KernelName, "expected symbol name")) 4898 return true; 4899 4900 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4901 ELF::STT_AMDGPU_HSA_KERNEL); 4902 4903 KernelScope.initialize(getContext()); 4904 return false; 4905 } 4906 4907 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4908 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4909 return Error(getLoc(), 4910 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4911 "architectures"); 4912 } 4913 4914 auto TargetIDDirective = getLexer().getTok().getStringContents(); 4915 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4916 return Error(getParser().getTok().getLoc(), "target id must match options"); 4917 4918 getTargetStreamer().EmitISAVersion(); 4919 Lex(); 4920 4921 return false; 4922 } 4923 4924 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4925 const char *AssemblerDirectiveBegin; 4926 const char *AssemblerDirectiveEnd; 4927 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4928 isHsaAbiVersion3Or4(&getSTI()) 4929 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4930 HSAMD::V3::AssemblerDirectiveEnd) 4931 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4932 HSAMD::AssemblerDirectiveEnd); 4933 4934 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4935 return Error(getLoc(), 4936 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4937 "not available on non-amdhsa OSes")).str()); 4938 } 4939 4940 std::string HSAMetadataString; 4941 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4942 HSAMetadataString)) 4943 return true; 4944 4945 if (isHsaAbiVersion3Or4(&getSTI())) { 4946 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4947 return Error(getLoc(), "invalid HSA metadata"); 4948 } else { 4949 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4950 return Error(getLoc(), "invalid HSA metadata"); 4951 } 4952 4953 return false; 4954 } 4955 4956 /// Common code to parse out a block of text (typically YAML) between start and 4957 /// end directives. 
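/// The collected text is returned in CollectString; returns true (an error) if
/// the terminating directive is not found before the end of the input.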
4958 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4959 const char *AssemblerDirectiveEnd, 4960 std::string &CollectString) { 4961 4962 raw_string_ostream CollectStream(CollectString); 4963 4964 getLexer().setSkipSpace(false); 4965 4966 bool FoundEnd = false; 4967 while (!isToken(AsmToken::Eof)) { 4968 while (isToken(AsmToken::Space)) { 4969 CollectStream << getTokenStr(); 4970 Lex(); 4971 } 4972 4973 if (trySkipId(AssemblerDirectiveEnd)) { 4974 FoundEnd = true; 4975 break; 4976 } 4977 4978 CollectStream << Parser.parseStringToEndOfStatement() 4979 << getContext().getAsmInfo()->getSeparatorString(); 4980 4981 Parser.eatToEndOfStatement(); 4982 } 4983 4984 getLexer().setSkipSpace(true); 4985 4986 if (isToken(AsmToken::Eof) && !FoundEnd) { 4987 return TokError(Twine("expected directive ") + 4988 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4989 } 4990 4991 CollectStream.flush(); 4992 return false; 4993 } 4994 4995 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4996 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4997 std::string String; 4998 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4999 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5000 return true; 5001 5002 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5003 if (!PALMetadata->setFromString(String)) 5004 return Error(getLoc(), "invalid PAL metadata"); 5005 return false; 5006 } 5007 5008 /// Parse the assembler directive for old linear-format PAL metadata. 5009 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5010 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5011 return Error(getLoc(), 5012 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5013 "not available on non-amdpal OSes")).str()); 5014 } 5015 5016 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5017 PALMetadata->setLegacy(); 5018 for (;;) { 5019 uint32_t Key, Value; 5020 if (ParseAsAbsoluteExpression(Key)) { 5021 return TokError(Twine("invalid value in ") + 5022 Twine(PALMD::AssemblerDirective)); 5023 } 5024 if (!trySkipToken(AsmToken::Comma)) { 5025 return TokError(Twine("expected an even number of values in ") + 5026 Twine(PALMD::AssemblerDirective)); 5027 } 5028 if (ParseAsAbsoluteExpression(Value)) { 5029 return TokError(Twine("invalid value in ") + 5030 Twine(PALMD::AssemblerDirective)); 5031 } 5032 PALMetadata->setRegister(Key, Value); 5033 if (!trySkipToken(AsmToken::Comma)) 5034 break; 5035 } 5036 return false; 5037 } 5038 5039 /// ParseDirectiveAMDGPULDS 5040 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5041 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5042 if (getParser().checkForValidSection()) 5043 return true; 5044 5045 StringRef Name; 5046 SMLoc NameLoc = getLoc(); 5047 if (getParser().parseIdentifier(Name)) 5048 return TokError("expected identifier in directive"); 5049 5050 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5051 if (parseToken(AsmToken::Comma, "expected ','")) 5052 return true; 5053 5054 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5055 5056 int64_t Size; 5057 SMLoc SizeLoc = getLoc(); 5058 if (getParser().parseAbsoluteExpression(Size)) 5059 return true; 5060 if (Size < 0) 5061 return Error(SizeLoc, "size must be non-negative"); 5062 if (Size > LocalMemorySize) 5063 return Error(SizeLoc, "size is too large"); 5064 5065 int64_t Alignment = 4; 5066 if (trySkipToken(AsmToken::Comma)) { 5067 SMLoc AlignLoc = getLoc(); 5068 if 
(getParser().parseAbsoluteExpression(Alignment)) 5069 return true; 5070 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5071 return Error(AlignLoc, "alignment must be a power of two"); 5072 5073 // Alignment larger than the size of LDS is possible in theory, as long 5074 // as the linker manages to place to symbol at address 0, but we do want 5075 // to make sure the alignment fits nicely into a 32-bit integer. 5076 if (Alignment >= 1u << 31) 5077 return Error(AlignLoc, "alignment is too large"); 5078 } 5079 5080 if (parseToken(AsmToken::EndOfStatement, 5081 "unexpected token in '.amdgpu_lds' directive")) 5082 return true; 5083 5084 Symbol->redefineIfPossible(); 5085 if (!Symbol->isUndefined()) 5086 return Error(NameLoc, "invalid symbol redefinition"); 5087 5088 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5089 return false; 5090 } 5091 5092 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5093 StringRef IDVal = DirectiveID.getString(); 5094 5095 if (isHsaAbiVersion3Or4(&getSTI())) { 5096 if (IDVal == ".amdhsa_kernel") 5097 return ParseDirectiveAMDHSAKernel(); 5098 5099 // TODO: Restructure/combine with PAL metadata directive. 5100 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5101 return ParseDirectiveHSAMetadata(); 5102 } else { 5103 if (IDVal == ".hsa_code_object_version") 5104 return ParseDirectiveHSACodeObjectVersion(); 5105 5106 if (IDVal == ".hsa_code_object_isa") 5107 return ParseDirectiveHSACodeObjectISA(); 5108 5109 if (IDVal == ".amd_kernel_code_t") 5110 return ParseDirectiveAMDKernelCodeT(); 5111 5112 if (IDVal == ".amdgpu_hsa_kernel") 5113 return ParseDirectiveAMDGPUHsaKernel(); 5114 5115 if (IDVal == ".amd_amdgpu_isa") 5116 return ParseDirectiveISAVersion(); 5117 5118 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5119 return ParseDirectiveHSAMetadata(); 5120 } 5121 5122 if (IDVal == ".amdgcn_target") 5123 return ParseDirectiveAMDGCNTarget(); 5124 5125 if (IDVal == ".amdgpu_lds") 5126 return ParseDirectiveAMDGPULDS(); 5127 5128 if (IDVal == PALMD::AssemblerDirectiveBegin) 5129 return ParseDirectivePALMetadataBegin(); 5130 5131 if (IDVal == PALMD::AssemblerDirective) 5132 return ParseDirectivePALMetadata(); 5133 5134 return true; 5135 } 5136 5137 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5138 unsigned RegNo) { 5139 5140 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5141 R.isValid(); ++R) { 5142 if (*R == RegNo) 5143 return isGFX9Plus(); 5144 } 5145 5146 // GFX10 has 2 more SGPRs 104 and 105. 5147 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5148 R.isValid(); ++R) { 5149 if (*R == RegNo) 5150 return hasSGPR104_SGPR105(); 5151 } 5152 5153 switch (RegNo) { 5154 case AMDGPU::SRC_SHARED_BASE: 5155 case AMDGPU::SRC_SHARED_LIMIT: 5156 case AMDGPU::SRC_PRIVATE_BASE: 5157 case AMDGPU::SRC_PRIVATE_LIMIT: 5158 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5159 return isGFX9Plus(); 5160 case AMDGPU::TBA: 5161 case AMDGPU::TBA_LO: 5162 case AMDGPU::TBA_HI: 5163 case AMDGPU::TMA: 5164 case AMDGPU::TMA_LO: 5165 case AMDGPU::TMA_HI: 5166 return !isGFX9Plus(); 5167 case AMDGPU::XNACK_MASK: 5168 case AMDGPU::XNACK_MASK_LO: 5169 case AMDGPU::XNACK_MASK_HI: 5170 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5171 case AMDGPU::SGPR_NULL: 5172 return isGFX10Plus(); 5173 default: 5174 break; 5175 } 5176 5177 if (isCI()) 5178 return true; 5179 5180 if (isSI() || isGFX10Plus()) { 5181 // No flat_scr on SI. 
5182 // On GFX10 flat scratch is not a valid register operand and can only be 5183 // accessed with s_setreg/s_getreg. 5184 switch (RegNo) { 5185 case AMDGPU::FLAT_SCR: 5186 case AMDGPU::FLAT_SCR_LO: 5187 case AMDGPU::FLAT_SCR_HI: 5188 return false; 5189 default: 5190 return true; 5191 } 5192 } 5193 5194 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5195 // SI/CI have. 5196 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5197 R.isValid(); ++R) { 5198 if (*R == RegNo) 5199 return hasSGPR102_SGPR103(); 5200 } 5201 5202 return true; 5203 } 5204 5205 OperandMatchResultTy 5206 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5207 OperandMode Mode) { 5208 // Try to parse with a custom parser 5209 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5210 5211 // If we successfully parsed the operand or if there was an error parsing, 5212 // we are done. 5213 // 5214 // If we are parsing after we reach EndOfStatement then this means we 5215 // are appending default values to the Operands list. This is only done 5216 // by custom parser, so we shouldn't continue on to the generic parsing. 5217 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5218 isToken(AsmToken::EndOfStatement)) 5219 return ResTy; 5220 5221 SMLoc RBraceLoc; 5222 SMLoc LBraceLoc = getLoc(); 5223 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5224 unsigned Prefix = Operands.size(); 5225 5226 for (;;) { 5227 auto Loc = getLoc(); 5228 ResTy = parseReg(Operands); 5229 if (ResTy == MatchOperand_NoMatch) 5230 Error(Loc, "expected a register"); 5231 if (ResTy != MatchOperand_Success) 5232 return MatchOperand_ParseFail; 5233 5234 RBraceLoc = getLoc(); 5235 if (trySkipToken(AsmToken::RBrac)) 5236 break; 5237 5238 if (!skipToken(AsmToken::Comma, 5239 "expected a comma or a closing square bracket")) { 5240 return MatchOperand_ParseFail; 5241 } 5242 } 5243 5244 if (Operands.size() - Prefix > 1) { 5245 Operands.insert(Operands.begin() + Prefix, 5246 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5247 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5248 } 5249 5250 return MatchOperand_Success; 5251 } 5252 5253 return parseRegOrImm(Operands); 5254 } 5255 5256 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5257 // Clear any forced encodings from the previous instruction.
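  // A trailing _e32/_e64/_dpp/_sdwa suffix selects a specific encoding and is
  // stripped from the matched mnemonic, e.g. "v_add_f32_e64" is matched as
  // "v_add_f32" with the 64-bit (VOP3) encoding forced.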
5258 setForcedEncodingSize(0); 5259 setForcedDPP(false); 5260 setForcedSDWA(false); 5261 5262 if (Name.endswith("_e64")) { 5263 setForcedEncodingSize(64); 5264 return Name.substr(0, Name.size() - 4); 5265 } else if (Name.endswith("_e32")) { 5266 setForcedEncodingSize(32); 5267 return Name.substr(0, Name.size() - 4); 5268 } else if (Name.endswith("_dpp")) { 5269 setForcedDPP(true); 5270 return Name.substr(0, Name.size() - 4); 5271 } else if (Name.endswith("_sdwa")) { 5272 setForcedSDWA(true); 5273 return Name.substr(0, Name.size() - 5); 5274 } 5275 return Name; 5276 } 5277 5278 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5279 StringRef Name, 5280 SMLoc NameLoc, OperandVector &Operands) { 5281 // Add the instruction mnemonic 5282 Name = parseMnemonicSuffix(Name); 5283 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5284 5285 bool IsMIMG = Name.startswith("image_"); 5286 5287 while (!trySkipToken(AsmToken::EndOfStatement)) { 5288 OperandMode Mode = OperandMode_Default; 5289 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5290 Mode = OperandMode_NSA; 5291 CPolSeen = 0; 5292 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5293 5294 if (Res != MatchOperand_Success) { 5295 checkUnsupportedInstruction(Name, NameLoc); 5296 if (!Parser.hasPendingError()) { 5297 // FIXME: use real operand location rather than the current location. 5298 StringRef Msg = 5299 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5300 "not a valid operand."; 5301 Error(getLoc(), Msg); 5302 } 5303 while (!trySkipToken(AsmToken::EndOfStatement)) { 5304 lex(); 5305 } 5306 return true; 5307 } 5308 5309 // Eat the comma or space if there is one. 5310 trySkipToken(AsmToken::Comma); 5311 } 5312 5313 return false; 5314 } 5315 5316 //===----------------------------------------------------------------------===// 5317 // Utility functions 5318 //===----------------------------------------------------------------------===// 5319 5320 OperandMatchResultTy 5321 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5322 5323 if (!trySkipId(Prefix, AsmToken::Colon)) 5324 return MatchOperand_NoMatch; 5325 5326 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5327 } 5328 5329 OperandMatchResultTy 5330 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5331 AMDGPUOperand::ImmTy ImmTy, 5332 bool (*ConvertResult)(int64_t&)) { 5333 SMLoc S = getLoc(); 5334 int64_t Value = 0; 5335 5336 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5337 if (Res != MatchOperand_Success) 5338 return Res; 5339 5340 if (ConvertResult && !ConvertResult(Value)) { 5341 Error(S, "invalid " + StringRef(Prefix) + " value."); 5342 } 5343 5344 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5345 return MatchOperand_Success; 5346 } 5347 5348 OperandMatchResultTy 5349 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5350 OperandVector &Operands, 5351 AMDGPUOperand::ImmTy ImmTy, 5352 bool (*ConvertResult)(int64_t&)) { 5353 SMLoc S = getLoc(); 5354 if (!trySkipId(Prefix, AsmToken::Colon)) 5355 return MatchOperand_NoMatch; 5356 5357 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5358 return MatchOperand_ParseFail; 5359 5360 unsigned Val = 0; 5361 const unsigned MaxSize = 4; 5362 5363 // FIXME: How to verify the number of elements matches the number of src 5364 // operands? 
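  // Illustrative syntax (not exhaustive): a bracketed list of 0/1 values such
  // as "op_sel:[0,1]" or "neg_lo:[1,0,0]"; element I is packed into bit I of
  // Val by the loop below.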
5365 for (int I = 0; ; ++I) { 5366 int64_t Op; 5367 SMLoc Loc = getLoc(); 5368 if (!parseExpr(Op)) 5369 return MatchOperand_ParseFail; 5370 5371 if (Op != 0 && Op != 1) { 5372 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5373 return MatchOperand_ParseFail; 5374 } 5375 5376 Val |= (Op << I); 5377 5378 if (trySkipToken(AsmToken::RBrac)) 5379 break; 5380 5381 if (I + 1 == MaxSize) { 5382 Error(getLoc(), "expected a closing square bracket"); 5383 return MatchOperand_ParseFail; 5384 } 5385 5386 if (!skipToken(AsmToken::Comma, "expected a comma")) 5387 return MatchOperand_ParseFail; 5388 } 5389 5390 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5391 return MatchOperand_Success; 5392 } 5393 5394 OperandMatchResultTy 5395 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5396 AMDGPUOperand::ImmTy ImmTy) { 5397 int64_t Bit; 5398 SMLoc S = getLoc(); 5399 5400 if (trySkipId(Name)) { 5401 Bit = 1; 5402 } else if (trySkipId("no", Name)) { 5403 Bit = 0; 5404 } else { 5405 return MatchOperand_NoMatch; 5406 } 5407 5408 if (Name == "r128" && !hasMIMG_R128()) { 5409 Error(S, "r128 modifier is not supported on this GPU"); 5410 return MatchOperand_ParseFail; 5411 } 5412 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5413 Error(S, "a16 modifier is not supported on this GPU"); 5414 return MatchOperand_ParseFail; 5415 } 5416 5417 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5418 ImmTy = AMDGPUOperand::ImmTyR128A16; 5419 5420 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5421 return MatchOperand_Success; 5422 } 5423 5424 OperandMatchResultTy 5425 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5426 unsigned CPolOn = 0; 5427 unsigned CPolOff = 0; 5428 SMLoc S = getLoc(); 5429 5430 if (trySkipId("glc")) 5431 CPolOn = AMDGPU::CPol::GLC; 5432 else if (trySkipId("noglc")) 5433 CPolOff = AMDGPU::CPol::GLC; 5434 else if (trySkipId("slc")) 5435 CPolOn = AMDGPU::CPol::SLC; 5436 else if (trySkipId("noslc")) 5437 CPolOff = AMDGPU::CPol::SLC; 5438 else if (trySkipId("dlc")) 5439 CPolOn = AMDGPU::CPol::DLC; 5440 else if (trySkipId("nodlc")) 5441 CPolOff = AMDGPU::CPol::DLC; 5442 else if (trySkipId("scc")) 5443 CPolOn = AMDGPU::CPol::SCC; 5444 else if (trySkipId("noscc")) 5445 CPolOff = AMDGPU::CPol::SCC; 5446 else 5447 return MatchOperand_NoMatch; 5448 5449 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5450 Error(S, "dlc modifier is not supported on this GPU"); 5451 return MatchOperand_ParseFail; 5452 } 5453 5454 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5455 Error(S, "scc modifier is not supported on this GPU"); 5456 return MatchOperand_ParseFail; 5457 } 5458 5459 if (CPolSeen & (CPolOn | CPolOff)) { 5460 Error(S, "duplicate cache policy modifier"); 5461 return MatchOperand_ParseFail; 5462 } 5463 5464 CPolSeen |= (CPolOn | CPolOff); 5465 5466 for (unsigned I = 1; I != Operands.size(); ++I) { 5467 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5468 if (Op.isCPol()) { 5469 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5470 return MatchOperand_Success; 5471 } 5472 } 5473 5474 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5475 AMDGPUOperand::ImmTyCPol)); 5476 5477 return MatchOperand_Success; 5478 } 5479 5480 static void addOptionalImmOperand( 5481 MCInst& Inst, const OperandVector& Operands, 5482 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5483 AMDGPUOperand::ImmTy ImmT, 5484 int64_t Default = 0) { 5485 auto i = OptionalIdx.find(ImmT); 5486 if (i != OptionalIdx.end()) { 
5487 unsigned Idx = i->second; 5488 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5489 } else { 5490 Inst.addOperand(MCOperand::createImm(Default)); 5491 } 5492 } 5493 5494 OperandMatchResultTy 5495 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5496 StringRef &Value, 5497 SMLoc &StringLoc) { 5498 if (!trySkipId(Prefix, AsmToken::Colon)) 5499 return MatchOperand_NoMatch; 5500 5501 StringLoc = getLoc(); 5502 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5503 : MatchOperand_ParseFail; 5504 } 5505 5506 //===----------------------------------------------------------------------===// 5507 // MTBUF format 5508 //===----------------------------------------------------------------------===// 5509 5510 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5511 int64_t MaxVal, 5512 int64_t &Fmt) { 5513 int64_t Val; 5514 SMLoc Loc = getLoc(); 5515 5516 auto Res = parseIntWithPrefix(Pref, Val); 5517 if (Res == MatchOperand_ParseFail) 5518 return false; 5519 if (Res == MatchOperand_NoMatch) 5520 return true; 5521 5522 if (Val < 0 || Val > MaxVal) { 5523 Error(Loc, Twine("out of range ", StringRef(Pref))); 5524 return false; 5525 } 5526 5527 Fmt = Val; 5528 return true; 5529 } 5530 5531 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5532 // values to live in a joint format operand in the MCInst encoding. 5533 OperandMatchResultTy 5534 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5535 using namespace llvm::AMDGPU::MTBUFFormat; 5536 5537 int64_t Dfmt = DFMT_UNDEF; 5538 int64_t Nfmt = NFMT_UNDEF; 5539 5540 // dfmt and nfmt can appear in either order, and each is optional. 5541 for (int I = 0; I < 2; ++I) { 5542 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5543 return MatchOperand_ParseFail; 5544 5545 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5546 return MatchOperand_ParseFail; 5547 } 5548 // Skip optional comma between dfmt/nfmt 5549 // but guard against 2 commas following each other. 5550 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5551 !peekToken().is(AsmToken::Comma)) { 5552 trySkipToken(AsmToken::Comma); 5553 } 5554 } 5555 5556 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5557 return MatchOperand_NoMatch; 5558 5559 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5560 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5561 5562 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5563 return MatchOperand_Success; 5564 } 5565 5566 OperandMatchResultTy 5567 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5568 using namespace llvm::AMDGPU::MTBUFFormat; 5569 5570 int64_t Fmt = UFMT_UNDEF; 5571 5572 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5573 return MatchOperand_ParseFail; 5574 5575 if (Fmt == UFMT_UNDEF) 5576 return MatchOperand_NoMatch; 5577 5578 Format = Fmt; 5579 return MatchOperand_Success; 5580 } 5581 5582 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5583 int64_t &Nfmt, 5584 StringRef FormatStr, 5585 SMLoc Loc) { 5586 using namespace llvm::AMDGPU::MTBUFFormat; 5587 int64_t Format; 5588 5589 Format = getDfmt(FormatStr); 5590 if (Format != DFMT_UNDEF) { 5591 Dfmt = Format; 5592 return true; 5593 } 5594 5595 Format = getNfmt(FormatStr, getSTI()); 5596 if (Format != NFMT_UNDEF) { 5597 Nfmt = Format; 5598 return true; 5599 } 5600 5601 Error(Loc, "unsupported format"); 5602 return false; 5603 } 5604 5605 OperandMatchResultTy 5606 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5607 SMLoc FormatLoc, 5608 int64_t &Format) { 5609 using namespace llvm::AMDGPU::MTBUFFormat; 5610 5611 int64_t Dfmt = DFMT_UNDEF; 5612 int64_t Nfmt = NFMT_UNDEF; 5613 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5614 return MatchOperand_ParseFail; 5615 5616 if (trySkipToken(AsmToken::Comma)) { 5617 StringRef Str; 5618 SMLoc Loc = getLoc(); 5619 if (!parseId(Str, "expected a format string") || 5620 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5621 return MatchOperand_ParseFail; 5622 } 5623 if (Dfmt == DFMT_UNDEF) { 5624 Error(Loc, "duplicate numeric format"); 5625 return MatchOperand_ParseFail; 5626 } else if (Nfmt == NFMT_UNDEF) { 5627 Error(Loc, "duplicate data format"); 5628 return MatchOperand_ParseFail; 5629 } 5630 } 5631 5632 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5633 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5634 5635 if (isGFX10Plus()) { 5636 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5637 if (Ufmt == UFMT_UNDEF) { 5638 Error(FormatLoc, "unsupported format"); 5639 return MatchOperand_ParseFail; 5640 } 5641 Format = Ufmt; 5642 } else { 5643 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5644 } 5645 5646 return MatchOperand_Success; 5647 } 5648 5649 OperandMatchResultTy 5650 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5651 SMLoc Loc, 5652 int64_t &Format) { 5653 using namespace llvm::AMDGPU::MTBUFFormat; 5654 5655 auto Id = getUnifiedFormat(FormatStr); 5656 if (Id == UFMT_UNDEF) 5657 return MatchOperand_NoMatch; 5658 5659 if (!isGFX10Plus()) { 5660 Error(Loc, "unified format is not supported on this GPU"); 5661 return MatchOperand_ParseFail; 5662 } 5663 5664 Format = Id; 5665 return MatchOperand_Success; 5666 } 5667 5668 OperandMatchResultTy 5669 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5670 using namespace llvm::AMDGPU::MTBUFFormat; 5671 SMLoc Loc = getLoc(); 5672 5673 if (!parseExpr(Format)) 5674 return MatchOperand_ParseFail; 5675 if (!isValidFormatEncoding(Format, getSTI())) { 5676 Error(Loc, "out of range format"); 5677 return MatchOperand_ParseFail; 5678 } 5679 5680 return MatchOperand_Success; 5681 } 5682 5683 OperandMatchResultTy 5684 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5685 using namespace llvm::AMDGPU::MTBUFFormat; 5686 5687 if (!trySkipId("format", AsmToken::Colon)) 5688 return MatchOperand_NoMatch; 5689 5690 if (trySkipToken(AsmToken::LBrac)) { 5691 StringRef FormatStr; 5692 SMLoc Loc = getLoc(); 5693 if (!parseId(FormatStr, "expected a format string")) 5694 return MatchOperand_ParseFail; 5695 5696 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5697 if (Res == MatchOperand_NoMatch) 5698 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5699 if (Res != MatchOperand_Success) 5700 return Res; 5701 5702 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5703 return MatchOperand_ParseFail; 5704 5705 return MatchOperand_Success; 5706 } 5707 5708 return parseNumericFormat(Format); 5709 } 5710 5711 OperandMatchResultTy 5712 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5713 using namespace llvm::AMDGPU::MTBUFFormat; 5714 5715 int64_t Format = getDefaultFormatEncoding(getSTI()); 5716 OperandMatchResultTy Res; 5717 SMLoc Loc = getLoc(); 5718 5719 // Parse legacy format syntax. 5720 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5721 if (Res == MatchOperand_ParseFail) 5722 return Res; 5723 5724 bool FormatFound = (Res == MatchOperand_Success); 5725 5726 Operands.push_back( 5727 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5728 5729 if (FormatFound) 5730 trySkipToken(AsmToken::Comma); 5731 5732 if (isToken(AsmToken::EndOfStatement)) { 5733 // We are expecting an soffset operand, 5734 // but let matcher handle the error. 5735 return MatchOperand_Success; 5736 } 5737 5738 // Parse soffset. 
5739 Res = parseRegOrImm(Operands); 5740 if (Res != MatchOperand_Success) 5741 return Res; 5742 5743 trySkipToken(AsmToken::Comma); 5744 5745 if (!FormatFound) { 5746 Res = parseSymbolicOrNumericFormat(Format); 5747 if (Res == MatchOperand_ParseFail) 5748 return Res; 5749 if (Res == MatchOperand_Success) { 5750 auto Size = Operands.size(); 5751 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5752 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5753 Op.setImm(Format); 5754 } 5755 return MatchOperand_Success; 5756 } 5757 5758 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5759 Error(getLoc(), "duplicate format"); 5760 return MatchOperand_ParseFail; 5761 } 5762 return MatchOperand_Success; 5763 } 5764 5765 //===----------------------------------------------------------------------===// 5766 // ds 5767 //===----------------------------------------------------------------------===// 5768 5769 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5770 const OperandVector &Operands) { 5771 OptionalImmIndexMap OptionalIdx; 5772 5773 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5774 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5775 5776 // Add the register arguments 5777 if (Op.isReg()) { 5778 Op.addRegOperands(Inst, 1); 5779 continue; 5780 } 5781 5782 // Handle optional arguments 5783 OptionalIdx[Op.getImmTy()] = i; 5784 } 5785 5786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5789 5790 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5791 } 5792 5793 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5794 bool IsGdsHardcoded) { 5795 OptionalImmIndexMap OptionalIdx; 5796 5797 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5798 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5799 5800 // Add the register arguments 5801 if (Op.isReg()) { 5802 Op.addRegOperands(Inst, 1); 5803 continue; 5804 } 5805 5806 if (Op.isToken() && Op.getToken() == "gds") { 5807 IsGdsHardcoded = true; 5808 continue; 5809 } 5810 5811 // Handle optional arguments 5812 OptionalIdx[Op.getImmTy()] = i; 5813 } 5814 5815 AMDGPUOperand::ImmTy OffsetType = 5816 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5817 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5818 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5819 AMDGPUOperand::ImmTyOffset; 5820 5821 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5822 5823 if (!IsGdsHardcoded) { 5824 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5825 } 5826 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5827 } 5828 5829 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5830 OptionalImmIndexMap OptionalIdx; 5831 5832 unsigned OperandIdx[4]; 5833 unsigned EnMask = 0; 5834 int SrcIdx = 0; 5835 5836 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5837 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5838 5839 // Add the register arguments 5840 if (Op.isReg()) { 5841 assert(SrcIdx < 4); 5842 OperandIdx[SrcIdx] = Inst.size(); 5843 Op.addRegOperands(Inst, 1); 5844 ++SrcIdx; 5845 continue; 5846 } 5847 5848 if (Op.isOff()) { 5849 assert(SrcIdx < 4); 5850 OperandIdx[SrcIdx] = Inst.size(); 5851 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5852 ++SrcIdx; 5853 continue; 5854 } 5855 5856 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5857 Op.addImmOperands(Inst, 1); 5858 continue; 5859 } 5860 5861 if (Op.isToken() && Op.getToken() == "done") 5862 continue; 5863 5864 // Handle optional arguments 5865 OptionalIdx[Op.getImmTy()] = i; 5866 } 5867 5868 assert(SrcIdx == 4); 5869 5870 bool Compr = false; 5871 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5872 Compr = true; 5873 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5874 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5875 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5876 } 5877 5878 for (auto i = 0; i < SrcIdx; ++i) { 5879 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5880 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5881 } 5882 } 5883 5884 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5886 5887 Inst.addOperand(MCOperand::createImm(EnMask)); 5888 } 5889 5890 //===----------------------------------------------------------------------===// 5891 // s_waitcnt 5892 //===----------------------------------------------------------------------===// 5893 5894 static bool 5895 encodeCnt( 5896 const AMDGPU::IsaVersion ISA, 5897 int64_t &IntVal, 5898 int64_t CntVal, 5899 bool Saturate, 5900 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5901 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5902 { 5903 bool Failed = false; 5904 5905 IntVal = encode(ISA, IntVal, CntVal); 5906 if (CntVal != decode(ISA, IntVal)) { 5907 if (Saturate) { 5908 IntVal = encode(ISA, IntVal, -1); 5909 } else { 5910 Failed = true; 5911 } 5912 } 5913 return Failed; 5914 } 5915 5916 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5917 5918 SMLoc CntLoc = getLoc(); 5919 StringRef CntName = getTokenStr(); 5920 5921 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5922 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5923 return false; 5924 5925 int64_t CntVal; 5926 SMLoc ValLoc = getLoc(); 5927 if (!parseExpr(CntVal)) 5928 return false; 5929 5930 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5931 5932 bool Failed = true; 5933 bool Sat = CntName.endswith("_sat"); 5934 5935 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5936 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5937 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5938 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5939 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5940 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5941 } else { 5942 Error(CntLoc, "invalid counter name " + CntName); 5943 return false; 5944 } 5945 5946 if (Failed) { 5947 Error(ValLoc, "too large value for " + CntName); 5948 return false; 5949 } 5950 5951 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5952 return false; 5953 5954 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5955 if (isToken(AsmToken::EndOfStatement)) { 5956 Error(getLoc(), "expected a counter name"); 5957 return false; 5958 } 5959 } 5960 5961 return true; 5962 } 5963 5964 OperandMatchResultTy 5965 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5966 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5967 int64_t Waitcnt = getWaitcntBitMask(ISA); 5968 SMLoc S = getLoc(); 5969 5970 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5971 while (!isToken(AsmToken::EndOfStatement)) { 5972 if (!parseCnt(Waitcnt)) 5973 return MatchOperand_ParseFail; 5974 } 5975 } else { 5976 if (!parseExpr(Waitcnt)) 5977 return MatchOperand_ParseFail; 5978 } 5979 5980 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5981 return MatchOperand_Success; 5982 } 5983 5984 bool 5985 AMDGPUOperand::isSWaitCnt() const { 5986 return isImm(); 5987 } 5988 5989 //===----------------------------------------------------------------------===// 5990 // hwreg 5991 //===----------------------------------------------------------------------===// 5992 5993 bool 5994 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5995 OperandInfoTy &Offset, 5996 
OperandInfoTy &Width) { 5997 using namespace llvm::AMDGPU::Hwreg; 5998 5999 // The register may be specified by name or using a numeric code 6000 HwReg.Loc = getLoc(); 6001 if (isToken(AsmToken::Identifier) && 6002 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6003 HwReg.IsSymbolic = true; 6004 lex(); // skip register name 6005 } else if (!parseExpr(HwReg.Id, "a register name")) { 6006 return false; 6007 } 6008 6009 if (trySkipToken(AsmToken::RParen)) 6010 return true; 6011 6012 // parse optional params 6013 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6014 return false; 6015 6016 Offset.Loc = getLoc(); 6017 if (!parseExpr(Offset.Id)) 6018 return false; 6019 6020 if (!skipToken(AsmToken::Comma, "expected a comma")) 6021 return false; 6022 6023 Width.Loc = getLoc(); 6024 return parseExpr(Width.Id) && 6025 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6026 } 6027 6028 bool 6029 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6030 const OperandInfoTy &Offset, 6031 const OperandInfoTy &Width) { 6032 6033 using namespace llvm::AMDGPU::Hwreg; 6034 6035 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6036 Error(HwReg.Loc, 6037 "specified hardware register is not supported on this GPU"); 6038 return false; 6039 } 6040 if (!isValidHwreg(HwReg.Id)) { 6041 Error(HwReg.Loc, 6042 "invalid code of hardware register: only 6-bit values are legal"); 6043 return false; 6044 } 6045 if (!isValidHwregOffset(Offset.Id)) { 6046 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6047 return false; 6048 } 6049 if (!isValidHwregWidth(Width.Id)) { 6050 Error(Width.Loc, 6051 "invalid bitfield width: only values from 1 to 32 are legal"); 6052 return false; 6053 } 6054 return true; 6055 } 6056 6057 OperandMatchResultTy 6058 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6059 using namespace llvm::AMDGPU::Hwreg; 6060 6061 int64_t ImmVal = 0; 6062 SMLoc Loc = getLoc(); 6063 6064 if (trySkipId("hwreg", AsmToken::LParen)) { 6065 OperandInfoTy HwReg(ID_UNKNOWN_); 6066 OperandInfoTy Offset(OFFSET_DEFAULT_); 6067 OperandInfoTy Width(WIDTH_DEFAULT_); 6068 if (parseHwregBody(HwReg, Offset, Width) && 6069 validateHwreg(HwReg, Offset, Width)) { 6070 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6071 } else { 6072 return MatchOperand_ParseFail; 6073 } 6074 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6075 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6076 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6077 return MatchOperand_ParseFail; 6078 } 6079 } else { 6080 return MatchOperand_ParseFail; 6081 } 6082 6083 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6084 return MatchOperand_Success; 6085 } 6086 6087 bool AMDGPUOperand::isHwreg() const { 6088 return isImmTy(ImmTyHwreg); 6089 } 6090 6091 //===----------------------------------------------------------------------===// 6092 // sendmsg 6093 //===----------------------------------------------------------------------===// 6094 6095 bool 6096 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6097 OperandInfoTy &Op, 6098 OperandInfoTy &Stream) { 6099 using namespace llvm::AMDGPU::SendMsg; 6100 6101 Msg.Loc = getLoc(); 6102 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6103 Msg.IsSymbolic = true; 6104 lex(); // skip message name 6105 } else if (!parseExpr(Msg.Id, "a message name")) { 6106 return false; 6107 } 6108 6109 if (trySkipToken(AsmToken::Comma)) { 6110 Op.IsDefined = true; 
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
AMDGPUOperand::ImmTyInterpSlot)); 6226 return MatchOperand_Success; 6227 } 6228 6229 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6230 StringRef Str; 6231 SMLoc S = getLoc(); 6232 6233 if (!parseId(Str)) 6234 return MatchOperand_NoMatch; 6235 6236 if (!Str.startswith("attr")) { 6237 Error(S, "invalid interpolation attribute"); 6238 return MatchOperand_ParseFail; 6239 } 6240 6241 StringRef Chan = Str.take_back(2); 6242 int AttrChan = StringSwitch<int>(Chan) 6243 .Case(".x", 0) 6244 .Case(".y", 1) 6245 .Case(".z", 2) 6246 .Case(".w", 3) 6247 .Default(-1); 6248 if (AttrChan == -1) { 6249 Error(S, "invalid or missing interpolation attribute channel"); 6250 return MatchOperand_ParseFail; 6251 } 6252 6253 Str = Str.drop_back(2).drop_front(4); 6254 6255 uint8_t Attr; 6256 if (Str.getAsInteger(10, Attr)) { 6257 Error(S, "invalid or missing interpolation attribute number"); 6258 return MatchOperand_ParseFail; 6259 } 6260 6261 if (Attr > 63) { 6262 Error(S, "out of bounds interpolation attribute number"); 6263 return MatchOperand_ParseFail; 6264 } 6265 6266 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6267 6268 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6269 AMDGPUOperand::ImmTyInterpAttr)); 6270 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6271 AMDGPUOperand::ImmTyAttrChan)); 6272 return MatchOperand_Success; 6273 } 6274 6275 //===----------------------------------------------------------------------===// 6276 // exp 6277 //===----------------------------------------------------------------------===// 6278 6279 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6280 using namespace llvm::AMDGPU::Exp; 6281 6282 StringRef Str; 6283 SMLoc S = getLoc(); 6284 6285 if (!parseId(Str)) 6286 return MatchOperand_NoMatch; 6287 6288 unsigned Id = getTgtId(Str); 6289 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6290 Error(S, (Id == ET_INVALID) ? 
6291 "invalid exp target" : 6292 "exp target is not supported on this GPU"); 6293 return MatchOperand_ParseFail; 6294 } 6295 6296 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6297 AMDGPUOperand::ImmTyExpTgt)); 6298 return MatchOperand_Success; 6299 } 6300 6301 //===----------------------------------------------------------------------===// 6302 // parser helpers 6303 //===----------------------------------------------------------------------===// 6304 6305 bool 6306 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6307 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6308 } 6309 6310 bool 6311 AMDGPUAsmParser::isId(const StringRef Id) const { 6312 return isId(getToken(), Id); 6313 } 6314 6315 bool 6316 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6317 return getTokenKind() == Kind; 6318 } 6319 6320 bool 6321 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6322 if (isId(Id)) { 6323 lex(); 6324 return true; 6325 } 6326 return false; 6327 } 6328 6329 bool 6330 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6331 if (isToken(AsmToken::Identifier)) { 6332 StringRef Tok = getTokenStr(); 6333 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6334 lex(); 6335 return true; 6336 } 6337 } 6338 return false; 6339 } 6340 6341 bool 6342 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6343 if (isId(Id) && peekToken().is(Kind)) { 6344 lex(); 6345 lex(); 6346 return true; 6347 } 6348 return false; 6349 } 6350 6351 bool 6352 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6353 if (isToken(Kind)) { 6354 lex(); 6355 return true; 6356 } 6357 return false; 6358 } 6359 6360 bool 6361 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6362 const StringRef ErrMsg) { 6363 if (!trySkipToken(Kind)) { 6364 Error(getLoc(), ErrMsg); 6365 return false; 6366 } 6367 return true; 6368 } 6369 6370 bool 6371 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6372 SMLoc S = getLoc(); 6373 6374 const MCExpr *Expr; 6375 if (Parser.parseExpression(Expr)) 6376 return false; 6377 6378 if (Expr->evaluateAsAbsolute(Imm)) 6379 return true; 6380 6381 if (Expected.empty()) { 6382 Error(S, "expected absolute expression"); 6383 } else { 6384 Error(S, Twine("expected ", Expected) + 6385 Twine(" or an absolute expression")); 6386 } 6387 return false; 6388 } 6389 6390 bool 6391 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6392 SMLoc S = getLoc(); 6393 6394 const MCExpr *Expr; 6395 if (Parser.parseExpression(Expr)) 6396 return false; 6397 6398 int64_t IntVal; 6399 if (Expr->evaluateAsAbsolute(IntVal)) { 6400 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6401 } else { 6402 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6403 } 6404 return true; 6405 } 6406 6407 bool 6408 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6409 if (isToken(AsmToken::String)) { 6410 Val = getToken().getStringContents(); 6411 lex(); 6412 return true; 6413 } else { 6414 Error(getLoc(), ErrMsg); 6415 return false; 6416 } 6417 } 6418 6419 bool 6420 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6421 if (isToken(AsmToken::Identifier)) { 6422 Val = getTokenStr(); 6423 lex(); 6424 return true; 6425 } else { 6426 if (!ErrMsg.empty()) 6427 Error(getLoc(), ErrMsg); 6428 return false; 6429 } 6430 } 6431 6432 AsmToken 6433 AMDGPUAsmParser::getToken() const { 6434 return Parser.getTok(); 6435 } 6436 6437 AsmToken 6438 
AMDGPUAsmParser::peekToken() { 6439 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6440 } 6441 6442 void 6443 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6444 auto TokCount = getLexer().peekTokens(Tokens); 6445 6446 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6447 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6448 } 6449 6450 AsmToken::TokenKind 6451 AMDGPUAsmParser::getTokenKind() const { 6452 return getLexer().getKind(); 6453 } 6454 6455 SMLoc 6456 AMDGPUAsmParser::getLoc() const { 6457 return getToken().getLoc(); 6458 } 6459 6460 StringRef 6461 AMDGPUAsmParser::getTokenStr() const { 6462 return getToken().getString(); 6463 } 6464 6465 void 6466 AMDGPUAsmParser::lex() { 6467 Parser.Lex(); 6468 } 6469 6470 SMLoc 6471 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6472 const OperandVector &Operands) const { 6473 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6474 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6475 if (Test(Op)) 6476 return Op.getStartLoc(); 6477 } 6478 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6479 } 6480 6481 SMLoc 6482 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6483 const OperandVector &Operands) const { 6484 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6485 return getOperandLoc(Test, Operands); 6486 } 6487 6488 SMLoc 6489 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6490 const OperandVector &Operands) const { 6491 auto Test = [=](const AMDGPUOperand& Op) { 6492 return Op.isRegKind() && Op.getReg() == Reg; 6493 }; 6494 return getOperandLoc(Test, Operands); 6495 } 6496 6497 SMLoc 6498 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6499 auto Test = [](const AMDGPUOperand& Op) { 6500 return Op.IsImmKindLiteral() || Op.isExpr(); 6501 }; 6502 return getOperandLoc(Test, Operands); 6503 } 6504 6505 SMLoc 6506 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6507 auto Test = [](const AMDGPUOperand& Op) { 6508 return Op.isImmKindConst(); 6509 }; 6510 return getOperandLoc(Test, Operands); 6511 } 6512 6513 //===----------------------------------------------------------------------===// 6514 // swizzle 6515 //===----------------------------------------------------------------------===// 6516 6517 LLVM_READNONE 6518 static unsigned 6519 encodeBitmaskPerm(const unsigned AndMask, 6520 const unsigned OrMask, 6521 const unsigned XorMask) { 6522 using namespace llvm::AMDGPU::Swizzle; 6523 6524 return BITMASK_PERM_ENC | 6525 (AndMask << BITMASK_AND_SHIFT) | 6526 (OrMask << BITMASK_OR_SHIFT) | 6527 (XorMask << BITMASK_XOR_SHIFT); 6528 } 6529 6530 bool 6531 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6532 const unsigned MinVal, 6533 const unsigned MaxVal, 6534 const StringRef ErrMsg, 6535 SMLoc &Loc) { 6536 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6537 return false; 6538 } 6539 Loc = getLoc(); 6540 if (!parseExpr(Op)) { 6541 return false; 6542 } 6543 if (Op < MinVal || Op > MaxVal) { 6544 Error(Loc, ErrMsg); 6545 return false; 6546 } 6547 6548 return true; 6549 } 6550 6551 bool 6552 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6553 const unsigned MinVal, 6554 const unsigned MaxVal, 6555 const StringRef ErrMsg) { 6556 SMLoc Loc; 6557 for (unsigned i = 0; i < OpNum; ++i) { 6558 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6559 return false; 6560 } 6561 6562 return true; 6563 } 6564 6565 bool 6566 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6567 using namespace llvm::AMDGPU::Swizzle; 6568 6569 int64_t Lane[LANE_NUM]; 6570 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6571 "expected a 2-bit lane id")) { 6572 Imm = QUAD_PERM_ENC; 6573 for (unsigned I = 0; I < LANE_NUM; ++I) { 6574 Imm |= Lane[I] << (LANE_SHIFT * I); 6575 } 6576 return true; 6577 } 6578 return false; 6579 } 6580 6581 bool 6582 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6583 using namespace llvm::AMDGPU::Swizzle; 6584 6585 SMLoc Loc; 6586 int64_t GroupSize; 6587 int64_t LaneIdx; 6588 6589 if (!parseSwizzleOperand(GroupSize, 6590 2, 32, 6591 "group size must be in the interval [2,32]", 6592 Loc)) { 6593 return false; 6594 } 6595 if (!isPowerOf2_64(GroupSize)) { 6596 Error(Loc, "group size must be a power of two"); 6597 return false; 6598 } 6599 if (parseSwizzleOperand(LaneIdx, 6600 0, GroupSize - 1, 6601 "lane id must be in the interval [0,group size - 1]", 6602 Loc)) { 6603 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6604 return true; 6605 } 6606 return false; 6607 } 6608 6609 bool 6610 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6611 using namespace llvm::AMDGPU::Swizzle; 6612 6613 SMLoc Loc; 6614 int64_t GroupSize; 6615 6616 if (!parseSwizzleOperand(GroupSize, 6617 2, 32, 6618 "group size must be in the interval [2,32]", 6619 Loc)) { 6620 return false; 6621 } 6622 if (!isPowerOf2_64(GroupSize)) { 6623 Error(Loc, "group size must be a power of two"); 6624 return false; 6625 } 6626 6627 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6628 return true; 6629 } 6630 6631 bool 6632 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6633 using namespace llvm::AMDGPU::Swizzle; 6634 6635 SMLoc Loc; 6636 int64_t GroupSize; 6637 6638 if (!parseSwizzleOperand(GroupSize, 6639 1, 16, 6640 "group size must be in the interval [1,16]", 6641 Loc)) { 6642 return false; 6643 } 6644 if (!isPowerOf2_64(GroupSize)) { 6645 Error(Loc, "group size must be a power of two"); 6646 return false; 6647 } 6648 6649 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6650 return true; 6651 } 6652 6653 bool 6654 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6655 using namespace llvm::AMDGPU::Swizzle; 6656 6657 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6658 return false; 6659 } 6660 6661 StringRef Ctl; 6662 SMLoc StrLoc = getLoc(); 6663 if (!parseString(Ctl)) { 6664 return false; 6665 } 6666 if (Ctl.size() != BITMASK_WIDTH) { 6667 Error(StrLoc, "expected a 5-character mask"); 6668 return false; 6669 } 6670 6671 unsigned AndMask = 0; 6672 unsigned OrMask = 0; 6673 unsigned XorMask = 0; 6674 6675 for (size_t i = 0; i < Ctl.size(); ++i) { 6676 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6677 switch(Ctl[i]) { 6678 default: 6679 Error(StrLoc, "invalid mask"); 6680 return false; 6681 case '0': 6682 break; 6683 case '1': 6684 OrMask |= Mask; 6685 break; 6686 case 'p': 6687 AndMask |= Mask; 6688 break; 6689 case 'i': 6690 AndMask |= Mask; 6691 XorMask |= Mask; 6692 break; 6693 } 6694 } 6695 6696 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6697 return true; 6698 } 6699 6700 bool 6701 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6702 6703 SMLoc OffsetLoc = getLoc(); 6704 6705 if (!parseExpr(Imm, "a swizzle macro")) { 6706 return false; 6707 } 6708 if (!isUInt<16>(Imm)) { 6709 Error(OffsetLoc, "expected a 16-bit offset"); 6710 return false; 6711 } 6712 return true; 6713 } 6714 6715 bool 6716 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6717 using namespace llvm::AMDGPU::Swizzle; 6718 6719 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6720 6721 SMLoc ModeLoc = getLoc(); 6722 bool Ok = false; 6723 6724 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6725 Ok = parseSwizzleQuadPerm(Imm); 6726 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6727 Ok = parseSwizzleBitmaskPerm(Imm); 6728 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6729 Ok = parseSwizzleBroadcast(Imm); 6730 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6731 Ok = parseSwizzleSwap(Imm); 6732 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6733 Ok = parseSwizzleReverse(Imm); 6734 } else { 6735 Error(ModeLoc, "expected a swizzle mode"); 6736 } 6737 6738 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6739 } 6740 6741 return false; 6742 } 6743 6744 OperandMatchResultTy 6745 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6746 SMLoc S = getLoc(); 6747 int64_t Imm = 0; 6748 6749 if (trySkipId("offset")) { 6750 6751 bool Ok = false; 6752 if (skipToken(AsmToken::Colon, "expected a colon")) { 6753 if (trySkipId("swizzle")) { 6754 Ok = parseSwizzleMacro(Imm); 6755 } else { 6756 Ok = parseSwizzleOffset(Imm); 6757 } 6758 } 6759 6760 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6761 6762 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6763 } else { 6764 // Swizzle "offset" operand is optional. 6765 // If it is omitted, try parsing other optional operands. 6766 return parseOptionalOpr(Operands); 6767 } 6768 } 6769 6770 bool 6771 AMDGPUOperand::isSwizzle() const { 6772 return isImmTy(ImmTySwizzle); 6773 } 6774 6775 //===----------------------------------------------------------------------===// 6776 // VGPR Index Mode 6777 //===----------------------------------------------------------------------===// 6778 6779 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6780 6781 using namespace llvm::AMDGPU::VGPRIndexMode; 6782 6783 if (trySkipToken(AsmToken::RParen)) { 6784 return OFF; 6785 } 6786 6787 int64_t Imm = 0; 6788 6789 while (true) { 6790 unsigned Mode = 0; 6791 SMLoc S = getLoc(); 6792 6793 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6794 if (trySkipId(IdSymbolic[ModeId])) { 6795 Mode = 1 << ModeId; 6796 break; 6797 } 6798 } 6799 6800 if (Mode == 0) { 6801 Error(S, (Imm == 0)? 
6802 "expected a VGPR index mode or a closing parenthesis" : 6803 "expected a VGPR index mode"); 6804 return UNDEF; 6805 } 6806 6807 if (Imm & Mode) { 6808 Error(S, "duplicate VGPR index mode"); 6809 return UNDEF; 6810 } 6811 Imm |= Mode; 6812 6813 if (trySkipToken(AsmToken::RParen)) 6814 break; 6815 if (!skipToken(AsmToken::Comma, 6816 "expected a comma or a closing parenthesis")) 6817 return UNDEF; 6818 } 6819 6820 return Imm; 6821 } 6822 6823 OperandMatchResultTy 6824 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6825 6826 using namespace llvm::AMDGPU::VGPRIndexMode; 6827 6828 int64_t Imm = 0; 6829 SMLoc S = getLoc(); 6830 6831 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6832 Imm = parseGPRIdxMacro(); 6833 if (Imm == UNDEF) 6834 return MatchOperand_ParseFail; 6835 } else { 6836 if (getParser().parseAbsoluteExpression(Imm)) 6837 return MatchOperand_ParseFail; 6838 if (Imm < 0 || !isUInt<4>(Imm)) { 6839 Error(S, "invalid immediate: only 4-bit values are legal"); 6840 return MatchOperand_ParseFail; 6841 } 6842 } 6843 6844 Operands.push_back( 6845 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6846 return MatchOperand_Success; 6847 } 6848 6849 bool AMDGPUOperand::isGPRIdxMode() const { 6850 return isImmTy(ImmTyGprIdxMode); 6851 } 6852 6853 //===----------------------------------------------------------------------===// 6854 // sopp branch targets 6855 //===----------------------------------------------------------------------===// 6856 6857 OperandMatchResultTy 6858 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6859 6860 // Make sure we are not parsing something 6861 // that looks like a label or an expression but is not. 6862 // This will improve error messages. 6863 if (isRegister() || isModifier()) 6864 return MatchOperand_NoMatch; 6865 6866 if (!parseExpr(Operands)) 6867 return MatchOperand_ParseFail; 6868 6869 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6870 assert(Opr.isImm() || Opr.isExpr()); 6871 SMLoc Loc = Opr.getStartLoc(); 6872 6873 // Currently we do not support arbitrary expressions as branch targets. 6874 // Only labels and absolute expressions are accepted. 
6875 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6876 Error(Loc, "expected an absolute expression or a label"); 6877 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6878 Error(Loc, "expected a 16-bit signed jump offset"); 6879 } 6880 6881 return MatchOperand_Success; 6882 } 6883 6884 //===----------------------------------------------------------------------===// 6885 // Boolean holding registers 6886 //===----------------------------------------------------------------------===// 6887 6888 OperandMatchResultTy 6889 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6890 return parseReg(Operands); 6891 } 6892 6893 //===----------------------------------------------------------------------===// 6894 // mubuf 6895 //===----------------------------------------------------------------------===// 6896 6897 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6898 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6899 } 6900 6901 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6902 const OperandVector &Operands, 6903 bool IsAtomic, 6904 bool IsLds) { 6905 bool IsLdsOpcode = IsLds; 6906 bool HasLdsModifier = false; 6907 OptionalImmIndexMap OptionalIdx; 6908 unsigned FirstOperandIdx = 1; 6909 bool IsAtomicReturn = false; 6910 6911 if (IsAtomic) { 6912 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6913 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6914 if (!Op.isCPol()) 6915 continue; 6916 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 6917 break; 6918 } 6919 6920 if (!IsAtomicReturn) { 6921 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 6922 if (NewOpc != -1) 6923 Inst.setOpcode(NewOpc); 6924 } 6925 6926 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 6927 SIInstrFlags::IsAtomicRet; 6928 } 6929 6930 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6931 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6932 6933 // Add the register arguments 6934 if (Op.isReg()) { 6935 Op.addRegOperands(Inst, 1); 6936 // Insert a tied src for atomic return dst. 6937 // This cannot be postponed as subsequent calls to 6938 // addImmOperands rely on correct number of MC operands. 6939 if (IsAtomicReturn && i == FirstOperandIdx) 6940 Op.addRegOperands(Inst, 1); 6941 continue; 6942 } 6943 6944 // Handle the case where soffset is an immediate 6945 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6946 Op.addImmOperands(Inst, 1); 6947 continue; 6948 } 6949 6950 HasLdsModifier |= Op.isLDS(); 6951 6952 // Handle tokens like 'offen' which are sometimes hard-coded into the 6953 // asm string. There are no MCInst operands for these. 6954 if (Op.isToken()) { 6955 continue; 6956 } 6957 assert(Op.isImm()); 6958 6959 // Handle optional arguments 6960 OptionalIdx[Op.getImmTy()] = i; 6961 } 6962 6963 // This is a workaround for an llvm quirk which may result in an 6964 // incorrect instruction selection. Lds and non-lds versions of 6965 // MUBUF instructions are identical except that lds versions 6966 // have mandatory 'lds' modifier. However this modifier follows 6967 // optional modifiers and llvm asm matcher regards this 'lds' 6968 // modifier as an optional one. As a result, an lds version 6969 // of opcode may be selected even if it has no 'lds' modifier. 6970 if (IsLdsOpcode && !HasLdsModifier) { 6971 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6972 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6973 Inst.setOpcode(NoLdsOpcode); 6974 IsLdsOpcode = false; 6975 } 6976 } 6977 6978 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 6980 6981 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6983 } 6984 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6985 } 6986 6987 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6988 OptionalImmIndexMap OptionalIdx; 6989 6990 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6991 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6992 6993 // Add the register arguments 6994 if (Op.isReg()) { 6995 Op.addRegOperands(Inst, 1); 6996 continue; 6997 } 6998 6999 // Handle the case where soffset is an immediate 7000 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7001 Op.addImmOperands(Inst, 1); 7002 continue; 7003 } 7004 7005 // Handle tokens like 'offen' which are sometimes hard-coded into the 7006 // asm string. There are no MCInst operands for these. 7007 if (Op.isToken()) { 7008 continue; 7009 } 7010 assert(Op.isImm()); 7011 7012 // Handle optional arguments 7013 OptionalIdx[Op.getImmTy()] = i; 7014 } 7015 7016 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7017 AMDGPUOperand::ImmTyOffset); 7018 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7019 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7020 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7021 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7022 } 7023 7024 //===----------------------------------------------------------------------===// 7025 // mimg 7026 //===----------------------------------------------------------------------===// 7027 7028 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7029 bool IsAtomic) { 7030 unsigned I = 1; 7031 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7032 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7033 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7034 } 7035 7036 if (IsAtomic) { 7037 // Add src, same as dst 7038 assert(Desc.getNumDefs() == 1); 7039 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7040 } 7041 7042 OptionalImmIndexMap OptionalIdx; 7043 7044 for (unsigned E = Operands.size(); I != E; ++I) { 7045 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7046 7047 // Add the register arguments 7048 if (Op.isReg()) { 7049 Op.addRegOperands(Inst, 1); 7050 } else if (Op.isImmModifier()) { 7051 OptionalIdx[Op.getImmTy()] = I; 7052 } else if (!Op.isToken()) { 7053 llvm_unreachable("unexpected operand type"); 7054 } 7055 } 7056 7057 bool IsGFX10Plus = isGFX10Plus(); 7058 7059 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7060 if (IsGFX10Plus) 7061 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7062 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7063 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7064 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7065 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7066 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7067 if (IsGFX10Plus) 
7068 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7069 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7070 if (!IsGFX10Plus) 7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7073 } 7074 7075 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7076 cvtMIMG(Inst, Operands, true); 7077 } 7078 7079 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7080 OptionalImmIndexMap OptionalIdx; 7081 bool IsAtomicReturn = false; 7082 7083 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7084 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7085 if (!Op.isCPol()) 7086 continue; 7087 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7088 break; 7089 } 7090 7091 if (!IsAtomicReturn) { 7092 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7093 if (NewOpc != -1) 7094 Inst.setOpcode(NewOpc); 7095 } 7096 7097 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7098 SIInstrFlags::IsAtomicRet; 7099 7100 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7101 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7102 7103 // Add the register arguments 7104 if (Op.isReg()) { 7105 Op.addRegOperands(Inst, 1); 7106 if (IsAtomicReturn && i == 1) 7107 Op.addRegOperands(Inst, 1); 7108 continue; 7109 } 7110 7111 // Handle the case where soffset is an immediate 7112 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7113 Op.addImmOperands(Inst, 1); 7114 continue; 7115 } 7116 7117 // Handle tokens like 'offen' which are sometimes hard-coded into the 7118 // asm string. There are no MCInst operands for these. 7119 if (Op.isToken()) { 7120 continue; 7121 } 7122 assert(Op.isImm()); 7123 7124 // Handle optional arguments 7125 OptionalIdx[Op.getImmTy()] = i; 7126 } 7127 7128 if ((int)Inst.getNumOperands() <= 7129 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7130 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7132 } 7133 7134 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7135 const OperandVector &Operands) { 7136 for (unsigned I = 1; I < Operands.size(); ++I) { 7137 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7138 if (Operand.isReg()) 7139 Operand.addRegOperands(Inst, 1); 7140 } 7141 7142 Inst.addOperand(MCOperand::createImm(1)); // a16 7143 } 7144 7145 //===----------------------------------------------------------------------===// 7146 // smrd 7147 //===----------------------------------------------------------------------===// 7148 7149 bool AMDGPUOperand::isSMRDOffset8() const { 7150 return isImm() && isUInt<8>(getImm()); 7151 } 7152 7153 bool AMDGPUOperand::isSMEMOffset() const { 7154 return isImm(); // Offset range is checked later by validator. 7155 } 7156 7157 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7158 // 32-bit literals are only supported on CI and we only want to use them 7159 // when the offset is > 8-bits. 
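  // Illustrative example: an offset such as 0x1fffc fails isUInt<8>() but
  // passes isUInt<32>(), so it is matched here rather than by isSMRDOffset8().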
7160 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7161 } 7162 7163 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7164 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7165 } 7166 7167 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7168 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7169 } 7170 7171 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7172 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7173 } 7174 7175 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7176 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7177 } 7178 7179 //===----------------------------------------------------------------------===// 7180 // vop3 7181 //===----------------------------------------------------------------------===// 7182 7183 static bool ConvertOmodMul(int64_t &Mul) { 7184 if (Mul != 1 && Mul != 2 && Mul != 4) 7185 return false; 7186 7187 Mul >>= 1; 7188 return true; 7189 } 7190 7191 static bool ConvertOmodDiv(int64_t &Div) { 7192 if (Div == 1) { 7193 Div = 0; 7194 return true; 7195 } 7196 7197 if (Div == 2) { 7198 Div = 3; 7199 return true; 7200 } 7201 7202 return false; 7203 } 7204 7205 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7206 // This is intentional and ensures compatibility with sp3. 7207 // See bug 35397 for details. 7208 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7209 if (BoundCtrl == 0 || BoundCtrl == 1) { 7210 BoundCtrl = 1; 7211 return true; 7212 } 7213 return false; 7214 } 7215 7216 // Note: the order in this table matches the order of operands in AsmString. 7217 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7218 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7219 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7220 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7221 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7222 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7223 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7224 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7225 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7226 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7227 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7228 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7229 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7230 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7231 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7232 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7233 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7234 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7235 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7236 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7237 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7238 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7239 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7240 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7241 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7242 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7243 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7244 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7245 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7246 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7247 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7248 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7249 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7250 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr}, 7251 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7252 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7253 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7254 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7255 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7256 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7257 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7258 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7259 }; 7260 7261 void AMDGPUAsmParser::onBeginOfFile() { 7262 if (!getParser().getStreamer().getTargetStreamer() || 7263 getSTI().getTargetTriple().getArch() == Triple::r600) 7264 return; 7265 7266 if (!getTargetStreamer().getTargetID()) 7267 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7268 7269 if (isHsaAbiVersion3Or4(&getSTI())) 7270 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7271 } 7272 7273 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7274 7275 OperandMatchResultTy res = parseOptionalOpr(Operands); 7276 7277 // This is a hack to enable hardcoded mandatory operands which follow 7278 // optional operands. 7279 // 7280 // The current design assumes that all operands after the first optional operand 7281 // are also optional. However, the implementation of some instructions violates 7282 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7283 // 7284 // To alleviate this problem, we have to (implicitly) parse extra operands 7285 // to make sure the autogenerated parser of custom operands never hits hardcoded 7286 // mandatory operands.
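  //
  // A hypothetical instance of the problem described above: in a returning
  // atomic written as
  //   flat_atomic_swap v0, v[1:2], v3 glc
  // the trailing 'glc' is a hard-coded token in the AsmString rather than an
  // optional operand, so the loop below speculatively parses up to
  // MAX_OPR_LOOKAHEAD extra optional operands to keep the autogenerated
  // matcher from ever starting at that mandatory token.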
7287 7288 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7289 if (res != MatchOperand_Success || 7290 isToken(AsmToken::EndOfStatement)) 7291 break; 7292 7293 trySkipToken(AsmToken::Comma); 7294 res = parseOptionalOpr(Operands); 7295 } 7296 7297 return res; 7298 } 7299 7300 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7301 OperandMatchResultTy res; 7302 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7303 // try to parse any optional operand here 7304 if (Op.IsBit) { 7305 res = parseNamedBit(Op.Name, Operands, Op.Type); 7306 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7307 res = parseOModOperand(Operands); 7308 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7309 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7310 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7311 res = parseSDWASel(Operands, Op.Name, Op.Type); 7312 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7313 res = parseSDWADstUnused(Operands); 7314 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7315 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7316 Op.Type == AMDGPUOperand::ImmTyNegLo || 7317 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7318 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7319 Op.ConvertResult); 7320 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7321 res = parseDim(Operands); 7322 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7323 res = parseCPol(Operands); 7324 } else { 7325 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7326 } 7327 if (res != MatchOperand_NoMatch) { 7328 return res; 7329 } 7330 } 7331 return MatchOperand_NoMatch; 7332 } 7333 7334 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7335 StringRef Name = getTokenStr(); 7336 if (Name == "mul") { 7337 return parseIntWithPrefix("mul", Operands, 7338 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7339 } 7340 7341 if (Name == "div") { 7342 return parseIntWithPrefix("div", Operands, 7343 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7344 } 7345 7346 return MatchOperand_NoMatch; 7347 } 7348 7349 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7350 cvtVOP3P(Inst, Operands); 7351 7352 int Opc = Inst.getOpcode(); 7353 7354 int SrcNum; 7355 const int Ops[] = { AMDGPU::OpName::src0, 7356 AMDGPU::OpName::src1, 7357 AMDGPU::OpName::src2 }; 7358 for (SrcNum = 0; 7359 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7360 ++SrcNum); 7361 assert(SrcNum > 0); 7362 7363 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7364 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7365 7366 if ((OpSel & (1 << SrcNum)) != 0) { 7367 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7368 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7369 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7370 } 7371 } 7372 7373 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7374 // 1. This operand is input modifiers 7375 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7376 // 2. This is not last operand 7377 && Desc.NumOperands > (OpNum + 1) 7378 // 3. Next operand is register class 7379 && Desc.OpInfo[OpNum + 1].RegClass != -1 7380 // 4. 
Next register is not tied to any other operand 7381 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7382 } 7383 7384 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7385 { 7386 OptionalImmIndexMap OptionalIdx; 7387 unsigned Opc = Inst.getOpcode(); 7388 7389 unsigned I = 1; 7390 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7391 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7392 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7393 } 7394 7395 for (unsigned E = Operands.size(); I != E; ++I) { 7396 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7397 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7398 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7399 } else if (Op.isInterpSlot() || 7400 Op.isInterpAttr() || 7401 Op.isAttrChan()) { 7402 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7403 } else if (Op.isImmModifier()) { 7404 OptionalIdx[Op.getImmTy()] = I; 7405 } else { 7406 llvm_unreachable("unhandled operand type"); 7407 } 7408 } 7409 7410 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7411 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7412 } 7413 7414 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7415 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7416 } 7417 7418 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7419 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7420 } 7421 } 7422 7423 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7424 OptionalImmIndexMap &OptionalIdx) { 7425 unsigned Opc = Inst.getOpcode(); 7426 7427 unsigned I = 1; 7428 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7429 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7430 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7431 } 7432 7433 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7434 // This instruction has src modifiers 7435 for (unsigned E = Operands.size(); I != E; ++I) { 7436 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7437 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7438 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7439 } else if (Op.isImmModifier()) { 7440 OptionalIdx[Op.getImmTy()] = I; 7441 } else if (Op.isRegOrImm()) { 7442 Op.addRegOrImmOperands(Inst, 1); 7443 } else { 7444 llvm_unreachable("unhandled operand type"); 7445 } 7446 } 7447 } else { 7448 // No src modifiers 7449 for (unsigned E = Operands.size(); I != E; ++I) { 7450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7451 if (Op.isMod()) { 7452 OptionalIdx[Op.getImmTy()] = I; 7453 } else { 7454 Op.addRegOrImmOperands(Inst, 1); 7455 } 7456 } 7457 } 7458 7459 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7460 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7461 } 7462 7463 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7464 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7465 } 7466 7467 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7468 // it has src2 register operand that is tied to dst operand 7469 // we don't allow modifiers for this operand in assembler so src2_modifiers 7470 // should be 0. 
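  // For illustration (assumed syntax): "v_mac_f32_e64 v5, v1, v2" is written
  // with only vdst/src0/src1; the code below then inserts an implicit
  // src2_modifiers value of 0 and re-adds the destination register (v5) as
  // the tied src2 operand.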
7471 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7472 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7473 Opc == AMDGPU::V_MAC_F32_e64_vi || 7474 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7475 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7476 Opc == AMDGPU::V_MAC_F16_e64_vi || 7477 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7478 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7479 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7480 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7481 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7482 auto it = Inst.begin(); 7483 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7484 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7485 ++it; 7486 // Copy the operand to ensure it's not invalidated when Inst grows. 7487 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7488 } 7489 } 7490 7491 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7492 OptionalImmIndexMap OptionalIdx; 7493 cvtVOP3(Inst, Operands, OptionalIdx); 7494 } 7495 7496 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7497 OptionalImmIndexMap &OptIdx) { 7498 const int Opc = Inst.getOpcode(); 7499 const MCInstrDesc &Desc = MII.get(Opc); 7500 7501 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7502 7503 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7504 assert(!IsPacked); 7505 Inst.addOperand(Inst.getOperand(0)); 7506 } 7507 7508 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7509 // instruction, and then figure out where to actually put the modifiers 7510 7511 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7512 if (OpSelIdx != -1) { 7513 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7514 } 7515 7516 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7517 if (OpSelHiIdx != -1) { 7518 int DefaultVal = IsPacked ? 
-1 : 0; 7519 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7520 DefaultVal); 7521 } 7522 7523 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7524 if (NegLoIdx != -1) { 7525 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7526 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7527 } 7528 7529 const int Ops[] = { AMDGPU::OpName::src0, 7530 AMDGPU::OpName::src1, 7531 AMDGPU::OpName::src2 }; 7532 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7533 AMDGPU::OpName::src1_modifiers, 7534 AMDGPU::OpName::src2_modifiers }; 7535 7536 unsigned OpSel = 0; 7537 unsigned OpSelHi = 0; 7538 unsigned NegLo = 0; 7539 unsigned NegHi = 0; 7540 7541 if (OpSelIdx != -1) 7542 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7543 7544 if (OpSelHiIdx != -1) 7545 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7546 7547 if (NegLoIdx != -1) { 7548 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7549 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7550 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7551 } 7552 7553 for (int J = 0; J < 3; ++J) { 7554 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7555 if (OpIdx == -1) 7556 break; 7557 7558 uint32_t ModVal = 0; 7559 7560 if ((OpSel & (1 << J)) != 0) 7561 ModVal |= SISrcMods::OP_SEL_0; 7562 7563 if ((OpSelHi & (1 << J)) != 0) 7564 ModVal |= SISrcMods::OP_SEL_1; 7565 7566 if ((NegLo & (1 << J)) != 0) 7567 ModVal |= SISrcMods::NEG; 7568 7569 if ((NegHi & (1 << J)) != 0) 7570 ModVal |= SISrcMods::NEG_HI; 7571 7572 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7573 7574 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7575 } 7576 } 7577 7578 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7579 OptionalImmIndexMap OptIdx; 7580 cvtVOP3(Inst, Operands, OptIdx); 7581 cvtVOP3P(Inst, Operands, OptIdx); 7582 } 7583 7584 //===----------------------------------------------------------------------===// 7585 // dpp 7586 //===----------------------------------------------------------------------===// 7587 7588 bool AMDGPUOperand::isDPP8() const { 7589 return isImmTy(ImmTyDPP8); 7590 } 7591 7592 bool AMDGPUOperand::isDPPCtrl() const { 7593 using namespace AMDGPU::DPP; 7594 7595 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7596 if (result) { 7597 int64_t Imm = getImm(); 7598 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7599 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7600 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7601 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7602 (Imm == DppCtrl::WAVE_SHL1) || 7603 (Imm == DppCtrl::WAVE_ROL1) || 7604 (Imm == DppCtrl::WAVE_SHR1) || 7605 (Imm == DppCtrl::WAVE_ROR1) || 7606 (Imm == DppCtrl::ROW_MIRROR) || 7607 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7608 (Imm == DppCtrl::BCAST15) || 7609 (Imm == DppCtrl::BCAST31) || 7610 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7611 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7612 } 7613 return false; 7614 } 7615 7616 //===----------------------------------------------------------------------===// 7617 // mAI 7618 //===----------------------------------------------------------------------===// 7619 7620 bool AMDGPUOperand::isBLGP() const { 7621 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7622 } 7623 7624 bool 
AMDGPUOperand::isCBSZ() const { 7625 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7626 } 7627 7628 bool AMDGPUOperand::isABID() const { 7629 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7630 } 7631 7632 bool AMDGPUOperand::isS16Imm() const { 7633 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7634 } 7635 7636 bool AMDGPUOperand::isU16Imm() const { 7637 return isImm() && isUInt<16>(getImm()); 7638 } 7639 7640 //===----------------------------------------------------------------------===// 7641 // dim 7642 //===----------------------------------------------------------------------===// 7643 7644 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7645 // We want to allow "dim:1D" etc., 7646 // but the initial 1 is tokenized as an integer. 7647 std::string Token; 7648 if (isToken(AsmToken::Integer)) { 7649 SMLoc Loc = getToken().getEndLoc(); 7650 Token = std::string(getTokenStr()); 7651 lex(); 7652 if (getLoc() != Loc) 7653 return false; 7654 } 7655 7656 StringRef Suffix; 7657 if (!parseId(Suffix)) 7658 return false; 7659 Token += Suffix; 7660 7661 StringRef DimId = Token; 7662 if (DimId.startswith("SQ_RSRC_IMG_")) 7663 DimId = DimId.drop_front(12); 7664 7665 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7666 if (!DimInfo) 7667 return false; 7668 7669 Encoding = DimInfo->Encoding; 7670 return true; 7671 } 7672 7673 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7674 if (!isGFX10Plus()) 7675 return MatchOperand_NoMatch; 7676 7677 SMLoc S = getLoc(); 7678 7679 if (!trySkipId("dim", AsmToken::Colon)) 7680 return MatchOperand_NoMatch; 7681 7682 unsigned Encoding; 7683 SMLoc Loc = getLoc(); 7684 if (!parseDimId(Encoding)) { 7685 Error(Loc, "invalid dim value"); 7686 return MatchOperand_ParseFail; 7687 } 7688 7689 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7690 AMDGPUOperand::ImmTyDim)); 7691 return MatchOperand_Success; 7692 } 7693 7694 //===----------------------------------------------------------------------===// 7695 // dpp 7696 //===----------------------------------------------------------------------===// 7697 7698 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7699 SMLoc S = getLoc(); 7700 7701 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7702 return MatchOperand_NoMatch; 7703 7704 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7705 7706 int64_t Sels[8]; 7707 7708 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7709 return MatchOperand_ParseFail; 7710 7711 for (size_t i = 0; i < 8; ++i) { 7712 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7713 return MatchOperand_ParseFail; 7714 7715 SMLoc Loc = getLoc(); 7716 if (getParser().parseAbsoluteExpression(Sels[i])) 7717 return MatchOperand_ParseFail; 7718 if (0 > Sels[i] || 7 < Sels[i]) { 7719 Error(Loc, "expected a 3-bit value"); 7720 return MatchOperand_ParseFail; 7721 } 7722 } 7723 7724 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7725 return MatchOperand_ParseFail; 7726 7727 unsigned DPP8 = 0; 7728 for (size_t i = 0; i < 8; ++i) 7729 DPP8 |= (Sels[i] << (i * 3)); 7730 7731 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7732 return MatchOperand_Success; 7733 } 7734 7735 bool 7736 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7737 const OperandVector &Operands) { 7738 if (Ctrl == "row_newbcast") 7739 return isGFX90A(); 7740 7741 // DPP64 is supported for row_newbcast 
only. 7742 const MCRegisterInfo *MRI = getMRI(); 7743 if (Operands.size() > 2 && Operands[1]->isReg() && 7744 MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1)) 7745 return false; 7746 7747 if (Ctrl == "row_share" || 7748 Ctrl == "row_xmask") 7749 return isGFX10Plus(); 7750 7751 if (Ctrl == "wave_shl" || 7752 Ctrl == "wave_shr" || 7753 Ctrl == "wave_rol" || 7754 Ctrl == "wave_ror" || 7755 Ctrl == "row_bcast") 7756 return isVI() || isGFX9(); 7757 7758 return Ctrl == "row_mirror" || 7759 Ctrl == "row_half_mirror" || 7760 Ctrl == "quad_perm" || 7761 Ctrl == "row_shl" || 7762 Ctrl == "row_shr" || 7763 Ctrl == "row_ror"; 7764 } 7765 7766 int64_t 7767 AMDGPUAsmParser::parseDPPCtrlPerm() { 7768 // quad_perm:[%d,%d,%d,%d] 7769 7770 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7771 return -1; 7772 7773 int64_t Val = 0; 7774 for (int i = 0; i < 4; ++i) { 7775 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7776 return -1; 7777 7778 int64_t Temp; 7779 SMLoc Loc = getLoc(); 7780 if (getParser().parseAbsoluteExpression(Temp)) 7781 return -1; 7782 if (Temp < 0 || Temp > 3) { 7783 Error(Loc, "expected a 2-bit value"); 7784 return -1; 7785 } 7786 7787 Val += (Temp << i * 2); 7788 } 7789 7790 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7791 return -1; 7792 7793 return Val; 7794 } 7795 7796 int64_t 7797 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7798 using namespace AMDGPU::DPP; 7799 7800 // sel:%d 7801 7802 int64_t Val; 7803 SMLoc Loc = getLoc(); 7804 7805 if (getParser().parseAbsoluteExpression(Val)) 7806 return -1; 7807 7808 struct DppCtrlCheck { 7809 int64_t Ctrl; 7810 int Lo; 7811 int Hi; 7812 }; 7813 7814 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7815 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7816 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7817 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7818 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7819 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7820 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7821 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7822 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7823 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7824 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7825 .Default({-1, 0, 0}); 7826 7827 bool Valid; 7828 if (Check.Ctrl == -1) { 7829 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7830 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7831 } else { 7832 Valid = Check.Lo <= Val && Val <= Check.Hi; 7833 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7834 } 7835 7836 if (!Valid) { 7837 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7838 return -1; 7839 } 7840 7841 return Val; 7842 } 7843 7844 OperandMatchResultTy 7845 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7846 using namespace AMDGPU::DPP; 7847 7848 if (!isToken(AsmToken::Identifier) || 7849 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7850 return MatchOperand_NoMatch; 7851 7852 SMLoc S = getLoc(); 7853 int64_t Val = -1; 7854 StringRef Ctrl; 7855 7856 parseId(Ctrl); 7857 7858 if (Ctrl == "row_mirror") { 7859 Val = DppCtrl::ROW_MIRROR; 7860 } else if (Ctrl == "row_half_mirror") { 7861 Val = DppCtrl::ROW_HALF_MIRROR; 7862 } else { 7863 if (skipToken(AsmToken::Colon, "expected a colon")) { 7864 if (Ctrl == "quad_perm") { 7865 Val = parseDPPCtrlPerm(); 7866 } else { 7867 Val = parseDPPCtrlSel(Ctrl); 7868 } 7869 } 7870 } 7871 7872 if (Val == -1) 7873 return MatchOperand_ParseFail; 7874 7875 Operands.push_back( 7876 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7877 return MatchOperand_Success; 7878 } 7879 7880 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7881 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7882 } 7883 7884 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7885 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7886 } 7887 7888 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7889 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7890 } 7891 7892 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7893 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7894 } 7895 7896 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7897 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7898 } 7899 7900 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7901 OptionalImmIndexMap OptionalIdx; 7902 7903 unsigned I = 1; 7904 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7905 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7906 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7907 } 7908 7909 int Fi = 0; 7910 for (unsigned E = Operands.size(); I != E; ++I) { 7911 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7912 MCOI::TIED_TO); 7913 if (TiedTo != -1) { 7914 assert((unsigned)TiedTo < Inst.getNumOperands()); 7915 // handle tied old or src2 for MAC instructions 7916 Inst.addOperand(Inst.getOperand(TiedTo)); 7917 } 7918 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7919 // Add the register arguments 7920 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7921 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7922 // Skip it. 
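      // (For example, in a hypothetical "v_add_u32_dpp v0, vcc, v1, v2
      //  quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf" the written vcc is
      //  purely syntactic and gets no explicit MCInst operand.)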
7923 continue; 7924 } 7925 7926 if (IsDPP8) { 7927 if (Op.isDPP8()) { 7928 Op.addImmOperands(Inst, 1); 7929 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7930 Op.addRegWithFPInputModsOperands(Inst, 2); 7931 } else if (Op.isFI()) { 7932 Fi = Op.getImm(); 7933 } else if (Op.isReg()) { 7934 Op.addRegOperands(Inst, 1); 7935 } else { 7936 llvm_unreachable("Invalid operand type"); 7937 } 7938 } else { 7939 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7940 Op.addRegWithFPInputModsOperands(Inst, 2); 7941 } else if (Op.isDPPCtrl()) { 7942 Op.addImmOperands(Inst, 1); 7943 } else if (Op.isImm()) { 7944 // Handle optional arguments 7945 OptionalIdx[Op.getImmTy()] = I; 7946 } else { 7947 llvm_unreachable("Invalid operand type"); 7948 } 7949 } 7950 } 7951 7952 if (IsDPP8) { 7953 using namespace llvm::AMDGPU::DPP; 7954 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7955 } else { 7956 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7957 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7958 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7959 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7960 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7961 } 7962 } 7963 } 7964 7965 //===----------------------------------------------------------------------===// 7966 // sdwa 7967 //===----------------------------------------------------------------------===// 7968 7969 OperandMatchResultTy 7970 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7971 AMDGPUOperand::ImmTy Type) { 7972 using namespace llvm::AMDGPU::SDWA; 7973 7974 SMLoc S = getLoc(); 7975 StringRef Value; 7976 OperandMatchResultTy res; 7977 7978 SMLoc StringLoc; 7979 res = parseStringWithPrefix(Prefix, Value, StringLoc); 7980 if (res != MatchOperand_Success) { 7981 return res; 7982 } 7983 7984 int64_t Int; 7985 Int = StringSwitch<int64_t>(Value) 7986 .Case("BYTE_0", SdwaSel::BYTE_0) 7987 .Case("BYTE_1", SdwaSel::BYTE_1) 7988 .Case("BYTE_2", SdwaSel::BYTE_2) 7989 .Case("BYTE_3", SdwaSel::BYTE_3) 7990 .Case("WORD_0", SdwaSel::WORD_0) 7991 .Case("WORD_1", SdwaSel::WORD_1) 7992 .Case("DWORD", SdwaSel::DWORD) 7993 .Default(0xffffffff); 7994 7995 if (Int == 0xffffffff) { 7996 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 7997 return MatchOperand_ParseFail; 7998 } 7999 8000 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8001 return MatchOperand_Success; 8002 } 8003 8004 OperandMatchResultTy 8005 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8006 using namespace llvm::AMDGPU::SDWA; 8007 8008 SMLoc S = getLoc(); 8009 StringRef Value; 8010 OperandMatchResultTy res; 8011 8012 SMLoc StringLoc; 8013 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8014 if (res != MatchOperand_Success) { 8015 return res; 8016 } 8017 8018 int64_t Int; 8019 Int = StringSwitch<int64_t>(Value) 8020 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8021 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8022 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8023 .Default(0xffffffff); 8024 8025 if (Int == 0xffffffff) { 8026 Error(StringLoc, "invalid dst_unused value"); 8027 return MatchOperand_ParseFail; 8028 } 8029 8030 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8031 return MatchOperand_Success; 8032 } 8033 8034 void 
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8035 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8036 } 8037 8038 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8039 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8040 } 8041 8042 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8043 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8044 } 8045 8046 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8047 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8048 } 8049 8050 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8051 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8052 } 8053 8054 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8055 uint64_t BasicInstType, 8056 bool SkipDstVcc, 8057 bool SkipSrcVcc) { 8058 using namespace llvm::AMDGPU::SDWA; 8059 8060 OptionalImmIndexMap OptionalIdx; 8061 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8062 bool SkippedVcc = false; 8063 8064 unsigned I = 1; 8065 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8066 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8067 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8068 } 8069 8070 for (unsigned E = Operands.size(); I != E; ++I) { 8071 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8072 if (SkipVcc && !SkippedVcc && Op.isReg() && 8073 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8074 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8075 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8076 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8077 // Skip VCC only if we didn't skip it on previous iteration. 8078 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
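    // That is, after the vdst at MCInst index 0, src0 fills slots 1-2
    // (src0_modifiers, src0) and src1 fills slots 3-4, so a dst-side vcc is
    // seen once 1 operand has been added and a src-side vcc (e.g. the
    // trailing vcc of v_addc_u32_sdwa) once 5 have been added, matching the
    // checks below.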
8079 if (BasicInstType == SIInstrFlags::VOP2 && 8080 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8081 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8082 SkippedVcc = true; 8083 continue; 8084 } else if (BasicInstType == SIInstrFlags::VOPC && 8085 Inst.getNumOperands() == 0) { 8086 SkippedVcc = true; 8087 continue; 8088 } 8089 } 8090 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8091 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8092 } else if (Op.isImm()) { 8093 // Handle optional arguments 8094 OptionalIdx[Op.getImmTy()] = I; 8095 } else { 8096 llvm_unreachable("Invalid operand type"); 8097 } 8098 SkippedVcc = false; 8099 } 8100 8101 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8102 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8103 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8104 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8105 switch (BasicInstType) { 8106 case SIInstrFlags::VOP1: 8107 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8108 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8109 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8110 } 8111 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8112 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8113 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8114 break; 8115 8116 case SIInstrFlags::VOP2: 8117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8118 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8119 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8120 } 8121 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8125 break; 8126 8127 case SIInstrFlags::VOPC: 8128 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8129 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8130 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8132 break; 8133 8134 default: 8135 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8136 } 8137 } 8138 8139 // special case v_mac_{f16, f32}: 8140 // it has src2 register operand that is tied to dst operand 8141 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8142 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8143 auto it = Inst.begin(); 8144 std::advance( 8145 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8146 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8147 } 8148 } 8149 8150 //===----------------------------------------------------------------------===// 8151 // mAI 8152 //===----------------------------------------------------------------------===// 8153 8154 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8155 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8156 } 8157 8158 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8159 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8160 } 8161 8162 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8163 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8164 } 8165 8166 /// Force static initialization. 8167 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8168 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8169 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8170 } 8171 8172 #define GET_REGISTER_MATCHER 8173 #define GET_MATCHER_IMPLEMENTATION 8174 #define GET_MNEMONIC_SPELL_CHECKER 8175 #define GET_MNEMONIC_CHECKER 8176 #include "AMDGPUGenAsmMatcher.inc" 8177 8178 // This fuction should be defined after auto-generated include so that we have 8179 // MatchClassKind enum defined 8180 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8181 unsigned Kind) { 8182 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8183 // But MatchInstructionImpl() expects to meet token and fails to validate 8184 // operand. This method checks if we are given immediate operand but expect to 8185 // get corresponding token. 8186 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8187 switch (Kind) { 8188 case MCK_addr64: 8189 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8190 case MCK_gds: 8191 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8192 case MCK_lds: 8193 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8194 case MCK_idxen: 8195 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8196 case MCK_offen: 8197 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8198 case MCK_SSrcB32: 8199 // When operands have expression values, they will return true for isToken, 8200 // because it is not possible to distinguish between a token and an 8201 // expression at parse time. MatchInstructionImpl() will always try to 8202 // match an operand as a token, when isToken returns true, and when the 8203 // name of the expression is not a valid token, the match will fail, 8204 // so we need to handle it here. 8205 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8206 case MCK_SSrcF32: 8207 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8208 case MCK_SoppBrTarget: 8209 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8210 case MCK_VReg32OrOff: 8211 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8212 case MCK_InterpSlot: 8213 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8214 case MCK_Attr: 8215 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8216 case MCK_AttrChan: 8217 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8218 case MCK_ImmSMEMOffset: 8219 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8220 case MCK_SReg_64: 8221 case MCK_SReg_64_XEXEC: 8222 // Null is defined as a 32-bit register but 8223 // it should also be enabled with 64-bit operands. 8224 // The following code enables it for SReg_64 operands 8225 // used as source and destination. Remaining source 8226 // operands are handled in isInlinableImm. 8227 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8228 default: 8229 return Match_InvalidOperand; 8230 } 8231 } 8232 8233 //===----------------------------------------------------------------------===// 8234 // endpgm 8235 //===----------------------------------------------------------------------===// 8236 8237 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8238 SMLoc S = getLoc(); 8239 int64_t Imm = 0; 8240 8241 if (!parseExpr(Imm)) { 8242 // The operand is optional, if not present default to 0 8243 Imm = 0; 8244 } 8245 8246 if (!isUInt<16>(Imm)) { 8247 Error(S, "expected a 16-bit value"); 8248 return MatchOperand_ParseFail; 8249 } 8250 8251 Operands.push_back( 8252 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8253 return MatchOperand_Success; 8254 } 8255 8256 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8257