1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
                        SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
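    // For example, a trailing 'gds' operand may reach this point parsed as a
    // symbol reference rather than as a plain token (illustrative note only).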
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
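    // Note: HasExplicitEncodingSize is currently not used below; the parameter
    // is kept for the existing call sites (descriptive note only).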
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
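  /// The results are granulated block counts of the kind used for the kernel
  /// descriptor's granulated VGPR/SGPR count fields; the exact allocation
  /// granularity is target-dependent (descriptive note only).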
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1216 bool ParseDirectiveAMDGPUHsaKernel(); 1217 1218 bool ParseDirectiveISAVersion(); 1219 bool ParseDirectiveHSAMetadata(); 1220 bool ParseDirectivePALMetadataBegin(); 1221 bool ParseDirectivePALMetadata(); 1222 bool ParseDirectiveAMDGPULDS(); 1223 1224 /// Common code to parse out a block of text (typically YAML) between start and 1225 /// end directives. 
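  /// For example (directive names shown for illustration only), the HSA
  /// metadata directives bracket a YAML blob:
  ///   .amd_amdgpu_hsa_metadata
  ///     <YAML text accumulated into CollectString>
  ///   .end_amd_amdgpu_hsa_metadata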
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
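      // Illustrative use only: assembly sources may reference the symbols
      // defined below, e.g. ".if .option.machine_version_major >= 9".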
1292 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1293 MCContext &Ctx = getContext(); 1294 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1295 MCSymbol *Sym = 1296 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1297 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1298 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1299 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1300 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1301 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1302 } else { 1303 MCSymbol *Sym = 1304 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1305 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1306 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1307 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1308 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1309 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1310 } 1311 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1312 initializeGprCountSymbol(IS_VGPR); 1313 initializeGprCountSymbol(IS_SGPR); 1314 } else 1315 KernelScope.initialize(getContext()); 1316 } 1317 } 1318 1319 bool hasXNACK() const { 1320 return AMDGPU::hasXNACK(getSTI()); 1321 } 1322 1323 bool hasMIMG_R128() const { 1324 return AMDGPU::hasMIMG_R128(getSTI()); 1325 } 1326 1327 bool hasPackedD16() const { 1328 return AMDGPU::hasPackedD16(getSTI()); 1329 } 1330 1331 bool hasGFX10A16() const { 1332 return AMDGPU::hasGFX10A16(getSTI()); 1333 } 1334 1335 bool isSI() const { 1336 return AMDGPU::isSI(getSTI()); 1337 } 1338 1339 bool isCI() const { 1340 return AMDGPU::isCI(getSTI()); 1341 } 1342 1343 bool isVI() const { 1344 return AMDGPU::isVI(getSTI()); 1345 } 1346 1347 bool isGFX9() const { 1348 return AMDGPU::isGFX9(getSTI()); 1349 } 1350 1351 bool isGFX90A() const { 1352 return AMDGPU::isGFX90A(getSTI()); 1353 } 1354 1355 bool isGFX9Plus() const { 1356 return AMDGPU::isGFX9Plus(getSTI()); 1357 } 1358 1359 bool isGFX10() const { 1360 return AMDGPU::isGFX10(getSTI()); 1361 } 1362 1363 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1364 1365 bool isGFX10_BEncoding() const { 1366 return AMDGPU::isGFX10_BEncoding(getSTI()); 1367 } 1368 1369 bool hasInv2PiInlineImm() const { 1370 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1371 } 1372 1373 bool hasFlatOffsets() const { 1374 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1375 } 1376 1377 bool hasSGPR102_SGPR103() const { 1378 return !isVI() && !isGFX9(); 1379 } 1380 1381 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1382 1383 bool hasIntClamp() const { 1384 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1385 } 1386 1387 AMDGPUTargetStreamer &getTargetStreamer() { 1388 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1389 return static_cast<AMDGPUTargetStreamer &>(TS); 1390 } 1391 1392 const MCRegisterInfo *getMRI() const { 1393 // We need this const_cast because for some reason getContext() is not const 1394 // in MCAsmParser. 
1395 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1396 } 1397 1398 const MCInstrInfo *getMII() const { 1399 return &MII; 1400 } 1401 1402 const FeatureBitset &getFeatureBits() const { 1403 return getSTI().getFeatureBits(); 1404 } 1405 1406 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1407 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1408 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1409 1410 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1411 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1412 bool isForcedDPP() const { return ForcedDPP; } 1413 bool isForcedSDWA() const { return ForcedSDWA; } 1414 ArrayRef<unsigned> getMatchedVariants() const; 1415 StringRef getMatchedVariantName() const; 1416 1417 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1418 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1419 bool RestoreOnFailure); 1420 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1421 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1422 SMLoc &EndLoc) override; 1423 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1424 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1425 unsigned Kind) override; 1426 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1427 OperandVector &Operands, MCStreamer &Out, 1428 uint64_t &ErrorInfo, 1429 bool MatchingInlineAsm) override; 1430 bool ParseDirective(AsmToken DirectiveID) override; 1431 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1432 OperandMode Mode = OperandMode_Default); 1433 StringRef parseMnemonicSuffix(StringRef Name); 1434 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1435 SMLoc NameLoc, OperandVector &Operands) override; 1436 //bool ProcessInstruction(MCInst &Inst); 1437 1438 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1439 1440 OperandMatchResultTy 1441 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1442 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1443 bool (*ConvertResult)(int64_t &) = nullptr); 1444 1445 OperandMatchResultTy 1446 parseOperandArrayWithPrefix(const char *Prefix, 1447 OperandVector &Operands, 1448 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1449 bool (*ConvertResult)(int64_t&) = nullptr); 1450 1451 OperandMatchResultTy 1452 parseNamedBit(StringRef Name, OperandVector &Operands, 1453 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1454 OperandMatchResultTy parseCPol(OperandVector &Operands); 1455 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1456 StringRef &Value, 1457 SMLoc &StringLoc); 1458 1459 bool isModifier(); 1460 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1461 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1462 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1463 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1464 bool parseSP3NegModifier(); 1465 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1466 OperandMatchResultTy parseReg(OperandVector &Operands); 1467 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1468 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1469 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1470 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1471 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1472 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1473 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1474 OperandMatchResultTy parseUfmt(int64_t &Format); 1475 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1476 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1477 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1478 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1479 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1480 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1481 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1482 1483 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1484 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1485 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1486 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1487 1488 bool parseCnt(int64_t &IntVal); 1489 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1490 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1491 1492 private: 1493 struct OperandInfoTy { 1494 SMLoc Loc; 1495 int64_t Id; 1496 bool IsSymbolic = false; 1497 bool IsDefined = false; 1498 1499 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1500 }; 1501 1502 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1503 bool validateSendMsg(const OperandInfoTy &Msg, 1504 const OperandInfoTy &Op, 1505 const OperandInfoTy &Stream); 1506 1507 bool parseHwregBody(OperandInfoTy &HwReg, 1508 OperandInfoTy &Offset, 1509 OperandInfoTy &Width); 1510 bool validateHwreg(const OperandInfoTy &HwReg, 1511 const OperandInfoTy &Offset, 1512 const OperandInfoTy &Width); 1513 1514 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1515 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1516 1517 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1518 const OperandVector &Operands) const; 1519 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1520 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1521 SMLoc getLitLoc(const OperandVector &Operands) const; 1522 SMLoc getConstLoc(const OperandVector &Operands) const; 1523 1524 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1525 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1526 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1527 bool validateSOPLiteral(const MCInst &Inst) const; 1528 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1529 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1530 bool validateIntClampSupported(const MCInst &Inst); 1531 bool validateMIMGAtomicDMask(const MCInst &Inst); 1532 bool validateMIMGGatherDMask(const MCInst &Inst); 1533 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1534 bool validateMIMGDataSize(const MCInst &Inst); 1535 bool validateMIMGAddrSize(const 
MCInst &Inst); 1536 bool validateMIMGD16(const MCInst &Inst); 1537 bool validateMIMGDim(const MCInst &Inst); 1538 bool validateMIMGMSAA(const MCInst &Inst); 1539 bool validateOpSel(const MCInst &Inst); 1540 bool validateVccOperand(unsigned Reg) const; 1541 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1542 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1543 bool validateAGPRLdSt(const MCInst &Inst) const; 1544 bool validateVGPRAlign(const MCInst &Inst) const; 1545 bool validateDivScale(const MCInst &Inst); 1546 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1547 const SMLoc &IDLoc); 1548 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1549 unsigned getConstantBusLimit(unsigned Opcode) const; 1550 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1551 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1552 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1553 1554 bool isSupportedMnemo(StringRef Mnemo, 1555 const FeatureBitset &FBS); 1556 bool isSupportedMnemo(StringRef Mnemo, 1557 const FeatureBitset &FBS, 1558 ArrayRef<unsigned> Variants); 1559 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1560 1561 bool isId(const StringRef Id) const; 1562 bool isId(const AsmToken &Token, const StringRef Id) const; 1563 bool isToken(const AsmToken::TokenKind Kind) const; 1564 bool trySkipId(const StringRef Id); 1565 bool trySkipId(const StringRef Pref, const StringRef Id); 1566 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1567 bool trySkipToken(const AsmToken::TokenKind Kind); 1568 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1569 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1570 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1571 1572 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1573 AsmToken::TokenKind getTokenKind() const; 1574 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1575 bool parseExpr(OperandVector &Operands); 1576 StringRef getTokenStr() const; 1577 AsmToken peekToken(); 1578 AsmToken getToken() const; 1579 SMLoc getLoc() const; 1580 void lex(); 1581 1582 public: 1583 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1584 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1585 1586 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1587 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1588 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1589 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1590 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1591 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1592 1593 bool parseSwizzleOperand(int64_t &Op, 1594 const unsigned MinVal, 1595 const unsigned MaxVal, 1596 const StringRef ErrMsg, 1597 SMLoc &Loc); 1598 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1599 const unsigned MinVal, 1600 const unsigned MaxVal, 1601 const StringRef ErrMsg); 1602 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1603 bool parseSwizzleOffset(int64_t &Imm); 1604 bool parseSwizzleMacro(int64_t &Imm); 1605 bool parseSwizzleQuadPerm(int64_t &Imm); 1606 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1607 bool parseSwizzleBroadcast(int64_t &Imm); 1608 bool parseSwizzleSwap(int64_t &Imm); 1609 bool parseSwizzleReverse(int64_t &Imm); 1610 1611 OperandMatchResultTy 
parseGPRIdxMode(OperandVector &Operands); 1612 int64_t parseGPRIdxMacro(); 1613 1614 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1615 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1616 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1617 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1618 1619 AMDGPUOperand::Ptr defaultCPol() const; 1620 1621 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1622 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1623 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1624 AMDGPUOperand::Ptr defaultFlatOffset() const; 1625 1626 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1627 1628 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1629 OptionalImmIndexMap &OptionalIdx); 1630 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1631 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1632 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1633 1634 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1635 1636 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1637 bool IsAtomic = false); 1638 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1639 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1640 1641 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1642 1643 bool parseDimId(unsigned &Encoding); 1644 OperandMatchResultTy parseDim(OperandVector &Operands); 1645 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1646 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1647 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1648 int64_t parseDPPCtrlSel(StringRef Ctrl); 1649 int64_t parseDPPCtrlPerm(); 1650 AMDGPUOperand::Ptr defaultRowMask() const; 1651 AMDGPUOperand::Ptr defaultBankMask() const; 1652 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1653 AMDGPUOperand::Ptr defaultFI() const; 1654 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1655 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1656 1657 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1658 AMDGPUOperand::ImmTy Type); 1659 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1660 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1661 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1662 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1663 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1664 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1665 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1666 uint64_t BasicInstType, 1667 bool SkipDstVcc = false, 1668 bool SkipSrcVcc = false); 1669 1670 AMDGPUOperand::Ptr defaultBLGP() const; 1671 AMDGPUOperand::Ptr defaultCBSZ() const; 1672 AMDGPUOperand::Ptr defaultABID() const; 1673 1674 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1675 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1676 }; 1677 1678 struct OptionalOperand { 1679 const char *Name; 1680 AMDGPUOperand::ImmTy Type; 1681 bool IsBit; 1682 bool (*ConvertResult)(int64_t&); 1683 }; 1684 1685 } // end anonymous namespace 1686 1687 // May be called with integer type with equivalent bitwidth. 
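// For example, a 4-byte operand uses IEEEsingle() regardless of whether it was
// declared as an f32 or an i32 source operand.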
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the operand's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. the "clamp" attribute is not).
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if the single-precision literal is inlinable.
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals while preserving the same
      // semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token.
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)?
MVT::f32 : type; 1884 1885 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1886 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1887 } 1888 1889 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1890 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1891 } 1892 1893 bool AMDGPUOperand::isVRegWithInputMods() const { 1894 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1895 // GFX90A allows DPP on 64-bit operands. 1896 (isRegClass(AMDGPU::VReg_64RegClassID) && 1897 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1898 } 1899 1900 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1901 if (AsmParser->isVI()) 1902 return isVReg32(); 1903 else if (AsmParser->isGFX9Plus()) 1904 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1905 else 1906 return false; 1907 } 1908 1909 bool AMDGPUOperand::isSDWAFP16Operand() const { 1910 return isSDWAOperand(MVT::f16); 1911 } 1912 1913 bool AMDGPUOperand::isSDWAFP32Operand() const { 1914 return isSDWAOperand(MVT::f32); 1915 } 1916 1917 bool AMDGPUOperand::isSDWAInt16Operand() const { 1918 return isSDWAOperand(MVT::i16); 1919 } 1920 1921 bool AMDGPUOperand::isSDWAInt32Operand() const { 1922 return isSDWAOperand(MVT::i32); 1923 } 1924 1925 bool AMDGPUOperand::isBoolReg() const { 1926 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1927 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1928 } 1929 1930 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1931 { 1932 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1933 assert(Size == 2 || Size == 4 || Size == 8); 1934 1935 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1936 1937 if (Imm.Mods.Abs) { 1938 Val &= ~FpSignMask; 1939 } 1940 if (Imm.Mods.Neg) { 1941 Val ^= FpSignMask; 1942 } 1943 1944 return Val; 1945 } 1946 1947 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1948 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1949 Inst.getNumOperands())) { 1950 addLiteralImmOperand(Inst, Imm.Val, 1951 ApplyModifiers & 1952 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1953 } else { 1954 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1955 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1956 setImmKindNone(); 1957 } 1958 } 1959 1960 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1961 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1962 auto OpNum = Inst.getNumOperands(); 1963 // Check that this operand accepts literals 1964 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1965 1966 if (ApplyModifiers) { 1967 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1968 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1969 Val = applyInputFPModifiers(Val, Size); 1970 } 1971 1972 APInt Literal(64, Val); 1973 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1974 1975 if (Imm.IsFPImm) { // We got fp literal token 1976 switch (OpTy) { 1977 case AMDGPU::OPERAND_REG_IMM_INT64: 1978 case AMDGPU::OPERAND_REG_IMM_FP64: 1979 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1980 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1981 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1982 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1983 AsmParser->hasInv2PiInlineImm())) { 1984 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1985 setImmKindConst(); 1986 return; 1987 } 1988 1989 // Non-inlineable 1990 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1991 // For fp operands we check if low 32 bits are zeros 1992 if (Literal.getLoBits(32) != 0) { 1993 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1994 "Can't encode literal as exact 64-bit floating-point operand. " 1995 "Low 32-bits will be set to zero"); 1996 } 1997 1998 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1999 setImmKindLiteral(); 2000 return; 2001 } 2002 2003 // We don't allow fp literals in 64-bit integer instructions. It is 2004 // unclear how we should encode them. This case should be checked earlier 2005 // in predicate methods (isLiteralImm()) 2006 llvm_unreachable("fp literal in 64-bit integer instruction."); 2007 2008 case AMDGPU::OPERAND_REG_IMM_INT32: 2009 case AMDGPU::OPERAND_REG_IMM_FP32: 2010 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2011 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2012 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2013 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2014 case AMDGPU::OPERAND_REG_IMM_INT16: 2015 case AMDGPU::OPERAND_REG_IMM_FP16: 2016 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2017 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2018 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2019 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2020 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2021 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2023 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2024 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2025 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2026 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2027 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2028 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2029 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2030 bool lost; 2031 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2032 // Convert literal to single precision 2033 FPLiteral.convert(*getOpFltSemantics(OpTy), 2034 APFloat::rmNearestTiesToEven, &lost); 2035 // We allow precision lost but not overflow or underflow. This should be 2036 // checked earlier in isLiteralImm() 2037 2038 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2039 Inst.addOperand(MCOperand::createImm(ImmVal)); 2040 setImmKindLiteral(); 2041 return; 2042 } 2043 default: 2044 llvm_unreachable("invalid operand size"); 2045 } 2046 2047 return; 2048 } 2049 2050 // We got int literal token. 2051 // Only sign extend inline immediates. 
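  // For illustration (values only, not tied to a specific instruction): with
  // a 32-bit operand, an integer token such as -1 or 64 is an inline constant
  // and is emitted unchanged, while 0x12345678 is not inlinable and is
  // emitted as a 32-bit literal (Val & 0xffffffff) below.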
2052 switch (OpTy) { 2053 case AMDGPU::OPERAND_REG_IMM_INT32: 2054 case AMDGPU::OPERAND_REG_IMM_FP32: 2055 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2056 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2057 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2058 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2059 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2060 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2061 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2062 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2063 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2064 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2065 if (isSafeTruncation(Val, 32) && 2066 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2067 AsmParser->hasInv2PiInlineImm())) { 2068 Inst.addOperand(MCOperand::createImm(Val)); 2069 setImmKindConst(); 2070 return; 2071 } 2072 2073 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2074 setImmKindLiteral(); 2075 return; 2076 2077 case AMDGPU::OPERAND_REG_IMM_INT64: 2078 case AMDGPU::OPERAND_REG_IMM_FP64: 2079 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2080 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2082 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2083 Inst.addOperand(MCOperand::createImm(Val)); 2084 setImmKindConst(); 2085 return; 2086 } 2087 2088 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2089 setImmKindLiteral(); 2090 return; 2091 2092 case AMDGPU::OPERAND_REG_IMM_INT16: 2093 case AMDGPU::OPERAND_REG_IMM_FP16: 2094 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2095 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2096 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2097 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2098 if (isSafeTruncation(Val, 16) && 2099 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2100 AsmParser->hasInv2PiInlineImm())) { 2101 Inst.addOperand(MCOperand::createImm(Val)); 2102 setImmKindConst(); 2103 return; 2104 } 2105 2106 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2107 setImmKindLiteral(); 2108 return; 2109 2110 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2111 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2112 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2114 assert(isSafeTruncation(Val, 16)); 2115 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2116 AsmParser->hasInv2PiInlineImm())); 2117 2118 Inst.addOperand(MCOperand::createImm(Val)); 2119 return; 2120 } 2121 default: 2122 llvm_unreachable("invalid operand size"); 2123 } 2124 } 2125 2126 template <unsigned Bitwidth> 2127 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2128 APInt Literal(64, Imm.Val); 2129 setImmKindNone(); 2130 2131 if (!Imm.IsFPImm) { 2132 // We got int literal token. 
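    // Illustrative note (v_madmk_f32 used as an assumed example): with
    // Bitwidth == 32, an integer token is truncated to its low 32 bits and
    // emitted as-is; an fp token instead goes through the conversion below.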
2133 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2134 return; 2135 } 2136 2137 bool Lost; 2138 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2139 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2140 APFloat::rmNearestTiesToEven, &Lost); 2141 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2142 } 2143 2144 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2145 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2146 } 2147 2148 static bool isInlineValue(unsigned Reg) { 2149 switch (Reg) { 2150 case AMDGPU::SRC_SHARED_BASE: 2151 case AMDGPU::SRC_SHARED_LIMIT: 2152 case AMDGPU::SRC_PRIVATE_BASE: 2153 case AMDGPU::SRC_PRIVATE_LIMIT: 2154 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2155 return true; 2156 case AMDGPU::SRC_VCCZ: 2157 case AMDGPU::SRC_EXECZ: 2158 case AMDGPU::SRC_SCC: 2159 return true; 2160 case AMDGPU::SGPR_NULL: 2161 return true; 2162 default: 2163 return false; 2164 } 2165 } 2166 2167 bool AMDGPUOperand::isInlineValue() const { 2168 return isRegKind() && ::isInlineValue(getReg()); 2169 } 2170 2171 //===----------------------------------------------------------------------===// 2172 // AsmParser 2173 //===----------------------------------------------------------------------===// 2174 2175 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2176 if (Is == IS_VGPR) { 2177 switch (RegWidth) { 2178 default: return -1; 2179 case 1: return AMDGPU::VGPR_32RegClassID; 2180 case 2: return AMDGPU::VReg_64RegClassID; 2181 case 3: return AMDGPU::VReg_96RegClassID; 2182 case 4: return AMDGPU::VReg_128RegClassID; 2183 case 5: return AMDGPU::VReg_160RegClassID; 2184 case 6: return AMDGPU::VReg_192RegClassID; 2185 case 8: return AMDGPU::VReg_256RegClassID; 2186 case 16: return AMDGPU::VReg_512RegClassID; 2187 case 32: return AMDGPU::VReg_1024RegClassID; 2188 } 2189 } else if (Is == IS_TTMP) { 2190 switch (RegWidth) { 2191 default: return -1; 2192 case 1: return AMDGPU::TTMP_32RegClassID; 2193 case 2: return AMDGPU::TTMP_64RegClassID; 2194 case 4: return AMDGPU::TTMP_128RegClassID; 2195 case 8: return AMDGPU::TTMP_256RegClassID; 2196 case 16: return AMDGPU::TTMP_512RegClassID; 2197 } 2198 } else if (Is == IS_SGPR) { 2199 switch (RegWidth) { 2200 default: return -1; 2201 case 1: return AMDGPU::SGPR_32RegClassID; 2202 case 2: return AMDGPU::SGPR_64RegClassID; 2203 case 3: return AMDGPU::SGPR_96RegClassID; 2204 case 4: return AMDGPU::SGPR_128RegClassID; 2205 case 5: return AMDGPU::SGPR_160RegClassID; 2206 case 6: return AMDGPU::SGPR_192RegClassID; 2207 case 8: return AMDGPU::SGPR_256RegClassID; 2208 case 16: return AMDGPU::SGPR_512RegClassID; 2209 } 2210 } else if (Is == IS_AGPR) { 2211 switch (RegWidth) { 2212 default: return -1; 2213 case 1: return AMDGPU::AGPR_32RegClassID; 2214 case 2: return AMDGPU::AReg_64RegClassID; 2215 case 3: return AMDGPU::AReg_96RegClassID; 2216 case 4: return AMDGPU::AReg_128RegClassID; 2217 case 5: return AMDGPU::AReg_160RegClassID; 2218 case 6: return AMDGPU::AReg_192RegClassID; 2219 case 8: return AMDGPU::AReg_256RegClassID; 2220 case 16: return AMDGPU::AReg_512RegClassID; 2221 case 32: return AMDGPU::AReg_1024RegClassID; 2222 } 2223 } 2224 return -1; 2225 } 2226 2227 static unsigned getSpecialRegForName(StringRef RegName) { 2228 return StringSwitch<unsigned>(RegName) 2229 .Case("exec", AMDGPU::EXEC) 2230 .Case("vcc", AMDGPU::VCC) 2231 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2232 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2233 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2234 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2235 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2236 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2237 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2238 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2239 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2240 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2241 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2242 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2243 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2244 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2245 .Case("m0", AMDGPU::M0) 2246 .Case("vccz", AMDGPU::SRC_VCCZ) 2247 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2248 .Case("execz", AMDGPU::SRC_EXECZ) 2249 .Case("src_execz", AMDGPU::SRC_EXECZ) 2250 .Case("scc", AMDGPU::SRC_SCC) 2251 .Case("src_scc", AMDGPU::SRC_SCC) 2252 .Case("tba", AMDGPU::TBA) 2253 .Case("tma", AMDGPU::TMA) 2254 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2255 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2256 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2257 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2258 .Case("vcc_lo", AMDGPU::VCC_LO) 2259 .Case("vcc_hi", AMDGPU::VCC_HI) 2260 .Case("exec_lo", AMDGPU::EXEC_LO) 2261 .Case("exec_hi", AMDGPU::EXEC_HI) 2262 .Case("tma_lo", AMDGPU::TMA_LO) 2263 .Case("tma_hi", AMDGPU::TMA_HI) 2264 .Case("tba_lo", AMDGPU::TBA_LO) 2265 .Case("tba_hi", AMDGPU::TBA_HI) 2266 .Case("pc", AMDGPU::PC_REG) 2267 .Case("null", AMDGPU::SGPR_NULL) 2268 .Default(AMDGPU::NoRegister); 2269 } 2270 2271 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2272 SMLoc &EndLoc, bool RestoreOnFailure) { 2273 auto R = parseRegister(); 2274 if (!R) return true; 2275 assert(R->isReg()); 2276 RegNo = R->getReg(); 2277 StartLoc = R->getStartLoc(); 2278 EndLoc = R->getEndLoc(); 2279 return false; 2280 } 2281 2282 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2283 SMLoc &EndLoc) { 2284 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2285 } 2286 2287 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2288 SMLoc &StartLoc, 2289 SMLoc &EndLoc) { 2290 bool Result = 2291 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2292 bool PendingErrors = getParser().hasPendingError(); 2293 getParser().clearPendingErrors(); 2294 if (PendingErrors) 2295 return MatchOperand_ParseFail; 2296 if (Result) 2297 return MatchOperand_NoMatch; 2298 return MatchOperand_Success; 2299 } 2300 2301 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2302 RegisterKind RegKind, unsigned Reg1, 2303 SMLoc Loc) { 2304 switch (RegKind) { 2305 case IS_SPECIAL: 2306 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2307 Reg = AMDGPU::EXEC; 2308 RegWidth = 2; 2309 return true; 2310 } 2311 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2312 Reg = AMDGPU::FLAT_SCR; 2313 RegWidth = 2; 2314 return true; 2315 } 2316 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2317 Reg = AMDGPU::XNACK_MASK; 2318 RegWidth = 2; 2319 return true; 2320 } 2321 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2322 Reg = AMDGPU::VCC; 2323 RegWidth = 2; 2324 return true; 2325 } 2326 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2327 Reg = AMDGPU::TBA; 2328 RegWidth = 2; 2329 return true; 2330 } 2331 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2332 Reg = AMDGPU::TMA; 2333 
RegWidth = 2; 2334 return true; 2335 } 2336 Error(Loc, "register does not fit in the list"); 2337 return false; 2338 case IS_VGPR: 2339 case IS_SGPR: 2340 case IS_AGPR: 2341 case IS_TTMP: 2342 if (Reg1 != Reg + RegWidth) { 2343 Error(Loc, "registers in a list must have consecutive indices"); 2344 return false; 2345 } 2346 RegWidth++; 2347 return true; 2348 default: 2349 llvm_unreachable("unexpected register kind"); 2350 } 2351 } 2352 2353 struct RegInfo { 2354 StringLiteral Name; 2355 RegisterKind Kind; 2356 }; 2357 2358 static constexpr RegInfo RegularRegisters[] = { 2359 {{"v"}, IS_VGPR}, 2360 {{"s"}, IS_SGPR}, 2361 {{"ttmp"}, IS_TTMP}, 2362 {{"acc"}, IS_AGPR}, 2363 {{"a"}, IS_AGPR}, 2364 }; 2365 2366 static bool isRegularReg(RegisterKind Kind) { 2367 return Kind == IS_VGPR || 2368 Kind == IS_SGPR || 2369 Kind == IS_TTMP || 2370 Kind == IS_AGPR; 2371 } 2372 2373 static const RegInfo* getRegularRegInfo(StringRef Str) { 2374 for (const RegInfo &Reg : RegularRegisters) 2375 if (Str.startswith(Reg.Name)) 2376 return &Reg; 2377 return nullptr; 2378 } 2379 2380 static bool getRegNum(StringRef Str, unsigned& Num) { 2381 return !Str.getAsInteger(10, Num); 2382 } 2383 2384 bool 2385 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2386 const AsmToken &NextToken) const { 2387 2388 // A list of consecutive registers: [s0,s1,s2,s3] 2389 if (Token.is(AsmToken::LBrac)) 2390 return true; 2391 2392 if (!Token.is(AsmToken::Identifier)) 2393 return false; 2394 2395 // A single register like s0 or a range of registers like s[0:1] 2396 2397 StringRef Str = Token.getString(); 2398 const RegInfo *Reg = getRegularRegInfo(Str); 2399 if (Reg) { 2400 StringRef RegName = Reg->Name; 2401 StringRef RegSuffix = Str.substr(RegName.size()); 2402 if (!RegSuffix.empty()) { 2403 unsigned Num; 2404 // A single register with an index: rXX 2405 if (getRegNum(RegSuffix, Num)) 2406 return true; 2407 } else { 2408 // A range of registers: r[XX:YY]. 2409 if (NextToken.is(AsmToken::LBrac)) 2410 return true; 2411 } 2412 } 2413 2414 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2415 } 2416 2417 bool 2418 AMDGPUAsmParser::isRegister() 2419 { 2420 return isRegister(getToken(), peekToken()); 2421 } 2422 2423 unsigned 2424 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2425 unsigned RegNum, 2426 unsigned RegWidth, 2427 SMLoc Loc) { 2428 2429 assert(isRegularReg(RegKind)); 2430 2431 unsigned AlignSize = 1; 2432 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2433 // SGPR and TTMP registers must be aligned. 2434 // Max required alignment is 4 dwords. 
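    // Illustrative examples: s[2:3] is an acceptably aligned 64-bit pair,
    // while s[1:2] is rejected below with "invalid register alignment";
    // quad and wider SGPR/TTMP ranges must start at a multiple of 4,
    // e.g. ttmp[4:7].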
2435 AlignSize = std::min(RegWidth, 4u); 2436 } 2437 2438 if (RegNum % AlignSize != 0) { 2439 Error(Loc, "invalid register alignment"); 2440 return AMDGPU::NoRegister; 2441 } 2442 2443 unsigned RegIdx = RegNum / AlignSize; 2444 int RCID = getRegClass(RegKind, RegWidth); 2445 if (RCID == -1) { 2446 Error(Loc, "invalid or unsupported register size"); 2447 return AMDGPU::NoRegister; 2448 } 2449 2450 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2451 const MCRegisterClass RC = TRI->getRegClass(RCID); 2452 if (RegIdx >= RC.getNumRegs()) { 2453 Error(Loc, "register index is out of range"); 2454 return AMDGPU::NoRegister; 2455 } 2456 2457 return RC.getRegister(RegIdx); 2458 } 2459 2460 bool 2461 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2462 int64_t RegLo, RegHi; 2463 if (!skipToken(AsmToken::LBrac, "missing register index")) 2464 return false; 2465 2466 SMLoc FirstIdxLoc = getLoc(); 2467 SMLoc SecondIdxLoc; 2468 2469 if (!parseExpr(RegLo)) 2470 return false; 2471 2472 if (trySkipToken(AsmToken::Colon)) { 2473 SecondIdxLoc = getLoc(); 2474 if (!parseExpr(RegHi)) 2475 return false; 2476 } else { 2477 RegHi = RegLo; 2478 } 2479 2480 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2481 return false; 2482 2483 if (!isUInt<32>(RegLo)) { 2484 Error(FirstIdxLoc, "invalid register index"); 2485 return false; 2486 } 2487 2488 if (!isUInt<32>(RegHi)) { 2489 Error(SecondIdxLoc, "invalid register index"); 2490 return false; 2491 } 2492 2493 if (RegLo > RegHi) { 2494 Error(FirstIdxLoc, "first register index should not exceed second index"); 2495 return false; 2496 } 2497 2498 Num = static_cast<unsigned>(RegLo); 2499 Width = (RegHi - RegLo) + 1; 2500 return true; 2501 } 2502 2503 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2504 unsigned &RegNum, unsigned &RegWidth, 2505 SmallVectorImpl<AsmToken> &Tokens) { 2506 assert(isToken(AsmToken::Identifier)); 2507 unsigned Reg = getSpecialRegForName(getTokenStr()); 2508 if (Reg) { 2509 RegNum = 0; 2510 RegWidth = 1; 2511 RegKind = IS_SPECIAL; 2512 Tokens.push_back(getToken()); 2513 lex(); // skip register name 2514 } 2515 return Reg; 2516 } 2517 2518 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2519 unsigned &RegNum, unsigned &RegWidth, 2520 SmallVectorImpl<AsmToken> &Tokens) { 2521 assert(isToken(AsmToken::Identifier)); 2522 StringRef RegName = getTokenStr(); 2523 auto Loc = getLoc(); 2524 2525 const RegInfo *RI = getRegularRegInfo(RegName); 2526 if (!RI) { 2527 Error(Loc, "invalid register name"); 2528 return AMDGPU::NoRegister; 2529 } 2530 2531 Tokens.push_back(getToken()); 2532 lex(); // skip register name 2533 2534 RegKind = RI->Kind; 2535 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2536 if (!RegSuffix.empty()) { 2537 // Single 32-bit register: vXX. 2538 if (!getRegNum(RegSuffix, RegNum)) { 2539 Error(Loc, "invalid register index"); 2540 return AMDGPU::NoRegister; 2541 } 2542 RegWidth = 1; 2543 } else { 2544 // Range of registers: v[XX:YY]. ":YY" is optional. 
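    // Illustrative examples: "v[8:11]" yields RegNum = 8 and RegWidth = 4;
    // "v[5]" (with the ":YY" part omitted) yields RegWidth = 1.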
2545 if (!ParseRegRange(RegNum, RegWidth)) 2546 return AMDGPU::NoRegister; 2547 } 2548 2549 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2550 } 2551 2552 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2553 unsigned &RegWidth, 2554 SmallVectorImpl<AsmToken> &Tokens) { 2555 unsigned Reg = AMDGPU::NoRegister; 2556 auto ListLoc = getLoc(); 2557 2558 if (!skipToken(AsmToken::LBrac, 2559 "expected a register or a list of registers")) { 2560 return AMDGPU::NoRegister; 2561 } 2562 2563 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2564 2565 auto Loc = getLoc(); 2566 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2567 return AMDGPU::NoRegister; 2568 if (RegWidth != 1) { 2569 Error(Loc, "expected a single 32-bit register"); 2570 return AMDGPU::NoRegister; 2571 } 2572 2573 for (; trySkipToken(AsmToken::Comma); ) { 2574 RegisterKind NextRegKind; 2575 unsigned NextReg, NextRegNum, NextRegWidth; 2576 Loc = getLoc(); 2577 2578 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2579 NextRegNum, NextRegWidth, 2580 Tokens)) { 2581 return AMDGPU::NoRegister; 2582 } 2583 if (NextRegWidth != 1) { 2584 Error(Loc, "expected a single 32-bit register"); 2585 return AMDGPU::NoRegister; 2586 } 2587 if (NextRegKind != RegKind) { 2588 Error(Loc, "registers in a list must be of the same kind"); 2589 return AMDGPU::NoRegister; 2590 } 2591 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2592 return AMDGPU::NoRegister; 2593 } 2594 2595 if (!skipToken(AsmToken::RBrac, 2596 "expected a comma or a closing square bracket")) { 2597 return AMDGPU::NoRegister; 2598 } 2599 2600 if (isRegularReg(RegKind)) 2601 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2602 2603 return Reg; 2604 } 2605 2606 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2607 unsigned &RegNum, unsigned &RegWidth, 2608 SmallVectorImpl<AsmToken> &Tokens) { 2609 auto Loc = getLoc(); 2610 Reg = AMDGPU::NoRegister; 2611 2612 if (isToken(AsmToken::Identifier)) { 2613 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2614 if (Reg == AMDGPU::NoRegister) 2615 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2616 } else { 2617 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2618 } 2619 2620 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2621 if (Reg == AMDGPU::NoRegister) { 2622 assert(Parser.hasPendingError()); 2623 return false; 2624 } 2625 2626 if (!subtargetHasRegister(*TRI, Reg)) { 2627 if (Reg == AMDGPU::SGPR_NULL) { 2628 Error(Loc, "'null' operand is not supported on this GPU"); 2629 } else { 2630 Error(Loc, "register not available on this GPU"); 2631 } 2632 return false; 2633 } 2634 2635 return true; 2636 } 2637 2638 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2639 unsigned &RegNum, unsigned &RegWidth, 2640 bool RestoreOnFailure /*=false*/) { 2641 Reg = AMDGPU::NoRegister; 2642 2643 SmallVector<AsmToken, 1> Tokens; 2644 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2645 if (RestoreOnFailure) { 2646 while (!Tokens.empty()) { 2647 getLexer().UnLex(Tokens.pop_back_val()); 2648 } 2649 } 2650 return true; 2651 } 2652 return false; 2653 } 2654 2655 Optional<StringRef> 2656 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2657 switch (RegKind) { 2658 case IS_VGPR: 2659 return StringRef(".amdgcn.next_free_vgpr"); 2660 case IS_SGPR: 2661 return StringRef(".amdgcn.next_free_sgpr"); 2662 default: 2663 return None; 2664 } 2665 } 2666 2667 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2668 auto SymbolName = getGprCountSymbolName(RegKind); 2669 assert(SymbolName && "initializing invalid register kind"); 2670 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2671 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2672 } 2673 2674 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2675 unsigned DwordRegIndex, 2676 unsigned RegWidth) { 2677 // Symbols are only defined for GCN targets 2678 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2679 return true; 2680 2681 auto SymbolName = getGprCountSymbolName(RegKind); 2682 if (!SymbolName) 2683 return true; 2684 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2685 2686 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2687 int64_t OldCount; 2688 2689 if (!Sym->isVariable()) 2690 return !Error(getLoc(), 2691 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2692 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2693 return !Error( 2694 getLoc(), 2695 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2696 2697 if (OldCount <= NewMax) 2698 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2699 2700 return true; 2701 } 2702 2703 std::unique_ptr<AMDGPUOperand> 2704 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2705 const auto &Tok = getToken(); 2706 SMLoc StartLoc = Tok.getLoc(); 2707 SMLoc EndLoc = Tok.getEndLoc(); 2708 RegisterKind RegKind; 2709 unsigned Reg, RegNum, RegWidth; 2710 2711 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2712 return nullptr; 2713 } 2714 if (isHsaAbiVersion3(&getSTI())) { 2715 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2716 return nullptr; 2717 } else 2718 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2719 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2720 } 2721 2722 OperandMatchResultTy 2723 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2724 // TODO: add syntactic sugar for 1/(2*PI) 2725 2726 assert(!isRegister()); 2727 assert(!isModifier()); 2728 2729 const auto& Tok = getToken(); 2730 const auto& NextTok = peekToken(); 2731 bool IsReal = Tok.is(AsmToken::Real); 2732 SMLoc S = getLoc(); 2733 bool Negate = false; 2734 2735 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2736 lex(); 2737 IsReal = true; 2738 Negate = true; 2739 } 2740 2741 if (IsReal) { 2742 // Floating-point expressions are not supported. 2743 // Can only allow floating-point literals with an 2744 // optional sign. 2745 2746 StringRef Num = getTokenStr(); 2747 lex(); 2748 2749 APFloat RealVal(APFloat::IEEEdouble()); 2750 auto roundMode = APFloat::rmNearestTiesToEven; 2751 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2752 return MatchOperand_ParseFail; 2753 } 2754 if (Negate) 2755 RealVal.changeSign(); 2756 2757 Operands.push_back( 2758 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2759 AMDGPUOperand::ImmTyNone, true)); 2760 2761 return MatchOperand_Success; 2762 2763 } else { 2764 int64_t IntVal; 2765 const MCExpr *Expr; 2766 SMLoc S = getLoc(); 2767 2768 if (HasSP3AbsModifier) { 2769 // This is a workaround for handling expressions 2770 // as arguments of SP3 'abs' modifier, for example: 2771 // |1.0| 2772 // |-1| 2773 // |1+x| 2774 // This syntax is not compatible with syntax of standard 2775 // MC expressions (due to the trailing '|'). 
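      // (Illustrative note: presumably a full parseExpression() call would
      // treat the trailing '|' as a binary OR operator, so only a primary
      // expression is parsed here.)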
2776 SMLoc EndLoc; 2777 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2778 return MatchOperand_ParseFail; 2779 } else { 2780 if (Parser.parseExpression(Expr)) 2781 return MatchOperand_ParseFail; 2782 } 2783 2784 if (Expr->evaluateAsAbsolute(IntVal)) { 2785 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2786 } else { 2787 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2788 } 2789 2790 return MatchOperand_Success; 2791 } 2792 2793 return MatchOperand_NoMatch; 2794 } 2795 2796 OperandMatchResultTy 2797 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2798 if (!isRegister()) 2799 return MatchOperand_NoMatch; 2800 2801 if (auto R = parseRegister()) { 2802 assert(R->isReg()); 2803 Operands.push_back(std::move(R)); 2804 return MatchOperand_Success; 2805 } 2806 return MatchOperand_ParseFail; 2807 } 2808 2809 OperandMatchResultTy 2810 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2811 auto res = parseReg(Operands); 2812 if (res != MatchOperand_NoMatch) { 2813 return res; 2814 } else if (isModifier()) { 2815 return MatchOperand_NoMatch; 2816 } else { 2817 return parseImm(Operands, HasSP3AbsMod); 2818 } 2819 } 2820 2821 bool 2822 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2823 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2824 const auto &str = Token.getString(); 2825 return str == "abs" || str == "neg" || str == "sext"; 2826 } 2827 return false; 2828 } 2829 2830 bool 2831 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2832 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2833 } 2834 2835 bool 2836 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2837 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2838 } 2839 2840 bool 2841 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2842 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2843 } 2844 2845 // Check if this is an operand modifier or an opcode modifier 2846 // which may look like an expression but it is not. We should 2847 // avoid parsing these modifiers as expressions. Currently 2848 // recognized sequences are: 2849 // |...| 2850 // abs(...) 2851 // neg(...) 2852 // sext(...) 2853 // -reg 2854 // -|...| 2855 // -abs(...) 2856 // name:... 2857 // Note that simple opcode modifiers like 'gds' may be parsed as 2858 // expressions; this is a special case. See getExpressionAsToken. 2859 // 2860 bool 2861 AMDGPUAsmParser::isModifier() { 2862 2863 AsmToken Tok = getToken(); 2864 AsmToken NextToken[2]; 2865 peekTokens(NextToken); 2866 2867 return isOperandModifier(Tok, NextToken[0]) || 2868 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2869 isOpcodeModifierWithVal(Tok, NextToken[0]); 2870 } 2871 2872 // Check if the current token is an SP3 'neg' modifier. 2873 // Currently this modifier is allowed in the following context: 2874 // 2875 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2876 // 2. Before an 'abs' modifier: -abs(...) 2877 // 3. Before an SP3 'abs' modifier: -|...| 2878 // 2879 // In all other cases "-" is handled as a part 2880 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would result in different meanings of integer
// literals used with VOP1/2/C and VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2954 } 2955 2956 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2957 return MatchOperand_ParseFail; 2958 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2959 return MatchOperand_ParseFail; 2960 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2961 return MatchOperand_ParseFail; 2962 2963 AMDGPUOperand::Modifiers Mods; 2964 Mods.Abs = Abs || SP3Abs; 2965 Mods.Neg = Neg || SP3Neg; 2966 2967 if (Mods.hasFPModifiers()) { 2968 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2969 if (Op.isExpr()) { 2970 Error(Op.getStartLoc(), "expected an absolute expression"); 2971 return MatchOperand_ParseFail; 2972 } 2973 Op.setModifiers(Mods); 2974 } 2975 return MatchOperand_Success; 2976 } 2977 2978 OperandMatchResultTy 2979 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2980 bool AllowImm) { 2981 bool Sext = trySkipId("sext"); 2982 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2983 return MatchOperand_ParseFail; 2984 2985 OperandMatchResultTy Res; 2986 if (AllowImm) { 2987 Res = parseRegOrImm(Operands); 2988 } else { 2989 Res = parseReg(Operands); 2990 } 2991 if (Res != MatchOperand_Success) { 2992 return Sext? MatchOperand_ParseFail : Res; 2993 } 2994 2995 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2996 return MatchOperand_ParseFail; 2997 2998 AMDGPUOperand::Modifiers Mods; 2999 Mods.Sext = Sext; 3000 3001 if (Mods.hasIntModifiers()) { 3002 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3003 if (Op.isExpr()) { 3004 Error(Op.getStartLoc(), "expected an absolute expression"); 3005 return MatchOperand_ParseFail; 3006 } 3007 Op.setModifiers(Mods); 3008 } 3009 3010 return MatchOperand_Success; 3011 } 3012 3013 OperandMatchResultTy 3014 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3015 return parseRegOrImmWithFPInputMods(Operands, false); 3016 } 3017 3018 OperandMatchResultTy 3019 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3020 return parseRegOrImmWithIntInputMods(Operands, false); 3021 } 3022 3023 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3024 auto Loc = getLoc(); 3025 if (trySkipId("off")) { 3026 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3027 AMDGPUOperand::ImmTyOff, false)); 3028 return MatchOperand_Success; 3029 } 3030 3031 if (!isRegister()) 3032 return MatchOperand_NoMatch; 3033 3034 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3035 if (Reg) { 3036 Operands.push_back(std::move(Reg)); 3037 return MatchOperand_Success; 3038 } 3039 3040 return MatchOperand_ParseFail; 3041 3042 } 3043 3044 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3045 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3046 3047 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3048 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3049 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3050 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3051 return Match_InvalidOperand; 3052 3053 if ((TSFlags & SIInstrFlags::VOP3) && 3054 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3055 getForcedEncodingSize() != 64) 3056 return Match_PreferE32; 3057 3058 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3059 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3060 // v_mac_f32/16 allow only dst_sel == DWORD; 3061 auto OpNum = 3062 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3063 const auto &Op = Inst.getOperand(OpNum); 3064 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3065 return Match_InvalidOperand; 3066 } 3067 } 3068 3069 return Match_Success; 3070 } 3071 3072 static ArrayRef<unsigned> getAllVariants() { 3073 static const unsigned Variants[] = { 3074 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3075 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3076 }; 3077 3078 return makeArrayRef(Variants); 3079 } 3080 3081 // What asm variants we should check 3082 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3083 if (getForcedEncodingSize() == 32) { 3084 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3085 return makeArrayRef(Variants); 3086 } 3087 3088 if (isForcedVOP3()) { 3089 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3090 return makeArrayRef(Variants); 3091 } 3092 3093 if (isForcedSDWA()) { 3094 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3095 AMDGPUAsmVariants::SDWA9}; 3096 return makeArrayRef(Variants); 3097 } 3098 3099 if (isForcedDPP()) { 3100 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3101 return makeArrayRef(Variants); 3102 } 3103 3104 return getAllVariants(); 3105 } 3106 3107 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3108 if (getForcedEncodingSize() == 32) 3109 return "e32"; 3110 3111 if (isForcedVOP3()) 3112 return "e64"; 3113 3114 if (isForcedSDWA()) 3115 return "sdwa"; 3116 3117 if (isForcedDPP()) 3118 return "dpp"; 3119 3120 return ""; 3121 } 3122 3123 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3124 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3125 const unsigned Num = Desc.getNumImplicitUses(); 3126 for (unsigned i = 0; i < Num; ++i) { 3127 unsigned Reg = Desc.ImplicitUses[i]; 3128 switch (Reg) { 3129 case AMDGPU::FLAT_SCR: 3130 case AMDGPU::VCC: 3131 case AMDGPU::VCC_LO: 3132 case AMDGPU::VCC_HI: 3133 case AMDGPU::M0: 3134 return Reg; 3135 default: 3136 break; 3137 } 3138 } 3139 return AMDGPU::NoRegister; 3140 } 3141 3142 // NB: This code is correct only when used to check constant 3143 // bus limitations because GFX7 support no f16 inline constants. 3144 // Note that there are no cases when a GFX7 opcode violates 3145 // constant bus limitations due to the use of an f16 constant. 
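// Illustrative note: inline constants such as 0, -4 or 1.0 do not occupy a
// constant bus slot, whereas SGPR operands and 32-bit literals do; that is
// the distinction the constant bus checks below rely on.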
3146 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3147 unsigned OpIdx) const { 3148 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3149 3150 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3151 return false; 3152 } 3153 3154 const MCOperand &MO = Inst.getOperand(OpIdx); 3155 3156 int64_t Val = MO.getImm(); 3157 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3158 3159 switch (OpSize) { // expected operand size 3160 case 8: 3161 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3162 case 4: 3163 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3164 case 2: { 3165 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3166 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3167 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3168 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3169 return AMDGPU::isInlinableIntLiteral(Val); 3170 3171 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3172 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3173 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3174 return AMDGPU::isInlinableIntLiteralV216(Val); 3175 3176 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3177 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3178 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3179 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3180 3181 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3182 } 3183 default: 3184 llvm_unreachable("invalid operand size"); 3185 } 3186 } 3187 3188 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3189 if (!isGFX10Plus()) 3190 return 1; 3191 3192 switch (Opcode) { 3193 // 64-bit shift instructions can use only one scalar value input 3194 case AMDGPU::V_LSHLREV_B64_e64: 3195 case AMDGPU::V_LSHLREV_B64_gfx10: 3196 case AMDGPU::V_LSHRREV_B64_e64: 3197 case AMDGPU::V_LSHRREV_B64_gfx10: 3198 case AMDGPU::V_ASHRREV_I64_e64: 3199 case AMDGPU::V_ASHRREV_I64_gfx10: 3200 case AMDGPU::V_LSHL_B64_e64: 3201 case AMDGPU::V_LSHR_B64_e64: 3202 case AMDGPU::V_ASHR_I64_e64: 3203 return 1; 3204 default: 3205 return 2; 3206 } 3207 } 3208 3209 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3210 const MCOperand &MO = Inst.getOperand(OpIdx); 3211 if (MO.isImm()) { 3212 return !isInlineConstant(Inst, OpIdx); 3213 } else if (MO.isReg()) { 3214 auto Reg = MO.getReg(); 3215 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3216 auto PReg = mc2PseudoReg(Reg); 3217 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3218 } else { 3219 return true; 3220 } 3221 } 3222 3223 bool 3224 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3225 const OperandVector &Operands) { 3226 const unsigned Opcode = Inst.getOpcode(); 3227 const MCInstrDesc &Desc = MII.get(Opcode); 3228 unsigned LastSGPR = AMDGPU::NoRegister; 3229 unsigned ConstantBusUseCount = 0; 3230 unsigned NumLiterals = 0; 3231 unsigned LiteralSize; 3232 3233 if (Desc.TSFlags & 3234 (SIInstrFlags::VOPC | 3235 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3236 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3237 SIInstrFlags::SDWA)) { 3238 // Check special imm operands (used by madmk, etc) 3239 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3240 ++ConstantBusUseCount; 3241 } 3242 3243 SmallDenseSet<unsigned> SGPRsUsed; 3244 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3245 if (SGPRUsed != AMDGPU::NoRegister) { 3246 SGPRsUsed.insert(SGPRUsed); 3247 ++ConstantBusUseCount; 3248 } 3249 3250 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3308 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3309 return false; 3310 } 3311 3312 bool 3313 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3314 const OperandVector &Operands) { 3315 const unsigned Opcode = Inst.getOpcode(); 3316 const MCInstrDesc &Desc = MII.get(Opcode); 3317 3318 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3319 if (DstIdx == -1 || 3320 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3321 return true; 3322 } 3323 3324 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3325 3326 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3327 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3328 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3329 3330 assert(DstIdx != -1); 3331 const MCOperand &Dst = Inst.getOperand(DstIdx); 3332 assert(Dst.isReg()); 3333 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3334 3335 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3336 3337 for (int SrcIdx : SrcIndices) { 3338 if (SrcIdx == -1) break; 3339 const MCOperand &Src = Inst.getOperand(SrcIdx); 3340 if (Src.isReg()) { 3341 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3342 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3343 Error(getRegLoc(SrcReg, Operands), 3344 "destination must be different than all sources"); 3345 return false; 3346 } 3347 } 3348 } 3349 3350 return true; 3351 } 3352 3353 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3354 3355 const unsigned Opc = Inst.getOpcode(); 3356 const MCInstrDesc &Desc = MII.get(Opc); 3357 3358 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3359 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3360 assert(ClampIdx != -1); 3361 return Inst.getOperand(ClampIdx).getImm() == 0; 3362 } 3363 3364 return true; 3365 } 3366 3367 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3368 3369 const unsigned Opc = Inst.getOpcode(); 3370 const MCInstrDesc &Desc = MII.get(Opc); 3371 3372 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3373 return true; 3374 3375 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3376 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3377 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3378 3379 assert(VDataIdx != -1); 3380 3381 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3382 return true; 3383 3384 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3385 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3386 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3387 if (DMask == 0) 3388 DMask = 1; 3389 3390 unsigned DataSize = 3391 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3392 if (hasPackedD16()) { 3393 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3394 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3395 DataSize = (DataSize + 1) / 2; 3396 } 3397 3398 return (VDataSize / 4) == DataSize + TFESize; 3399 } 3400 3401 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3402 const unsigned Opc = Inst.getOpcode(); 3403 const MCInstrDesc &Desc = MII.get(Opc); 3404 3405 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3406 return true; 3407 3408 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3409 3410 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3411 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3412 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3413 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3414 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3415 3416 assert(VAddr0Idx != -1); 3417 assert(SrsrcIdx != -1); 3418 assert(SrsrcIdx > VAddr0Idx); 3419 3420 if (DimIdx == -1) 3421 return true; // intersect_ray 3422 3423 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3424 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3425 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3426 unsigned VAddrSize = 3427 IsNSA ? SrsrcIdx - VAddr0Idx 3428 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3429 3430 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3431 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3432 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3433 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3434 if (!IsNSA) { 3435 if (AddrSize > 8) 3436 AddrSize = 16; 3437 else if (AddrSize > 4) 3438 AddrSize = 8; 3439 } 3440 3441 return VAddrSize == AddrSize; 3442 } 3443 3444 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3445 3446 const unsigned Opc = Inst.getOpcode(); 3447 const MCInstrDesc &Desc = MII.get(Opc); 3448 3449 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3450 return true; 3451 if (!Desc.mayLoad() || !Desc.mayStore()) 3452 return true; // Not atomic 3453 3454 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3455 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3456 3457 // This is an incomplete check because image_atomic_cmpswap 3458 // may only use 0x3 and 0xf while other atomic operations 3459 // may use 0x1 and 0x3. However these limitations are 3460 // verified when we check that dmask matches dst size. 3461 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3462 } 3463 3464 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3465 3466 const unsigned Opc = Inst.getOpcode(); 3467 const MCInstrDesc &Desc = MII.get(Opc); 3468 3469 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3470 return true; 3471 3472 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3473 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3474 3475 // GATHER4 instructions use dmask in a different fashion compared to 3476 // other MIMG instructions. The only useful DMASK values are 3477 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3478 // (red,red,red,red) etc.) The ISA document doesn't mention 3479 // this. 
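  // Illustrative example (operands spelled only for illustration):
  //   image_gather4 v[4:7], v[1:2], s[8:15], s[20:23] dmask:0x2
  // selects the green channel and passes this check, while dmask:0x3 does not.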
3480 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3481 } 3482 3483 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3484 const unsigned Opc = Inst.getOpcode(); 3485 const MCInstrDesc &Desc = MII.get(Opc); 3486 3487 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3488 return true; 3489 3490 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3491 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3492 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3493 3494 if (!BaseOpcode->MSAA) 3495 return true; 3496 3497 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3498 assert(DimIdx != -1); 3499 3500 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3501 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3502 3503 return DimInfo->MSAA; 3504 } 3505 3506 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3507 { 3508 switch (Opcode) { 3509 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3510 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3511 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3512 return true; 3513 default: 3514 return false; 3515 } 3516 } 3517 3518 // movrels* opcodes should only allow VGPRS as src0. 3519 // This is specified in .td description for vop1/vop3, 3520 // but sdwa is handled differently. See isSDWAOperand. 3521 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3522 const OperandVector &Operands) { 3523 3524 const unsigned Opc = Inst.getOpcode(); 3525 const MCInstrDesc &Desc = MII.get(Opc); 3526 3527 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3528 return true; 3529 3530 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3531 assert(Src0Idx != -1); 3532 3533 SMLoc ErrLoc; 3534 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3535 if (Src0.isReg()) { 3536 auto Reg = mc2PseudoReg(Src0.getReg()); 3537 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3538 if (!isSGPR(Reg, TRI)) 3539 return true; 3540 ErrLoc = getRegLoc(Reg, Operands); 3541 } else { 3542 ErrLoc = getConstLoc(Operands); 3543 } 3544 3545 Error(ErrLoc, "source operand must be a VGPR"); 3546 return false; 3547 } 3548 3549 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3550 const OperandVector &Operands) { 3551 3552 const unsigned Opc = Inst.getOpcode(); 3553 3554 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3555 return true; 3556 3557 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3558 assert(Src0Idx != -1); 3559 3560 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3561 if (!Src0.isReg()) 3562 return true; 3563 3564 auto Reg = mc2PseudoReg(Src0.getReg()); 3565 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3566 if (isSGPR(Reg, TRI)) { 3567 Error(getRegLoc(Reg, Operands), 3568 "source operand must be either a VGPR or an inline constant"); 3569 return false; 3570 } 3571 3572 return true; 3573 } 3574 3575 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3576 switch (Inst.getOpcode()) { 3577 default: 3578 return true; 3579 case V_DIV_SCALE_F32_gfx6_gfx7: 3580 case V_DIV_SCALE_F32_vi: 3581 case V_DIV_SCALE_F32_gfx10: 3582 case V_DIV_SCALE_F64_gfx6_gfx7: 3583 case V_DIV_SCALE_F64_vi: 3584 case V_DIV_SCALE_F64_gfx10: 3585 break; 3586 } 3587 3588 // TODO: Check that src0 = src1 or src2. 
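  // Illustrative example (assumed syntax): the modifier check below is meant
  // to reject an 'abs' (or SP3 '|..|') source modifier such as
  //   v_div_scale_f32 v0, vcc, |v1|, v2, v3
  // while 'neg' modifiers remain allowed.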
3589 3590 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3591 AMDGPU::OpName::src1_modifiers, 3592 AMDGPU::OpName::src2_modifiers}) { 3593 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3594 .getImm() & 3595 SISrcMods::ABS) { 3596 return false; 3597 } 3598 } 3599 3600 return true; 3601 } 3602 3603 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3604 3605 const unsigned Opc = Inst.getOpcode(); 3606 const MCInstrDesc &Desc = MII.get(Opc); 3607 3608 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3609 return true; 3610 3611 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3612 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3613 if (isCI() || isSI()) 3614 return false; 3615 } 3616 3617 return true; 3618 } 3619 3620 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3621 const unsigned Opc = Inst.getOpcode(); 3622 const MCInstrDesc &Desc = MII.get(Opc); 3623 3624 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3625 return true; 3626 3627 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3628 if (DimIdx < 0) 3629 return true; 3630 3631 long Imm = Inst.getOperand(DimIdx).getImm(); 3632 if (Imm < 0 || Imm >= 8) 3633 return false; 3634 3635 return true; 3636 } 3637 3638 static bool IsRevOpcode(const unsigned Opcode) 3639 { 3640 switch (Opcode) { 3641 case AMDGPU::V_SUBREV_F32_e32: 3642 case AMDGPU::V_SUBREV_F32_e64: 3643 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3644 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3645 case AMDGPU::V_SUBREV_F32_e32_vi: 3646 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3647 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3648 case AMDGPU::V_SUBREV_F32_e64_vi: 3649 3650 case AMDGPU::V_SUBREV_CO_U32_e32: 3651 case AMDGPU::V_SUBREV_CO_U32_e64: 3652 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3653 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3654 3655 case AMDGPU::V_SUBBREV_U32_e32: 3656 case AMDGPU::V_SUBBREV_U32_e64: 3657 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3658 case AMDGPU::V_SUBBREV_U32_e32_vi: 3659 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3660 case AMDGPU::V_SUBBREV_U32_e64_vi: 3661 3662 case AMDGPU::V_SUBREV_U32_e32: 3663 case AMDGPU::V_SUBREV_U32_e64: 3664 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3665 case AMDGPU::V_SUBREV_U32_e32_vi: 3666 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3667 case AMDGPU::V_SUBREV_U32_e64_vi: 3668 3669 case AMDGPU::V_SUBREV_F16_e32: 3670 case AMDGPU::V_SUBREV_F16_e64: 3671 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3672 case AMDGPU::V_SUBREV_F16_e32_vi: 3673 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3674 case AMDGPU::V_SUBREV_F16_e64_vi: 3675 3676 case AMDGPU::V_SUBREV_U16_e32: 3677 case AMDGPU::V_SUBREV_U16_e64: 3678 case AMDGPU::V_SUBREV_U16_e32_vi: 3679 case AMDGPU::V_SUBREV_U16_e64_vi: 3680 3681 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3682 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3683 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3684 3685 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3686 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3687 3688 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3689 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3690 3691 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3692 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3693 3694 case AMDGPU::V_LSHRREV_B32_e32: 3695 case AMDGPU::V_LSHRREV_B32_e64: 3696 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3697 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3698 case AMDGPU::V_LSHRREV_B32_e32_vi: 3699 case AMDGPU::V_LSHRREV_B32_e64_vi: 3700 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3701 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3702 3703 case AMDGPU::V_ASHRREV_I32_e32: 3704 case
AMDGPU::V_ASHRREV_I32_e64: 3705 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3706 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3707 case AMDGPU::V_ASHRREV_I32_e32_vi: 3708 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3709 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3710 case AMDGPU::V_ASHRREV_I32_e64_vi: 3711 3712 case AMDGPU::V_LSHLREV_B32_e32: 3713 case AMDGPU::V_LSHLREV_B32_e64: 3714 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3715 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3716 case AMDGPU::V_LSHLREV_B32_e32_vi: 3717 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3718 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3719 case AMDGPU::V_LSHLREV_B32_e64_vi: 3720 3721 case AMDGPU::V_LSHLREV_B16_e32: 3722 case AMDGPU::V_LSHLREV_B16_e64: 3723 case AMDGPU::V_LSHLREV_B16_e32_vi: 3724 case AMDGPU::V_LSHLREV_B16_e64_vi: 3725 case AMDGPU::V_LSHLREV_B16_gfx10: 3726 3727 case AMDGPU::V_LSHRREV_B16_e32: 3728 case AMDGPU::V_LSHRREV_B16_e64: 3729 case AMDGPU::V_LSHRREV_B16_e32_vi: 3730 case AMDGPU::V_LSHRREV_B16_e64_vi: 3731 case AMDGPU::V_LSHRREV_B16_gfx10: 3732 3733 case AMDGPU::V_ASHRREV_I16_e32: 3734 case AMDGPU::V_ASHRREV_I16_e64: 3735 case AMDGPU::V_ASHRREV_I16_e32_vi: 3736 case AMDGPU::V_ASHRREV_I16_e64_vi: 3737 case AMDGPU::V_ASHRREV_I16_gfx10: 3738 3739 case AMDGPU::V_LSHLREV_B64_e64: 3740 case AMDGPU::V_LSHLREV_B64_gfx10: 3741 case AMDGPU::V_LSHLREV_B64_vi: 3742 3743 case AMDGPU::V_LSHRREV_B64_e64: 3744 case AMDGPU::V_LSHRREV_B64_gfx10: 3745 case AMDGPU::V_LSHRREV_B64_vi: 3746 3747 case AMDGPU::V_ASHRREV_I64_e64: 3748 case AMDGPU::V_ASHRREV_I64_gfx10: 3749 case AMDGPU::V_ASHRREV_I64_vi: 3750 3751 case AMDGPU::V_PK_LSHLREV_B16: 3752 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3753 case AMDGPU::V_PK_LSHLREV_B16_vi: 3754 3755 case AMDGPU::V_PK_LSHRREV_B16: 3756 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3757 case AMDGPU::V_PK_LSHRREV_B16_vi: 3758 case AMDGPU::V_PK_ASHRREV_I16: 3759 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3760 case AMDGPU::V_PK_ASHRREV_I16_vi: 3761 return true; 3762 default: 3763 return false; 3764 } 3765 } 3766 3767 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3768 3769 using namespace SIInstrFlags; 3770 const unsigned Opcode = Inst.getOpcode(); 3771 const MCInstrDesc &Desc = MII.get(Opcode); 3772 3773 // lds_direct register is defined so that it can be used 3774 // with 9-bit operands only. Ignore encodings which do not accept these. 
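// These VALU encodings are the ones with 9-bit source operand fields; any
// other encoding cannot encode lds_direct and is ignored by this check.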
3775 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3776 if ((Desc.TSFlags & Enc) == 0) 3777 return None; 3778 3779 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3780 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3781 if (SrcIdx == -1) 3782 break; 3783 const auto &Src = Inst.getOperand(SrcIdx); 3784 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3785 3786 if (isGFX90A()) 3787 return StringRef("lds_direct is not supported on this GPU"); 3788 3789 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3790 return StringRef("lds_direct cannot be used with this instruction"); 3791 3792 if (SrcName != OpName::src0) 3793 return StringRef("lds_direct may be used as src0 only"); 3794 } 3795 } 3796 3797 return None; 3798 } 3799 3800 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3801 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3802 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3803 if (Op.isFlatOffset()) 3804 return Op.getStartLoc(); 3805 } 3806 return getLoc(); 3807 } 3808 3809 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3810 const OperandVector &Operands) { 3811 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3812 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3813 return true; 3814 3815 auto Opcode = Inst.getOpcode(); 3816 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3817 assert(OpNum != -1); 3818 3819 const auto &Op = Inst.getOperand(OpNum); 3820 if (!hasFlatOffsets() && Op.getImm() != 0) { 3821 Error(getFlatOffsetLoc(Operands), 3822 "flat offset modifier is not supported on this GPU"); 3823 return false; 3824 } 3825 3826 // For FLAT segment the offset must be positive; 3827 // MSB is ignored and forced to zero. 3828 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3829 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3830 if (!isIntN(OffsetSize, Op.getImm())) { 3831 Error(getFlatOffsetLoc(Operands), 3832 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3833 return false; 3834 } 3835 } else { 3836 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3837 if (!isUIntN(OffsetSize, Op.getImm())) { 3838 Error(getFlatOffsetLoc(Operands), 3839 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3840 return false; 3841 } 3842 } 3843 3844 return true; 3845 } 3846 3847 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3848 // Start with second operand because SMEM Offset cannot be dst or src0. 
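// For example, in "s_load_dword s5, s[2:3], 0x1000" the 0x1000 immediate is
// the SMEM offset operand whose location is reported on error.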
3849 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3850 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3851 if (Op.isSMEMOffset()) 3852 return Op.getStartLoc(); 3853 } 3854 return getLoc(); 3855 } 3856 3857 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3858 const OperandVector &Operands) { 3859 if (isCI() || isSI()) 3860 return true; 3861 3862 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3863 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3864 return true; 3865 3866 auto Opcode = Inst.getOpcode(); 3867 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3868 if (OpNum == -1) 3869 return true; 3870 3871 const auto &Op = Inst.getOperand(OpNum); 3872 if (!Op.isImm()) 3873 return true; 3874 3875 uint64_t Offset = Op.getImm(); 3876 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3877 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3878 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3879 return true; 3880 3881 Error(getSMEMOffsetLoc(Operands), 3882 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3883 "expected a 21-bit signed offset"); 3884 3885 return false; 3886 } 3887 3888 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3889 unsigned Opcode = Inst.getOpcode(); 3890 const MCInstrDesc &Desc = MII.get(Opcode); 3891 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3892 return true; 3893 3894 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3895 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3896 3897 const int OpIndices[] = { Src0Idx, Src1Idx }; 3898 3899 unsigned NumExprs = 0; 3900 unsigned NumLiterals = 0; 3901 uint32_t LiteralValue; 3902 3903 for (int OpIdx : OpIndices) { 3904 if (OpIdx == -1) break; 3905 3906 const MCOperand &MO = Inst.getOperand(OpIdx); 3907 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3908 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3909 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3910 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3911 if (NumLiterals == 0 || LiteralValue != Value) { 3912 LiteralValue = Value; 3913 ++NumLiterals; 3914 } 3915 } else if (MO.isExpr()) { 3916 ++NumExprs; 3917 } 3918 } 3919 } 3920 3921 return NumLiterals + NumExprs <= 1; 3922 } 3923 3924 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3925 const unsigned Opc = Inst.getOpcode(); 3926 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3927 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3928 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3929 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3930 3931 if (OpSel & ~3) 3932 return false; 3933 } 3934 return true; 3935 } 3936 3937 // Check if VCC register matches wavefront size 3938 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3939 auto FB = getFeatureBits(); 3940 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3941 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3942 } 3943 3944 // VOP3 literal is only allowed in GFX10+ and only one can be used 3945 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3946 const OperandVector &Operands) { 3947 unsigned Opcode = Inst.getOpcode(); 3948 const MCInstrDesc &Desc = MII.get(Opcode); 3949 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3950 return true; 3951 3952 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3953 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3954 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3955 3956 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3957 3958 unsigned NumExprs = 0; 3959 unsigned NumLiterals = 0; 3960 uint32_t LiteralValue; 3961 3962 for (int OpIdx : OpIndices) { 3963 if (OpIdx == -1) break; 3964 3965 const MCOperand &MO = Inst.getOperand(OpIdx); 3966 if (!MO.isImm() && !MO.isExpr()) 3967 continue; 3968 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3969 continue; 3970 3971 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3972 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3973 Error(getConstLoc(Operands), 3974 "inline constants are not allowed for this operand"); 3975 return false; 3976 } 3977 3978 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3979 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3980 if (NumLiterals == 0 || LiteralValue != Value) { 3981 LiteralValue = Value; 3982 ++NumLiterals; 3983 } 3984 } else if (MO.isExpr()) { 3985 ++NumExprs; 3986 } 3987 } 3988 NumLiterals += NumExprs; 3989 3990 if (!NumLiterals) 3991 return true; 3992 3993 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3994 Error(getLitLoc(Operands), "literal operands are not supported"); 3995 return false; 3996 } 3997 3998 if (NumLiterals > 1) { 3999 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4000 return false; 4001 } 4002 4003 return true; 4004 } 4005 4006 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4007 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4008 const MCRegisterInfo *MRI) { 4009 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4010 if (OpIdx < 0) 4011 return -1; 4012 4013 const MCOperand &Op = Inst.getOperand(OpIdx); 4014 if (!Op.isReg()) 4015 return -1; 4016 4017 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4018 auto Reg = Sub ? Sub : Op.getReg(); 4019 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4020 return AGRP32.contains(Reg) ? 1 : 0; 4021 } 4022 4023 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4024 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4025 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4026 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4027 SIInstrFlags::DS)) == 0) 4028 return true; 4029 4030 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4031 : AMDGPU::OpName::vdata; 4032 4033 const MCRegisterInfo *MRI = getMRI(); 4034 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4035 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4036 4037 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4038 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4039 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4040 return false; 4041 } 4042 4043 auto FB = getFeatureBits(); 4044 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4045 if (DataAreg < 0 || DstAreg < 0) 4046 return true; 4047 return DstAreg == DataAreg; 4048 } 4049 4050 return DstAreg < 1 && DataAreg < 1; 4051 } 4052 4053 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4054 auto FB = getFeatureBits(); 4055 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4056 return true; 4057 4058 const MCRegisterInfo *MRI = getMRI(); 4059 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4060 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4061 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4062 const MCOperand &Op = Inst.getOperand(I); 4063 if (!Op.isReg()) 4064 continue; 4065 4066 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4067 if (!Sub) 4068 continue; 4069 4070 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4071 return false; 4072 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4073 return false; 4074 } 4075 4076 return true; 4077 } 4078 4079 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4080 const OperandVector &Operands, 4081 const SMLoc &IDLoc) { 4082 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4083 AMDGPU::OpName::cpol); 4084 if (CPolPos == -1) 4085 return true; 4086 4087 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4088 4089 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4090 if ((TSFlags & (SIInstrFlags::SMRD)) && 4091 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4092 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4093 return false; 4094 } 4095 4096 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4097 return true; 4098 4099 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4100 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4101 Error(IDLoc, "instruction must use glc"); 4102 return false; 4103 } 4104 } else { 4105 if (CPol & CPol::GLC) { 4106 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4107 StringRef CStr(S.getPointer()); 4108 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4109 Error(S, "instruction must not use glc"); 4110 return false; 4111 } 4112 } 4113 4114 if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) { 4115 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4116 StringRef CStr(S.getPointer()); 4117 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4118 Error(S, "instruction must not use scc"); 4119 return false; 4120 } 4121 4122 return true; 4123 } 4124 4125 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4126 const SMLoc &IDLoc, 4127 const OperandVector &Operands) { 4128 if (auto ErrMsg = validateLdsDirect(Inst)) { 4129 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4130 return false; 4131 } 4132 if (!validateSOPLiteral(Inst)) { 4133 Error(getLitLoc(Operands), 4134 "only one literal operand is allowed"); 4135 return false; 4136 } 4137 if (!validateVOP3Literal(Inst, Operands)) { 4138 return false; 4139 } 4140 if 
(!validateConstantBusLimitations(Inst, Operands)) { 4141 return false; 4142 } 4143 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4144 return false; 4145 } 4146 if (!validateIntClampSupported(Inst)) { 4147 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4148 "integer clamping is not supported on this GPU"); 4149 return false; 4150 } 4151 if (!validateOpSel(Inst)) { 4152 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4153 "invalid op_sel operand"); 4154 return false; 4155 } 4156 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4157 if (!validateMIMGD16(Inst)) { 4158 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4159 "d16 modifier is not supported on this GPU"); 4160 return false; 4161 } 4162 if (!validateMIMGDim(Inst)) { 4163 Error(IDLoc, "dim modifier is required on this GPU"); 4164 return false; 4165 } 4166 if (!validateMIMGMSAA(Inst)) { 4167 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4168 "invalid dim; must be MSAA type"); 4169 return false; 4170 } 4171 if (!validateMIMGDataSize(Inst)) { 4172 Error(IDLoc, 4173 "image data size does not match dmask and tfe"); 4174 return false; 4175 } 4176 if (!validateMIMGAddrSize(Inst)) { 4177 Error(IDLoc, 4178 "image address size does not match dim and a16"); 4179 return false; 4180 } 4181 if (!validateMIMGAtomicDMask(Inst)) { 4182 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4183 "invalid atomic image dmask"); 4184 return false; 4185 } 4186 if (!validateMIMGGatherDMask(Inst)) { 4187 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4188 "invalid image_gather dmask: only one bit must be set"); 4189 return false; 4190 } 4191 if (!validateMovrels(Inst, Operands)) { 4192 return false; 4193 } 4194 if (!validateFlatOffset(Inst, Operands)) { 4195 return false; 4196 } 4197 if (!validateSMEMOffset(Inst, Operands)) { 4198 return false; 4199 } 4200 if (!validateMAIAccWrite(Inst, Operands)) { 4201 return false; 4202 } 4203 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4204 return false; 4205 } 4206 4207 if (!validateAGPRLdSt(Inst)) { 4208 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4209 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4210 : "invalid register class: agpr loads and stores not supported on this GPU" 4211 ); 4212 return false; 4213 } 4214 if (!validateVGPRAlign(Inst)) { 4215 Error(IDLoc, 4216 "invalid register class: vgpr tuples must be 64 bit aligned"); 4217 return false; 4218 } 4219 4220 if (!validateDivScale(Inst)) { 4221 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4222 return false; 4223 } 4224 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4225 return false; 4226 } 4227 4228 return true; 4229 } 4230 4231 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4232 const FeatureBitset &FBS, 4233 unsigned VariantID = 0); 4234 4235 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4236 const FeatureBitset &AvailableFeatures, 4237 unsigned VariantID); 4238 4239 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4240 const FeatureBitset &FBS) { 4241 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4242 } 4243 4244 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4245 const FeatureBitset &FBS, 4246 ArrayRef<unsigned> Variants) { 4247 for (auto Variant : Variants) { 4248 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4249 return true; 4250 } 4251 4252 return false; 4253 } 4254 4255 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4256 const SMLoc &IDLoc) { 4257 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4258 4259 // Check if requested instruction variant is supported. 4260 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4261 return false; 4262 4263 // This instruction is not supported. 4264 // Clear any other pending errors because they are no longer relevant. 4265 getParser().clearPendingErrors(); 4266 4267 // Requested instruction variant is not supported. 4268 // Check if any other variants are supported. 4269 StringRef VariantName = getMatchedVariantName(); 4270 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4271 return Error(IDLoc, 4272 Twine(VariantName, 4273 " variant of this instruction is not supported")); 4274 } 4275 4276 // Finally check if this instruction is supported on any other GPU. 4277 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4278 return Error(IDLoc, "instruction not supported on this GPU"); 4279 } 4280 4281 // Instruction not supported on any GPU. Probably a typo. 4282 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4283 return Error(IDLoc, "invalid instruction" + Suggestion); 4284 } 4285 4286 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4287 OperandVector &Operands, 4288 MCStreamer &Out, 4289 uint64_t &ErrorInfo, 4290 bool MatchingInlineAsm) { 4291 MCInst Inst; 4292 unsigned Result = Match_Success; 4293 for (auto Variant : getMatchedVariants()) { 4294 uint64_t EI; 4295 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4296 Variant); 4297 // We order match statuses from least to most specific. 
We use most specific 4298 // status as resulting 4299 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4300 if ((R == Match_Success) || 4301 (R == Match_PreferE32) || 4302 (R == Match_MissingFeature && Result != Match_PreferE32) || 4303 (R == Match_InvalidOperand && Result != Match_MissingFeature 4304 && Result != Match_PreferE32) || 4305 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4306 && Result != Match_MissingFeature 4307 && Result != Match_PreferE32)) { 4308 Result = R; 4309 ErrorInfo = EI; 4310 } 4311 if (R == Match_Success) 4312 break; 4313 } 4314 4315 if (Result == Match_Success) { 4316 if (!validateInstruction(Inst, IDLoc, Operands)) { 4317 return true; 4318 } 4319 Inst.setLoc(IDLoc); 4320 Out.emitInstruction(Inst, getSTI()); 4321 return false; 4322 } 4323 4324 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4325 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4326 return true; 4327 } 4328 4329 switch (Result) { 4330 default: break; 4331 case Match_MissingFeature: 4332 // It has been verified that the specified instruction 4333 // mnemonic is valid. A match was found but it requires 4334 // features which are not supported on this GPU. 4335 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4336 4337 case Match_InvalidOperand: { 4338 SMLoc ErrorLoc = IDLoc; 4339 if (ErrorInfo != ~0ULL) { 4340 if (ErrorInfo >= Operands.size()) { 4341 return Error(IDLoc, "too few operands for instruction"); 4342 } 4343 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4344 if (ErrorLoc == SMLoc()) 4345 ErrorLoc = IDLoc; 4346 } 4347 return Error(ErrorLoc, "invalid operand for instruction"); 4348 } 4349 4350 case Match_PreferE32: 4351 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4352 "should be encoded as e32"); 4353 case Match_MnemonicFail: 4354 llvm_unreachable("Invalid instructions should have been handled already"); 4355 } 4356 llvm_unreachable("Implement any new match types added!"); 4357 } 4358 4359 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4360 int64_t Tmp = -1; 4361 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4362 return true; 4363 } 4364 if (getParser().parseAbsoluteExpression(Tmp)) { 4365 return true; 4366 } 4367 Ret = static_cast<uint32_t>(Tmp); 4368 return false; 4369 } 4370 4371 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4372 uint32_t &Minor) { 4373 if (ParseAsAbsoluteExpression(Major)) 4374 return TokError("invalid major version"); 4375 4376 if (!trySkipToken(AsmToken::Comma)) 4377 return TokError("minor version number required, comma expected"); 4378 4379 if (ParseAsAbsoluteExpression(Minor)) 4380 return TokError("invalid minor version"); 4381 4382 return false; 4383 } 4384 4385 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4386 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4387 return TokError("directive only supported for amdgcn architecture"); 4388 4389 std::string Target; 4390 4391 SMLoc TargetStart = getLoc(); 4392 if (getParser().parseEscapedString(Target)) 4393 return true; 4394 SMRange TargetRange = SMRange(TargetStart, getLoc()); 4395 4396 std::string ExpectedTarget; 4397 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4398 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4399 4400 if (Target != ExpectedTargetOS.str()) 4401 return Error(TargetRange.Start, "target must match options", TargetRange); 4402 4403 
getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4404 return false; 4405 } 4406 4407 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4408 return Error(Range.Start, "value out of range", Range); 4409 } 4410 4411 bool AMDGPUAsmParser::calculateGPRBlocks( 4412 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4413 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4414 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4415 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4416 // TODO(scott.linder): These calculations are duplicated from 4417 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4418 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4419 4420 unsigned NumVGPRs = NextFreeVGPR; 4421 unsigned NumSGPRs = NextFreeSGPR; 4422 4423 if (Version.Major >= 10) 4424 NumSGPRs = 0; 4425 else { 4426 unsigned MaxAddressableNumSGPRs = 4427 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4428 4429 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4430 NumSGPRs > MaxAddressableNumSGPRs) 4431 return OutOfRangeError(SGPRRange); 4432 4433 NumSGPRs += 4434 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4435 4436 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4437 NumSGPRs > MaxAddressableNumSGPRs) 4438 return OutOfRangeError(SGPRRange); 4439 4440 if (Features.test(FeatureSGPRInitBug)) 4441 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4442 } 4443 4444 VGPRBlocks = 4445 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4446 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4447 4448 return false; 4449 } 4450 4451 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4452 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4453 return TokError("directive only supported for amdgcn architecture"); 4454 4455 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4456 return TokError("directive only supported for amdhsa OS"); 4457 4458 StringRef KernelName; 4459 if (getParser().parseIdentifier(KernelName)) 4460 return true; 4461 4462 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4463 4464 StringSet<> Seen; 4465 4466 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4467 4468 SMRange VGPRRange; 4469 uint64_t NextFreeVGPR = 0; 4470 uint64_t AccumOffset = 0; 4471 SMRange SGPRRange; 4472 uint64_t NextFreeSGPR = 0; 4473 unsigned UserSGPRCount = 0; 4474 bool ReserveVCC = true; 4475 bool ReserveFlatScr = true; 4476 bool ReserveXNACK = hasXNACK(); 4477 Optional<bool> EnableWavefrontSize32; 4478 4479 while (true) { 4480 while (trySkipToken(AsmToken::EndOfStatement)); 4481 4482 StringRef ID; 4483 SMRange IDRange = getTok().getLocRange(); 4484 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4485 return true; 4486 4487 if (ID == ".end_amdhsa_kernel") 4488 break; 4489 4490 if (Seen.find(ID) != Seen.end()) 4491 return TokError(".amdhsa_ directives cannot be repeated"); 4492 Seen.insert(ID); 4493 4494 SMLoc ValStart = getLoc(); 4495 int64_t IVal; 4496 if (getParser().parseAbsoluteExpression(IVal)) 4497 return true; 4498 SMLoc ValEnd = getLoc(); 4499 SMRange ValRange = SMRange(ValStart, ValEnd); 4500 4501 if (IVal < 0) 4502 return OutOfRangeError(ValRange); 4503 4504 uint64_t Val = IVal; 4505 4506 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4507 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4508 return OutOfRangeError(RANGE); \ 4509 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4510 4511 if (ID == 
".amdhsa_group_segment_fixed_size") { 4512 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4513 return OutOfRangeError(ValRange); 4514 KD.group_segment_fixed_size = Val; 4515 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4516 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4517 return OutOfRangeError(ValRange); 4518 KD.private_segment_fixed_size = Val; 4519 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4520 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4521 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4522 Val, ValRange); 4523 if (Val) 4524 UserSGPRCount += 4; 4525 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4526 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4527 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4528 ValRange); 4529 if (Val) 4530 UserSGPRCount += 2; 4531 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4532 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4533 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4534 ValRange); 4535 if (Val) 4536 UserSGPRCount += 2; 4537 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4538 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4539 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4540 Val, ValRange); 4541 if (Val) 4542 UserSGPRCount += 2; 4543 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4544 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4545 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4546 ValRange); 4547 if (Val) 4548 UserSGPRCount += 2; 4549 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4550 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4551 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4552 ValRange); 4553 if (Val) 4554 UserSGPRCount += 2; 4555 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4556 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4557 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4558 Val, ValRange); 4559 if (Val) 4560 UserSGPRCount += 1; 4561 } else if (ID == ".amdhsa_wavefront_size32") { 4562 if (IVersion.Major < 10) 4563 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4564 EnableWavefrontSize32 = Val; 4565 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4566 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4567 Val, ValRange); 4568 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4569 PARSE_BITS_ENTRY( 4570 KD.compute_pgm_rsrc2, 4571 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4572 ValRange); 4573 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4574 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4575 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4576 ValRange); 4577 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4578 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4579 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4580 ValRange); 4581 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4582 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4583 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4584 ValRange); 4585 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4586 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4587 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4588 ValRange); 4589 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4590 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4591 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4592 ValRange); 4593 } else if (ID == ".amdhsa_next_free_vgpr") { 4594 VGPRRange = ValRange; 4595 NextFreeVGPR = Val; 4596 } else if (ID == ".amdhsa_next_free_sgpr") { 4597 SGPRRange = 
ValRange; 4598 NextFreeSGPR = Val; 4599 } else if (ID == ".amdhsa_accum_offset") { 4600 if (!isGFX90A()) 4601 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4602 AccumOffset = Val; 4603 } else if (ID == ".amdhsa_reserve_vcc") { 4604 if (!isUInt<1>(Val)) 4605 return OutOfRangeError(ValRange); 4606 ReserveVCC = Val; 4607 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4608 if (IVersion.Major < 7) 4609 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4610 if (!isUInt<1>(Val)) 4611 return OutOfRangeError(ValRange); 4612 ReserveFlatScr = Val; 4613 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4614 if (IVersion.Major < 8) 4615 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4616 if (!isUInt<1>(Val)) 4617 return OutOfRangeError(ValRange); 4618 ReserveXNACK = Val; 4619 } else if (ID == ".amdhsa_float_round_mode_32") { 4620 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4621 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4622 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4623 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4624 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4625 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4626 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4627 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4628 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4629 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4630 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4631 ValRange); 4632 } else if (ID == ".amdhsa_dx10_clamp") { 4633 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4634 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4635 } else if (ID == ".amdhsa_ieee_mode") { 4636 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4637 Val, ValRange); 4638 } else if (ID == ".amdhsa_fp16_overflow") { 4639 if (IVersion.Major < 9) 4640 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4641 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4642 ValRange); 4643 } else if (ID == ".amdhsa_tg_split") { 4644 if (!isGFX90A()) 4645 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4646 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4647 ValRange); 4648 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4649 if (IVersion.Major < 10) 4650 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4651 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4652 ValRange); 4653 } else if (ID == ".amdhsa_memory_ordered") { 4654 if (IVersion.Major < 10) 4655 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4656 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4657 ValRange); 4658 } else if (ID == ".amdhsa_forward_progress") { 4659 if (IVersion.Major < 10) 4660 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4661 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4662 ValRange); 4663 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4664 PARSE_BITS_ENTRY( 4665 KD.compute_pgm_rsrc2, 4666 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4667 ValRange); 4668 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4670 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4671 Val, ValRange); 4672 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4673 PARSE_BITS_ENTRY( 4674 KD.compute_pgm_rsrc2, 4675 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4676 ValRange); 4677 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4678 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4679 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4680 Val, ValRange); 4681 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4682 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4683 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4684 Val, ValRange); 4685 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4686 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4687 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4688 Val, ValRange); 4689 } else if (ID == ".amdhsa_exception_int_div_zero") { 4690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4691 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4692 Val, ValRange); 4693 } else { 4694 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4695 } 4696 4697 #undef PARSE_BITS_ENTRY 4698 } 4699 4700 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4701 return TokError(".amdhsa_next_free_vgpr directive is required"); 4702 4703 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4704 return TokError(".amdhsa_next_free_sgpr directive is required"); 4705 4706 unsigned VGPRBlocks; 4707 unsigned SGPRBlocks; 4708 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4709 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4710 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4711 SGPRBlocks)) 4712 return true; 4713 4714 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4715 VGPRBlocks)) 4716 return OutOfRangeError(VGPRRange); 4717 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4718 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4719 4720 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4721 SGPRBlocks)) 4722 return OutOfRangeError(SGPRRange); 4723 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4724 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4725 SGPRBlocks); 4726 4727 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4728 return TokError("too many user SGPRs enabled"); 4729 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4730 UserSGPRCount); 4731 4732 if (isGFX90A()) { 4733 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4734 return TokError(".amdhsa_accum_offset directive is required"); 4735 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4736 return TokError("accum_offset should be in range [4..256] in " 4737 "increments of 4"); 4738 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4739 return TokError("accum_offset exceeds total VGPR allocation"); 4740 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4741 (AccumOffset / 4 - 1)); 4742 } 4743 4744 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4745 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4746 ReserveFlatScr, ReserveXNACK); 4747 return false; 4748 } 4749 4750 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4751 uint32_t Major; 4752 uint32_t Minor; 4753 4754 if (ParseDirectiveMajorMinor(Major, Minor)) 4755 return true; 4756 4757 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4758 return false; 4759 } 4760 4761 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4762 uint32_t Major; 4763 uint32_t Minor; 4764 uint32_t Stepping; 4765 StringRef VendorName; 4766 StringRef ArchName; 4767 4768 // If this directive has no arguments, then use 
the ISA version for the 4769 // targeted GPU. 4770 if (isToken(AsmToken::EndOfStatement)) { 4771 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4772 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4773 ISA.Stepping, 4774 "AMD", "AMDGPU"); 4775 return false; 4776 } 4777 4778 if (ParseDirectiveMajorMinor(Major, Minor)) 4779 return true; 4780 4781 if (!trySkipToken(AsmToken::Comma)) 4782 return TokError("stepping version number required, comma expected"); 4783 4784 if (ParseAsAbsoluteExpression(Stepping)) 4785 return TokError("invalid stepping version"); 4786 4787 if (!trySkipToken(AsmToken::Comma)) 4788 return TokError("vendor name required, comma expected"); 4789 4790 if (!parseString(VendorName, "invalid vendor name")) 4791 return true; 4792 4793 if (!trySkipToken(AsmToken::Comma)) 4794 return TokError("arch name required, comma expected"); 4795 4796 if (!parseString(ArchName, "invalid arch name")) 4797 return true; 4798 4799 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4800 VendorName, ArchName); 4801 return false; 4802 } 4803 4804 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4805 amd_kernel_code_t &Header) { 4806 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4807 // assembly for backwards compatibility. 4808 if (ID == "max_scratch_backing_memory_byte_size") { 4809 Parser.eatToEndOfStatement(); 4810 return false; 4811 } 4812 4813 SmallString<40> ErrStr; 4814 raw_svector_ostream Err(ErrStr); 4815 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4816 return TokError(Err.str()); 4817 } 4818 Lex(); 4819 4820 if (ID == "enable_wavefront_size32") { 4821 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4822 if (!isGFX10Plus()) 4823 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4824 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4825 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4826 } else { 4827 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4828 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4829 } 4830 } 4831 4832 if (ID == "wavefront_size") { 4833 if (Header.wavefront_size == 5) { 4834 if (!isGFX10Plus()) 4835 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4836 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4837 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4838 } else if (Header.wavefront_size == 6) { 4839 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4840 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4841 } 4842 } 4843 4844 if (ID == "enable_wgp_mode") { 4845 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4846 !isGFX10Plus()) 4847 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4848 } 4849 4850 if (ID == "enable_mem_ordered") { 4851 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4852 !isGFX10Plus()) 4853 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4854 } 4855 4856 if (ID == "enable_fwd_progress") { 4857 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4858 !isGFX10Plus()) 4859 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4860 } 4861 4862 return false; 4863 } 4864 4865 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4866 amd_kernel_code_t Header; 4867 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4868 4869 while (true) { 4870 // Lex 
EndOfStatement. This is in a while loop, because lexing a comment 4871 // will set the current token to EndOfStatement. 4872 while(trySkipToken(AsmToken::EndOfStatement)); 4873 4874 StringRef ID; 4875 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4876 return true; 4877 4878 if (ID == ".end_amd_kernel_code_t") 4879 break; 4880 4881 if (ParseAMDKernelCodeTValue(ID, Header)) 4882 return true; 4883 } 4884 4885 getTargetStreamer().EmitAMDKernelCodeT(Header); 4886 4887 return false; 4888 } 4889 4890 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4891 StringRef KernelName; 4892 if (!parseId(KernelName, "expected symbol name")) 4893 return true; 4894 4895 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4896 ELF::STT_AMDGPU_HSA_KERNEL); 4897 4898 KernelScope.initialize(getContext()); 4899 return false; 4900 } 4901 4902 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4903 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4904 return Error(getLoc(), 4905 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4906 "architectures"); 4907 } 4908 4909 auto ISAVersionStringFromASM = getToken().getStringContents(); 4910 4911 std::string ISAVersionStringFromSTI; 4912 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4913 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4914 4915 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4916 return Error(getLoc(), 4917 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4918 "arguments specified through the command line"); 4919 } 4920 4921 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4922 Lex(); 4923 4924 return false; 4925 } 4926 4927 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4928 const char *AssemblerDirectiveBegin; 4929 const char *AssemblerDirectiveEnd; 4930 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4931 isHsaAbiVersion3(&getSTI()) 4932 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4933 HSAMD::V3::AssemblerDirectiveEnd) 4934 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4935 HSAMD::AssemblerDirectiveEnd); 4936 4937 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4938 return Error(getLoc(), 4939 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4940 "not available on non-amdhsa OSes")).str()); 4941 } 4942 4943 std::string HSAMetadataString; 4944 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4945 HSAMetadataString)) 4946 return true; 4947 4948 if (isHsaAbiVersion3(&getSTI())) { 4949 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4950 return Error(getLoc(), "invalid HSA metadata"); 4951 } else { 4952 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4953 return Error(getLoc(), "invalid HSA metadata"); 4954 } 4955 4956 return false; 4957 } 4958 4959 /// Common code to parse out a block of text (typically YAML) between start and 4960 /// end directives. 
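/// Used, for example, by the HSA metadata and MsgPack PAL metadata
/// directives, which wrap a block of text between a begin and an end marker.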
4961 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4962 const char *AssemblerDirectiveEnd, 4963 std::string &CollectString) { 4964 4965 raw_string_ostream CollectStream(CollectString); 4966 4967 getLexer().setSkipSpace(false); 4968 4969 bool FoundEnd = false; 4970 while (!isToken(AsmToken::Eof)) { 4971 while (isToken(AsmToken::Space)) { 4972 CollectStream << getTokenStr(); 4973 Lex(); 4974 } 4975 4976 if (trySkipId(AssemblerDirectiveEnd)) { 4977 FoundEnd = true; 4978 break; 4979 } 4980 4981 CollectStream << Parser.parseStringToEndOfStatement() 4982 << getContext().getAsmInfo()->getSeparatorString(); 4983 4984 Parser.eatToEndOfStatement(); 4985 } 4986 4987 getLexer().setSkipSpace(true); 4988 4989 if (isToken(AsmToken::Eof) && !FoundEnd) { 4990 return TokError(Twine("expected directive ") + 4991 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4992 } 4993 4994 CollectStream.flush(); 4995 return false; 4996 } 4997 4998 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4999 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5000 std::string String; 5001 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5002 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5003 return true; 5004 5005 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5006 if (!PALMetadata->setFromString(String)) 5007 return Error(getLoc(), "invalid PAL metadata"); 5008 return false; 5009 } 5010 5011 /// Parse the assembler directive for old linear-format PAL metadata. 5012 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5013 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5014 return Error(getLoc(), 5015 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5016 "not available on non-amdpal OSes")).str()); 5017 } 5018 5019 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5020 PALMetadata->setLegacy(); 5021 for (;;) { 5022 uint32_t Key, Value; 5023 if (ParseAsAbsoluteExpression(Key)) { 5024 return TokError(Twine("invalid value in ") + 5025 Twine(PALMD::AssemblerDirective)); 5026 } 5027 if (!trySkipToken(AsmToken::Comma)) { 5028 return TokError(Twine("expected an even number of values in ") + 5029 Twine(PALMD::AssemblerDirective)); 5030 } 5031 if (ParseAsAbsoluteExpression(Value)) { 5032 return TokError(Twine("invalid value in ") + 5033 Twine(PALMD::AssemblerDirective)); 5034 } 5035 PALMetadata->setRegister(Key, Value); 5036 if (!trySkipToken(AsmToken::Comma)) 5037 break; 5038 } 5039 return false; 5040 } 5041 5042 /// ParseDirectiveAMDGPULDS 5043 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5044 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5045 if (getParser().checkForValidSection()) 5046 return true; 5047 5048 StringRef Name; 5049 SMLoc NameLoc = getLoc(); 5050 if (getParser().parseIdentifier(Name)) 5051 return TokError("expected identifier in directive"); 5052 5053 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5054 if (parseToken(AsmToken::Comma, "expected ','")) 5055 return true; 5056 5057 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5058 5059 int64_t Size; 5060 SMLoc SizeLoc = getLoc(); 5061 if (getParser().parseAbsoluteExpression(Size)) 5062 return true; 5063 if (Size < 0) 5064 return Error(SizeLoc, "size must be non-negative"); 5065 if (Size > LocalMemorySize) 5066 return Error(SizeLoc, "size is too large"); 5067 5068 int64_t Alignment = 4; 5069 if (trySkipToken(AsmToken::Comma)) { 5070 SMLoc AlignLoc = getLoc(); 5071 if 
(getParser().parseAbsoluteExpression(Alignment)) 5072 return true; 5073 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5074 return Error(AlignLoc, "alignment must be a power of two"); 5075 5076 // Alignment larger than the size of LDS is possible in theory, as long 5077 // as the linker manages to place to symbol at address 0, but we do want 5078 // to make sure the alignment fits nicely into a 32-bit integer. 5079 if (Alignment >= 1u << 31) 5080 return Error(AlignLoc, "alignment is too large"); 5081 } 5082 5083 if (parseToken(AsmToken::EndOfStatement, 5084 "unexpected token in '.amdgpu_lds' directive")) 5085 return true; 5086 5087 Symbol->redefineIfPossible(); 5088 if (!Symbol->isUndefined()) 5089 return Error(NameLoc, "invalid symbol redefinition"); 5090 5091 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5092 return false; 5093 } 5094 5095 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5096 StringRef IDVal = DirectiveID.getString(); 5097 5098 if (isHsaAbiVersion3(&getSTI())) { 5099 if (IDVal == ".amdgcn_target") 5100 return ParseDirectiveAMDGCNTarget(); 5101 5102 if (IDVal == ".amdhsa_kernel") 5103 return ParseDirectiveAMDHSAKernel(); 5104 5105 // TODO: Restructure/combine with PAL metadata directive. 5106 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5107 return ParseDirectiveHSAMetadata(); 5108 } else { 5109 if (IDVal == ".hsa_code_object_version") 5110 return ParseDirectiveHSACodeObjectVersion(); 5111 5112 if (IDVal == ".hsa_code_object_isa") 5113 return ParseDirectiveHSACodeObjectISA(); 5114 5115 if (IDVal == ".amd_kernel_code_t") 5116 return ParseDirectiveAMDKernelCodeT(); 5117 5118 if (IDVal == ".amdgpu_hsa_kernel") 5119 return ParseDirectiveAMDGPUHsaKernel(); 5120 5121 if (IDVal == ".amd_amdgpu_isa") 5122 return ParseDirectiveISAVersion(); 5123 5124 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5125 return ParseDirectiveHSAMetadata(); 5126 } 5127 5128 if (IDVal == ".amdgpu_lds") 5129 return ParseDirectiveAMDGPULDS(); 5130 5131 if (IDVal == PALMD::AssemblerDirectiveBegin) 5132 return ParseDirectivePALMetadataBegin(); 5133 5134 if (IDVal == PALMD::AssemblerDirective) 5135 return ParseDirectivePALMetadata(); 5136 5137 return true; 5138 } 5139 5140 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5141 unsigned RegNo) const { 5142 5143 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5144 R.isValid(); ++R) { 5145 if (*R == RegNo) 5146 return isGFX9Plus(); 5147 } 5148 5149 // GFX10 has 2 more SGPRs 104 and 105. 5150 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5151 R.isValid(); ++R) { 5152 if (*R == RegNo) 5153 return hasSGPR104_SGPR105(); 5154 } 5155 5156 switch (RegNo) { 5157 case AMDGPU::SRC_SHARED_BASE: 5158 case AMDGPU::SRC_SHARED_LIMIT: 5159 case AMDGPU::SRC_PRIVATE_BASE: 5160 case AMDGPU::SRC_PRIVATE_LIMIT: 5161 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5162 return isGFX9Plus(); 5163 case AMDGPU::TBA: 5164 case AMDGPU::TBA_LO: 5165 case AMDGPU::TBA_HI: 5166 case AMDGPU::TMA: 5167 case AMDGPU::TMA_LO: 5168 case AMDGPU::TMA_HI: 5169 return !isGFX9Plus(); 5170 case AMDGPU::XNACK_MASK: 5171 case AMDGPU::XNACK_MASK_LO: 5172 case AMDGPU::XNACK_MASK_HI: 5173 return (isVI() || isGFX9()) && hasXNACK(); 5174 case AMDGPU::SGPR_NULL: 5175 return isGFX10Plus(); 5176 default: 5177 break; 5178 } 5179 5180 if (isCI()) 5181 return true; 5182 5183 if (isSI() || isGFX10Plus()) { 5184 // No flat_scr on SI. 
5185 // On GFX10 flat scratch is not a valid register operand and can only be 5186 // accessed with s_setreg/s_getreg. 5187 switch (RegNo) { 5188 case AMDGPU::FLAT_SCR: 5189 case AMDGPU::FLAT_SCR_LO: 5190 case AMDGPU::FLAT_SCR_HI: 5191 return false; 5192 default: 5193 return true; 5194 } 5195 } 5196 5197 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5198 // SI/CI have. 5199 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5200 R.isValid(); ++R) { 5201 if (*R == RegNo) 5202 return hasSGPR102_SGPR103(); 5203 } 5204 5205 return true; 5206 } 5207 5208 OperandMatchResultTy 5209 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5210 OperandMode Mode) { 5211 // Try to parse with a custom parser 5212 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5213 5214 // If we successfully parsed the operand or if there as an error parsing, 5215 // we are done. 5216 // 5217 // If we are parsing after we reach EndOfStatement then this means we 5218 // are appending default values to the Operands list. This is only done 5219 // by custom parser, so we shouldn't continue on to the generic parsing. 5220 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5221 isToken(AsmToken::EndOfStatement)) 5222 return ResTy; 5223 5224 SMLoc RBraceLoc; 5225 SMLoc LBraceLoc = getLoc(); 5226 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5227 unsigned Prefix = Operands.size(); 5228 5229 for (;;) { 5230 auto Loc = getLoc(); 5231 ResTy = parseReg(Operands); 5232 if (ResTy == MatchOperand_NoMatch) 5233 Error(Loc, "expected a register"); 5234 if (ResTy != MatchOperand_Success) 5235 return MatchOperand_ParseFail; 5236 5237 RBraceLoc = getLoc(); 5238 if (trySkipToken(AsmToken::RBrac)) 5239 break; 5240 5241 if (!skipToken(AsmToken::Comma, 5242 "expected a comma or a closing square bracket")) { 5243 return MatchOperand_ParseFail; 5244 } 5245 } 5246 5247 if (Operands.size() - Prefix > 1) { 5248 Operands.insert(Operands.begin() + Prefix, 5249 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5250 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5251 } 5252 5253 return MatchOperand_Success; 5254 } 5255 5256 return parseRegOrImm(Operands); 5257 } 5258 5259 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5260 // Clear any forced encodings from the previous instruction. 
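// A recognized _e64, _e32, _dpp or _sdwa suffix below re-establishes the
// forced encoding for the current mnemonic and is stripped from the returned
// name.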
5261 setForcedEncodingSize(0); 5262 setForcedDPP(false); 5263 setForcedSDWA(false); 5264 5265 if (Name.endswith("_e64")) { 5266 setForcedEncodingSize(64); 5267 return Name.substr(0, Name.size() - 4); 5268 } else if (Name.endswith("_e32")) { 5269 setForcedEncodingSize(32); 5270 return Name.substr(0, Name.size() - 4); 5271 } else if (Name.endswith("_dpp")) { 5272 setForcedDPP(true); 5273 return Name.substr(0, Name.size() - 4); 5274 } else if (Name.endswith("_sdwa")) { 5275 setForcedSDWA(true); 5276 return Name.substr(0, Name.size() - 5); 5277 } 5278 return Name; 5279 } 5280 5281 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5282 StringRef Name, 5283 SMLoc NameLoc, OperandVector &Operands) { 5284 // Add the instruction mnemonic 5285 Name = parseMnemonicSuffix(Name); 5286 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5287 5288 bool IsMIMG = Name.startswith("image_"); 5289 5290 while (!trySkipToken(AsmToken::EndOfStatement)) { 5291 OperandMode Mode = OperandMode_Default; 5292 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5293 Mode = OperandMode_NSA; 5294 CPolSeen = 0; 5295 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5296 5297 if (Res != MatchOperand_Success) { 5298 checkUnsupportedInstruction(Name, NameLoc); 5299 if (!Parser.hasPendingError()) { 5300 // FIXME: use real operand location rather than the current location. 5301 StringRef Msg = 5302 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5303 "not a valid operand."; 5304 Error(getLoc(), Msg); 5305 } 5306 while (!trySkipToken(AsmToken::EndOfStatement)) { 5307 lex(); 5308 } 5309 return true; 5310 } 5311 5312 // Eat the comma or space if there is one. 5313 trySkipToken(AsmToken::Comma); 5314 } 5315 5316 return false; 5317 } 5318 5319 //===----------------------------------------------------------------------===// 5320 // Utility functions 5321 //===----------------------------------------------------------------------===// 5322 5323 OperandMatchResultTy 5324 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5325 5326 if (!trySkipId(Prefix, AsmToken::Colon)) 5327 return MatchOperand_NoMatch; 5328 5329 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5330 } 5331 5332 OperandMatchResultTy 5333 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5334 AMDGPUOperand::ImmTy ImmTy, 5335 bool (*ConvertResult)(int64_t&)) { 5336 SMLoc S = getLoc(); 5337 int64_t Value = 0; 5338 5339 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5340 if (Res != MatchOperand_Success) 5341 return Res; 5342 5343 if (ConvertResult && !ConvertResult(Value)) { 5344 Error(S, "invalid " + StringRef(Prefix) + " value."); 5345 } 5346 5347 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5348 return MatchOperand_Success; 5349 } 5350 5351 OperandMatchResultTy 5352 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5353 OperandVector &Operands, 5354 AMDGPUOperand::ImmTy ImmTy, 5355 bool (*ConvertResult)(int64_t&)) { 5356 SMLoc S = getLoc(); 5357 if (!trySkipId(Prefix, AsmToken::Colon)) 5358 return MatchOperand_NoMatch; 5359 5360 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5361 return MatchOperand_ParseFail; 5362 5363 unsigned Val = 0; 5364 const unsigned MaxSize = 4; 5365 5366 // FIXME: How to verify the number of elements matches the number of src 5367 // operands? 
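// Accept up to four 0/1 elements, e.g. an op_sel:[0,1,1,0] operand, packing
// element I into bit I of the resulting immediate.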
5368 for (int I = 0; ; ++I) { 5369 int64_t Op; 5370 SMLoc Loc = getLoc(); 5371 if (!parseExpr(Op)) 5372 return MatchOperand_ParseFail; 5373 5374 if (Op != 0 && Op != 1) { 5375 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5376 return MatchOperand_ParseFail; 5377 } 5378 5379 Val |= (Op << I); 5380 5381 if (trySkipToken(AsmToken::RBrac)) 5382 break; 5383 5384 if (I + 1 == MaxSize) { 5385 Error(getLoc(), "expected a closing square bracket"); 5386 return MatchOperand_ParseFail; 5387 } 5388 5389 if (!skipToken(AsmToken::Comma, "expected a comma")) 5390 return MatchOperand_ParseFail; 5391 } 5392 5393 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5394 return MatchOperand_Success; 5395 } 5396 5397 OperandMatchResultTy 5398 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5399 AMDGPUOperand::ImmTy ImmTy) { 5400 int64_t Bit; 5401 SMLoc S = getLoc(); 5402 5403 if (trySkipId(Name)) { 5404 Bit = 1; 5405 } else if (trySkipId("no", Name)) { 5406 Bit = 0; 5407 } else { 5408 return MatchOperand_NoMatch; 5409 } 5410 5411 if (Name == "r128" && !hasMIMG_R128()) { 5412 Error(S, "r128 modifier is not supported on this GPU"); 5413 return MatchOperand_ParseFail; 5414 } 5415 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5416 Error(S, "a16 modifier is not supported on this GPU"); 5417 return MatchOperand_ParseFail; 5418 } 5419 5420 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5421 ImmTy = AMDGPUOperand::ImmTyR128A16; 5422 5423 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5424 return MatchOperand_Success; 5425 } 5426 5427 OperandMatchResultTy 5428 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5429 unsigned CPolOn = 0; 5430 unsigned CPolOff = 0; 5431 SMLoc S = getLoc(); 5432 5433 if (trySkipId("glc")) 5434 CPolOn = AMDGPU::CPol::GLC; 5435 else if (trySkipId("noglc")) 5436 CPolOff = AMDGPU::CPol::GLC; 5437 else if (trySkipId("slc")) 5438 CPolOn = AMDGPU::CPol::SLC; 5439 else if (trySkipId("noslc")) 5440 CPolOff = AMDGPU::CPol::SLC; 5441 else if (trySkipId("dlc")) 5442 CPolOn = AMDGPU::CPol::DLC; 5443 else if (trySkipId("nodlc")) 5444 CPolOff = AMDGPU::CPol::DLC; 5445 else if (trySkipId("scc")) 5446 CPolOn = AMDGPU::CPol::SCC; 5447 else if (trySkipId("noscc")) 5448 CPolOff = AMDGPU::CPol::SCC; 5449 else 5450 return MatchOperand_NoMatch; 5451 5452 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5453 Error(S, "dlc modifier is not supported on this GPU"); 5454 return MatchOperand_ParseFail; 5455 } 5456 5457 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5458 Error(S, "scc modifier is not supported on this GPU"); 5459 return MatchOperand_ParseFail; 5460 } 5461 5462 if (CPolSeen & (CPolOn | CPolOff)) { 5463 Error(S, "duplicate cache policy modifier"); 5464 return MatchOperand_ParseFail; 5465 } 5466 5467 CPolSeen |= (CPolOn | CPolOff); 5468 5469 for (unsigned I = 1; I != Operands.size(); ++I) { 5470 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5471 if (Op.isCPol()) { 5472 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5473 return MatchOperand_Success; 5474 } 5475 } 5476 5477 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5478 AMDGPUOperand::ImmTyCPol)); 5479 5480 return MatchOperand_Success; 5481 } 5482 5483 static void addOptionalImmOperand( 5484 MCInst& Inst, const OperandVector& Operands, 5485 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5486 AMDGPUOperand::ImmTy ImmT, 5487 int64_t Default = 0) { 5488 auto i = OptionalIdx.find(ImmT); 5489 if (i != OptionalIdx.end()) { 
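// The operand was supplied explicitly; emit the immediate that was parsed.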
5490 unsigned Idx = i->second; 5491 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5492 } else { 5493 Inst.addOperand(MCOperand::createImm(Default)); 5494 } 5495 } 5496 5497 OperandMatchResultTy 5498 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5499 StringRef &Value, 5500 SMLoc &StringLoc) { 5501 if (!trySkipId(Prefix, AsmToken::Colon)) 5502 return MatchOperand_NoMatch; 5503 5504 StringLoc = getLoc(); 5505 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5506 : MatchOperand_ParseFail; 5507 } 5508 5509 //===----------------------------------------------------------------------===// 5510 // MTBUF format 5511 //===----------------------------------------------------------------------===// 5512 5513 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5514 int64_t MaxVal, 5515 int64_t &Fmt) { 5516 int64_t Val; 5517 SMLoc Loc = getLoc(); 5518 5519 auto Res = parseIntWithPrefix(Pref, Val); 5520 if (Res == MatchOperand_ParseFail) 5521 return false; 5522 if (Res == MatchOperand_NoMatch) 5523 return true; 5524 5525 if (Val < 0 || Val > MaxVal) { 5526 Error(Loc, Twine("out of range ", StringRef(Pref))); 5527 return false; 5528 } 5529 5530 Fmt = Val; 5531 return true; 5532 } 5533 5534 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5535 // values to live in a joint format operand in the MCInst encoding. 5536 OperandMatchResultTy 5537 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5538 using namespace llvm::AMDGPU::MTBUFFormat; 5539 5540 int64_t Dfmt = DFMT_UNDEF; 5541 int64_t Nfmt = NFMT_UNDEF; 5542 5543 // dfmt and nfmt can appear in either order, and each is optional. 5544 for (int I = 0; I < 2; ++I) { 5545 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5546 return MatchOperand_ParseFail; 5547 5548 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5549 return MatchOperand_ParseFail; 5550 } 5551 // Skip optional comma between dfmt/nfmt 5552 // but guard against 2 commas following each other. 5553 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5554 !peekToken().is(AsmToken::Comma)) { 5555 trySkipToken(AsmToken::Comma); 5556 } 5557 } 5558 5559 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5560 return MatchOperand_NoMatch; 5561 5562 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5563 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5564 5565 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5566 return MatchOperand_Success; 5567 } 5568 5569 OperandMatchResultTy 5570 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5571 using namespace llvm::AMDGPU::MTBUFFormat; 5572 5573 int64_t Fmt = UFMT_UNDEF; 5574 5575 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5576 return MatchOperand_ParseFail; 5577 5578 if (Fmt == UFMT_UNDEF) 5579 return MatchOperand_NoMatch; 5580 5581 Format = Fmt; 5582 return MatchOperand_Success; 5583 } 5584 5585 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5586 int64_t &Nfmt, 5587 StringRef FormatStr, 5588 SMLoc Loc) { 5589 using namespace llvm::AMDGPU::MTBUFFormat; 5590 int64_t Format; 5591 5592 Format = getDfmt(FormatStr); 5593 if (Format != DFMT_UNDEF) { 5594 Dfmt = Format; 5595 return true; 5596 } 5597 5598 Format = getNfmt(FormatStr, getSTI()); 5599 if (Format != NFMT_UNDEF) { 5600 Nfmt = Format; 5601 return true; 5602 } 5603 5604 Error(Loc, "unsupported format"); 5605 return false; 5606 } 5607 5608 OperandMatchResultTy 5609 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5610 SMLoc FormatLoc, 5611 int64_t &Format) { 5612 using namespace llvm::AMDGPU::MTBUFFormat; 5613 5614 int64_t Dfmt = DFMT_UNDEF; 5615 int64_t Nfmt = NFMT_UNDEF; 5616 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5617 return MatchOperand_ParseFail; 5618 5619 if (trySkipToken(AsmToken::Comma)) { 5620 StringRef Str; 5621 SMLoc Loc = getLoc(); 5622 if (!parseId(Str, "expected a format string") || 5623 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5624 return MatchOperand_ParseFail; 5625 } 5626 if (Dfmt == DFMT_UNDEF) { 5627 Error(Loc, "duplicate numeric format"); 5628 return MatchOperand_ParseFail; 5629 } else if (Nfmt == NFMT_UNDEF) { 5630 Error(Loc, "duplicate data format"); 5631 return MatchOperand_ParseFail; 5632 } 5633 } 5634 5635 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5636 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5637 5638 if (isGFX10Plus()) { 5639 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5640 if (Ufmt == UFMT_UNDEF) { 5641 Error(FormatLoc, "unsupported format"); 5642 return MatchOperand_ParseFail; 5643 } 5644 Format = Ufmt; 5645 } else { 5646 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5647 } 5648 5649 return MatchOperand_Success; 5650 } 5651 5652 OperandMatchResultTy 5653 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5654 SMLoc Loc, 5655 int64_t &Format) { 5656 using namespace llvm::AMDGPU::MTBUFFormat; 5657 5658 auto Id = getUnifiedFormat(FormatStr); 5659 if (Id == UFMT_UNDEF) 5660 return MatchOperand_NoMatch; 5661 5662 if (!isGFX10Plus()) { 5663 Error(Loc, "unified format is not supported on this GPU"); 5664 return MatchOperand_ParseFail; 5665 } 5666 5667 Format = Id; 5668 return MatchOperand_Success; 5669 } 5670 5671 OperandMatchResultTy 5672 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5673 using namespace llvm::AMDGPU::MTBUFFormat; 5674 SMLoc Loc = getLoc(); 5675 5676 if (!parseExpr(Format)) 5677 return MatchOperand_ParseFail; 5678 if (!isValidFormatEncoding(Format, getSTI())) { 5679 Error(Loc, "out of range format"); 5680 return MatchOperand_ParseFail; 5681 } 5682 5683 return MatchOperand_Success; 5684 } 5685 5686 OperandMatchResultTy 5687 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5688 using namespace llvm::AMDGPU::MTBUFFormat; 5689 5690 if (!trySkipId("format", AsmToken::Colon)) 5691 return MatchOperand_NoMatch; 5692 5693 if (trySkipToken(AsmToken::LBrac)) { 5694 StringRef FormatStr; 5695 SMLoc Loc = getLoc(); 5696 if (!parseId(FormatStr, "expected a format string")) 5697 return MatchOperand_ParseFail; 5698 5699 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5700 if (Res == MatchOperand_NoMatch) 5701 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5702 if (Res != MatchOperand_Success) 5703 return Res; 5704 5705 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5706 return MatchOperand_ParseFail; 5707 5708 return MatchOperand_Success; 5709 } 5710 5711 return parseNumericFormat(Format); 5712 } 5713 5714 OperandMatchResultTy 5715 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5716 using namespace llvm::AMDGPU::MTBUFFormat; 5717 5718 int64_t Format = getDefaultFormatEncoding(getSTI()); 5719 OperandMatchResultTy Res; 5720 SMLoc Loc = getLoc(); 5721 5722 // Parse legacy format syntax. 5723 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5724 if (Res == MatchOperand_ParseFail) 5725 return Res; 5726 5727 bool FormatFound = (Res == MatchOperand_Success); 5728 5729 Operands.push_back( 5730 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5731 5732 if (FormatFound) 5733 trySkipToken(AsmToken::Comma); 5734 5735 if (isToken(AsmToken::EndOfStatement)) { 5736 // We are expecting an soffset operand, 5737 // but let matcher handle the error. 5738 return MatchOperand_Success; 5739 } 5740 5741 // Parse soffset. 
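// A symbolic or numeric format (e.g. format:[BUF_FMT_32_FLOAT] on GFX10+) may
// also follow soffset; in that case the placeholder FORMAT operand pushed
// above is patched with the parsed value below.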
5742 Res = parseRegOrImm(Operands); 5743 if (Res != MatchOperand_Success) 5744 return Res; 5745 5746 trySkipToken(AsmToken::Comma); 5747 5748 if (!FormatFound) { 5749 Res = parseSymbolicOrNumericFormat(Format); 5750 if (Res == MatchOperand_ParseFail) 5751 return Res; 5752 if (Res == MatchOperand_Success) { 5753 auto Size = Operands.size(); 5754 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5755 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5756 Op.setImm(Format); 5757 } 5758 return MatchOperand_Success; 5759 } 5760 5761 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5762 Error(getLoc(), "duplicate format"); 5763 return MatchOperand_ParseFail; 5764 } 5765 return MatchOperand_Success; 5766 } 5767 5768 //===----------------------------------------------------------------------===// 5769 // ds 5770 //===----------------------------------------------------------------------===// 5771 5772 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5773 const OperandVector &Operands) { 5774 OptionalImmIndexMap OptionalIdx; 5775 5776 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5777 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5778 5779 // Add the register arguments 5780 if (Op.isReg()) { 5781 Op.addRegOperands(Inst, 1); 5782 continue; 5783 } 5784 5785 // Handle optional arguments 5786 OptionalIdx[Op.getImmTy()] = i; 5787 } 5788 5789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5792 5793 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5794 } 5795 5796 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5797 bool IsGdsHardcoded) { 5798 OptionalImmIndexMap OptionalIdx; 5799 5800 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5801 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5802 5803 // Add the register arguments 5804 if (Op.isReg()) { 5805 Op.addRegOperands(Inst, 1); 5806 continue; 5807 } 5808 5809 if (Op.isToken() && Op.getToken() == "gds") { 5810 IsGdsHardcoded = true; 5811 continue; 5812 } 5813 5814 // Handle optional arguments 5815 OptionalIdx[Op.getImmTy()] = i; 5816 } 5817 5818 AMDGPUOperand::ImmTy OffsetType = 5819 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5820 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5821 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5822 AMDGPUOperand::ImmTyOffset; 5823 5824 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5825 5826 if (!IsGdsHardcoded) { 5827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5828 } 5829 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5830 } 5831 5832 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5833 OptionalImmIndexMap OptionalIdx; 5834 5835 unsigned OperandIdx[4]; 5836 unsigned EnMask = 0; 5837 int SrcIdx = 0; 5838 5839 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5840 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5841 5842 // Add the register arguments 5843 if (Op.isReg()) { 5844 assert(SrcIdx < 4); 5845 OperandIdx[SrcIdx] = Inst.size(); 5846 Op.addRegOperands(Inst, 1); 5847 ++SrcIdx; 5848 continue; 5849 } 5850 5851 if (Op.isOff()) { 5852 assert(SrcIdx < 4); 5853 OperandIdx[SrcIdx] = Inst.size(); 5854 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5855 ++SrcIdx; 5856 continue; 5857 } 5858 5859 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5860 Op.addImmOperands(Inst, 1); 5861 continue; 5862 } 5863 5864 if (Op.isToken() && Op.getToken() == "done") 5865 continue; 5866 5867 // Handle optional arguments 5868 OptionalIdx[Op.getImmTy()] = i; 5869 } 5870 5871 assert(SrcIdx == 4); 5872 5873 bool Compr = false; 5874 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5875 Compr = true; 5876 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5877 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5878 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5879 } 5880 5881 for (auto i = 0; i < SrcIdx; ++i) { 5882 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5883 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5884 } 5885 } 5886 5887 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5888 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5889 5890 Inst.addOperand(MCOperand::createImm(EnMask)); 5891 } 5892 5893 //===----------------------------------------------------------------------===// 5894 // s_waitcnt 5895 //===----------------------------------------------------------------------===// 5896 5897 static bool 5898 encodeCnt( 5899 const AMDGPU::IsaVersion ISA, 5900 int64_t &IntVal, 5901 int64_t CntVal, 5902 bool Saturate, 5903 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5904 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5905 { 5906 bool Failed = false; 5907 5908 IntVal = encode(ISA, IntVal, CntVal); 5909 if (CntVal != decode(ISA, IntVal)) { 5910 if (Saturate) { 5911 IntVal = encode(ISA, IntVal, -1); 5912 } else { 5913 Failed = true; 5914 } 5915 } 5916 return Failed; 5917 } 5918 5919 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5920 5921 SMLoc CntLoc = getLoc(); 5922 StringRef CntName = getTokenStr(); 5923 5924 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5925 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5926 return false; 5927 5928 int64_t CntVal; 5929 SMLoc ValLoc = getLoc(); 5930 if (!parseExpr(CntVal)) 5931 return false; 5932 5933 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5934 5935 bool Failed = true; 5936 bool Sat = CntName.endswith("_sat"); 5937 5938 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5939 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5940 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5941 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5942 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5943 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5944 } else { 5945 Error(CntLoc, "invalid counter name " + CntName); 5946 return false; 5947 } 5948 5949 if (Failed) { 5950 Error(ValLoc, "too large value for " + CntName); 5951 return false; 5952 } 5953 5954 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5955 return false; 5956 5957 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5958 if (isToken(AsmToken::EndOfStatement)) { 5959 Error(getLoc(), "expected a counter name"); 5960 return false; 5961 } 5962 } 5963 5964 return true; 5965 } 5966 5967 OperandMatchResultTy 5968 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5969 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5970 int64_t Waitcnt = getWaitcntBitMask(ISA); 5971 SMLoc S = getLoc(); 5972 5973 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5974 while (!isToken(AsmToken::EndOfStatement)) { 5975 if (!parseCnt(Waitcnt)) 5976 return MatchOperand_ParseFail; 5977 } 5978 } else { 5979 if (!parseExpr(Waitcnt)) 5980 return MatchOperand_ParseFail; 5981 } 5982 5983 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5984 return MatchOperand_Success; 5985 } 5986 5987 bool 5988 AMDGPUOperand::isSWaitCnt() const { 5989 return isImm(); 5990 } 5991 5992 //===----------------------------------------------------------------------===// 5993 // hwreg 5994 //===----------------------------------------------------------------------===// 5995 5996 bool 5997 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5998 OperandInfoTy &Offset, 5999 
OperandInfoTy &Width) { 6000 using namespace llvm::AMDGPU::Hwreg; 6001 6002 // The register may be specified by name or using a numeric code 6003 HwReg.Loc = getLoc(); 6004 if (isToken(AsmToken::Identifier) && 6005 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6006 HwReg.IsSymbolic = true; 6007 lex(); // skip register name 6008 } else if (!parseExpr(HwReg.Id, "a register name")) { 6009 return false; 6010 } 6011 6012 if (trySkipToken(AsmToken::RParen)) 6013 return true; 6014 6015 // parse optional params 6016 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6017 return false; 6018 6019 Offset.Loc = getLoc(); 6020 if (!parseExpr(Offset.Id)) 6021 return false; 6022 6023 if (!skipToken(AsmToken::Comma, "expected a comma")) 6024 return false; 6025 6026 Width.Loc = getLoc(); 6027 return parseExpr(Width.Id) && 6028 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6029 } 6030 6031 bool 6032 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6033 const OperandInfoTy &Offset, 6034 const OperandInfoTy &Width) { 6035 6036 using namespace llvm::AMDGPU::Hwreg; 6037 6038 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6039 Error(HwReg.Loc, 6040 "specified hardware register is not supported on this GPU"); 6041 return false; 6042 } 6043 if (!isValidHwreg(HwReg.Id)) { 6044 Error(HwReg.Loc, 6045 "invalid code of hardware register: only 6-bit values are legal"); 6046 return false; 6047 } 6048 if (!isValidHwregOffset(Offset.Id)) { 6049 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6050 return false; 6051 } 6052 if (!isValidHwregWidth(Width.Id)) { 6053 Error(Width.Loc, 6054 "invalid bitfield width: only values from 1 to 32 are legal"); 6055 return false; 6056 } 6057 return true; 6058 } 6059 6060 OperandMatchResultTy 6061 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6062 using namespace llvm::AMDGPU::Hwreg; 6063 6064 int64_t ImmVal = 0; 6065 SMLoc Loc = getLoc(); 6066 6067 if (trySkipId("hwreg", AsmToken::LParen)) { 6068 OperandInfoTy HwReg(ID_UNKNOWN_); 6069 OperandInfoTy Offset(OFFSET_DEFAULT_); 6070 OperandInfoTy Width(WIDTH_DEFAULT_); 6071 if (parseHwregBody(HwReg, Offset, Width) && 6072 validateHwreg(HwReg, Offset, Width)) { 6073 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6074 } else { 6075 return MatchOperand_ParseFail; 6076 } 6077 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6078 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6079 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6080 return MatchOperand_ParseFail; 6081 } 6082 } else { 6083 return MatchOperand_ParseFail; 6084 } 6085 6086 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6087 return MatchOperand_Success; 6088 } 6089 6090 bool AMDGPUOperand::isHwreg() const { 6091 return isImmTy(ImmTyHwreg); 6092 } 6093 6094 //===----------------------------------------------------------------------===// 6095 // sendmsg 6096 //===----------------------------------------------------------------------===// 6097 6098 bool 6099 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6100 OperandInfoTy &Op, 6101 OperandInfoTy &Stream) { 6102 using namespace llvm::AMDGPU::SendMsg; 6103 6104 Msg.Loc = getLoc(); 6105 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6106 Msg.IsSymbolic = true; 6107 lex(); // skip message name 6108 } else if (!parseExpr(Msg.Id, "a message name")) { 6109 return false; 6110 } 6111 6112 if (trySkipToken(AsmToken::Comma)) { 6113 Op.IsDefined = true; 
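// An operation id follows the message; it may be given symbolically or as an
// expression, and an optional stream id may follow after another comma.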
6114 Op.Loc = getLoc(); 6115 if (isToken(AsmToken::Identifier) && 6116 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6117 lex(); // skip operation name 6118 } else if (!parseExpr(Op.Id, "an operation name")) { 6119 return false; 6120 } 6121 6122 if (trySkipToken(AsmToken::Comma)) { 6123 Stream.IsDefined = true; 6124 Stream.Loc = getLoc(); 6125 if (!parseExpr(Stream.Id)) 6126 return false; 6127 } 6128 } 6129 6130 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6131 } 6132 6133 bool 6134 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6135 const OperandInfoTy &Op, 6136 const OperandInfoTy &Stream) { 6137 using namespace llvm::AMDGPU::SendMsg; 6138 6139 // Validation strictness depends on whether the message is specified 6140 // in a symbolic or in a numeric form. In the latter case 6141 // only the encoding possibility is checked. 6142 bool Strict = Msg.IsSymbolic; 6143 6144 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6145 Error(Msg.Loc, "invalid message id"); 6146 return false; 6147 } 6148 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6149 if (Op.IsDefined) { 6150 Error(Op.Loc, "message does not support operations"); 6151 } else { 6152 Error(Msg.Loc, "missing message operation"); 6153 } 6154 return false; 6155 } 6156 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6157 Error(Op.Loc, "invalid operation id"); 6158 return false; 6159 } 6160 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6161 Error(Stream.Loc, "message operation does not support streams"); 6162 return false; 6163 } 6164 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6165 Error(Stream.Loc, "invalid message stream id"); 6166 return false; 6167 } 6168 return true; 6169 } 6170 6171 OperandMatchResultTy 6172 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6173 using namespace llvm::AMDGPU::SendMsg; 6174 6175 int64_t ImmVal = 0; 6176 SMLoc Loc = getLoc(); 6177 6178 if (trySkipId("sendmsg", AsmToken::LParen)) { 6179 OperandInfoTy Msg(ID_UNKNOWN_); 6180 OperandInfoTy Op(OP_NONE_); 6181 OperandInfoTy Stream(STREAM_ID_NONE_); 6182 if (parseSendMsgBody(Msg, Op, Stream) && 6183 validateSendMsg(Msg, Op, Stream)) { 6184 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6185 } else { 6186 return MatchOperand_ParseFail; 6187 } 6188 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6189 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6190 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6191 return MatchOperand_ParseFail; 6192 } 6193 } else { 6194 return MatchOperand_ParseFail; 6195 } 6196 6197 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6198 return MatchOperand_Success; 6199 } 6200 6201 bool AMDGPUOperand::isSendMsg() const { 6202 return isImmTy(ImmTySendMsg); 6203 } 6204 6205 //===----------------------------------------------------------------------===// 6206 // v_interp 6207 //===----------------------------------------------------------------------===// 6208 6209 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6210 StringRef Str; 6211 SMLoc S = getLoc(); 6212 6213 if (!parseId(Str)) 6214 return MatchOperand_NoMatch; 6215 6216 int Slot = StringSwitch<int>(Str) 6217 .Case("p10", 0) 6218 .Case("p20", 1) 6219 .Case("p0", 2) 6220 .Default(-1); 6221 6222 if (Slot == -1) { 6223 Error(S, "invalid interpolation slot"); 6224 return MatchOperand_ParseFail; 6225 } 6226 6227 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6228
AMDGPUOperand::ImmTyInterpSlot)); 6229 return MatchOperand_Success; 6230 } 6231 6232 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6233 StringRef Str; 6234 SMLoc S = getLoc(); 6235 6236 if (!parseId(Str)) 6237 return MatchOperand_NoMatch; 6238 6239 if (!Str.startswith("attr")) { 6240 Error(S, "invalid interpolation attribute"); 6241 return MatchOperand_ParseFail; 6242 } 6243 6244 StringRef Chan = Str.take_back(2); 6245 int AttrChan = StringSwitch<int>(Chan) 6246 .Case(".x", 0) 6247 .Case(".y", 1) 6248 .Case(".z", 2) 6249 .Case(".w", 3) 6250 .Default(-1); 6251 if (AttrChan == -1) { 6252 Error(S, "invalid or missing interpolation attribute channel"); 6253 return MatchOperand_ParseFail; 6254 } 6255 6256 Str = Str.drop_back(2).drop_front(4); 6257 6258 uint8_t Attr; 6259 if (Str.getAsInteger(10, Attr)) { 6260 Error(S, "invalid or missing interpolation attribute number"); 6261 return MatchOperand_ParseFail; 6262 } 6263 6264 if (Attr > 63) { 6265 Error(S, "out of bounds interpolation attribute number"); 6266 return MatchOperand_ParseFail; 6267 } 6268 6269 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6270 6271 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6272 AMDGPUOperand::ImmTyInterpAttr)); 6273 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6274 AMDGPUOperand::ImmTyAttrChan)); 6275 return MatchOperand_Success; 6276 } 6277 6278 //===----------------------------------------------------------------------===// 6279 // exp 6280 //===----------------------------------------------------------------------===// 6281 6282 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6283 using namespace llvm::AMDGPU::Exp; 6284 6285 StringRef Str; 6286 SMLoc S = getLoc(); 6287 6288 if (!parseId(Str)) 6289 return MatchOperand_NoMatch; 6290 6291 unsigned Id = getTgtId(Str); 6292 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6293 Error(S, (Id == ET_INVALID) ? 
6294 "invalid exp target" : 6295 "exp target is not supported on this GPU"); 6296 return MatchOperand_ParseFail; 6297 } 6298 6299 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6300 AMDGPUOperand::ImmTyExpTgt)); 6301 return MatchOperand_Success; 6302 } 6303 6304 //===----------------------------------------------------------------------===// 6305 // parser helpers 6306 //===----------------------------------------------------------------------===// 6307 6308 bool 6309 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6310 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6311 } 6312 6313 bool 6314 AMDGPUAsmParser::isId(const StringRef Id) const { 6315 return isId(getToken(), Id); 6316 } 6317 6318 bool 6319 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6320 return getTokenKind() == Kind; 6321 } 6322 6323 bool 6324 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6325 if (isId(Id)) { 6326 lex(); 6327 return true; 6328 } 6329 return false; 6330 } 6331 6332 bool 6333 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6334 if (isToken(AsmToken::Identifier)) { 6335 StringRef Tok = getTokenStr(); 6336 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6337 lex(); 6338 return true; 6339 } 6340 } 6341 return false; 6342 } 6343 6344 bool 6345 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6346 if (isId(Id) && peekToken().is(Kind)) { 6347 lex(); 6348 lex(); 6349 return true; 6350 } 6351 return false; 6352 } 6353 6354 bool 6355 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6356 if (isToken(Kind)) { 6357 lex(); 6358 return true; 6359 } 6360 return false; 6361 } 6362 6363 bool 6364 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6365 const StringRef ErrMsg) { 6366 if (!trySkipToken(Kind)) { 6367 Error(getLoc(), ErrMsg); 6368 return false; 6369 } 6370 return true; 6371 } 6372 6373 bool 6374 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6375 SMLoc S = getLoc(); 6376 6377 const MCExpr *Expr; 6378 if (Parser.parseExpression(Expr)) 6379 return false; 6380 6381 if (Expr->evaluateAsAbsolute(Imm)) 6382 return true; 6383 6384 if (Expected.empty()) { 6385 Error(S, "expected absolute expression"); 6386 } else { 6387 Error(S, Twine("expected ", Expected) + 6388 Twine(" or an absolute expression")); 6389 } 6390 return false; 6391 } 6392 6393 bool 6394 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6395 SMLoc S = getLoc(); 6396 6397 const MCExpr *Expr; 6398 if (Parser.parseExpression(Expr)) 6399 return false; 6400 6401 int64_t IntVal; 6402 if (Expr->evaluateAsAbsolute(IntVal)) { 6403 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6404 } else { 6405 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6406 } 6407 return true; 6408 } 6409 6410 bool 6411 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6412 if (isToken(AsmToken::String)) { 6413 Val = getToken().getStringContents(); 6414 lex(); 6415 return true; 6416 } else { 6417 Error(getLoc(), ErrMsg); 6418 return false; 6419 } 6420 } 6421 6422 bool 6423 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6424 if (isToken(AsmToken::Identifier)) { 6425 Val = getTokenStr(); 6426 lex(); 6427 return true; 6428 } else { 6429 if (!ErrMsg.empty()) 6430 Error(getLoc(), ErrMsg); 6431 return false; 6432 } 6433 } 6434 6435 AsmToken 6436 AMDGPUAsmParser::getToken() const { 6437 return Parser.getTok(); 6438 } 6439 6440 AsmToken 6441 
AMDGPUAsmParser::peekToken() { 6442 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6443 } 6444 6445 void 6446 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6447 auto TokCount = getLexer().peekTokens(Tokens); 6448 6449 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6450 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6451 } 6452 6453 AsmToken::TokenKind 6454 AMDGPUAsmParser::getTokenKind() const { 6455 return getLexer().getKind(); 6456 } 6457 6458 SMLoc 6459 AMDGPUAsmParser::getLoc() const { 6460 return getToken().getLoc(); 6461 } 6462 6463 StringRef 6464 AMDGPUAsmParser::getTokenStr() const { 6465 return getToken().getString(); 6466 } 6467 6468 void 6469 AMDGPUAsmParser::lex() { 6470 Parser.Lex(); 6471 } 6472 6473 SMLoc 6474 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6475 const OperandVector &Operands) const { 6476 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6477 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6478 if (Test(Op)) 6479 return Op.getStartLoc(); 6480 } 6481 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6482 } 6483 6484 SMLoc 6485 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6486 const OperandVector &Operands) const { 6487 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6488 return getOperandLoc(Test, Operands); 6489 } 6490 6491 SMLoc 6492 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6493 const OperandVector &Operands) const { 6494 auto Test = [=](const AMDGPUOperand& Op) { 6495 return Op.isRegKind() && Op.getReg() == Reg; 6496 }; 6497 return getOperandLoc(Test, Operands); 6498 } 6499 6500 SMLoc 6501 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6502 auto Test = [](const AMDGPUOperand& Op) { 6503 return Op.IsImmKindLiteral() || Op.isExpr(); 6504 }; 6505 return getOperandLoc(Test, Operands); 6506 } 6507 6508 SMLoc 6509 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6510 auto Test = [](const AMDGPUOperand& Op) { 6511 return Op.isImmKindConst(); 6512 }; 6513 return getOperandLoc(Test, Operands); 6514 } 6515 6516 //===----------------------------------------------------------------------===// 6517 // swizzle 6518 //===----------------------------------------------------------------------===// 6519 6520 LLVM_READNONE 6521 static unsigned 6522 encodeBitmaskPerm(const unsigned AndMask, 6523 const unsigned OrMask, 6524 const unsigned XorMask) { 6525 using namespace llvm::AMDGPU::Swizzle; 6526 6527 return BITMASK_PERM_ENC | 6528 (AndMask << BITMASK_AND_SHIFT) | 6529 (OrMask << BITMASK_OR_SHIFT) | 6530 (XorMask << BITMASK_XOR_SHIFT); 6531 } 6532 6533 bool 6534 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6535 const unsigned MinVal, 6536 const unsigned MaxVal, 6537 const StringRef ErrMsg, 6538 SMLoc &Loc) { 6539 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6540 return false; 6541 } 6542 Loc = getLoc(); 6543 if (!parseExpr(Op)) { 6544 return false; 6545 } 6546 if (Op < MinVal || Op > MaxVal) { 6547 Error(Loc, ErrMsg); 6548 return false; 6549 } 6550 6551 return true; 6552 } 6553 6554 bool 6555 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6556 const unsigned MinVal, 6557 const unsigned MaxVal, 6558 const StringRef ErrMsg) { 6559 SMLoc Loc; 6560 for (unsigned i = 0; i < OpNum; ++i) { 6561 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6562 return false; 6563 } 6564 6565 return true; 6566 } 6567 6568 bool 6569 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6570 using namespace llvm::AMDGPU::Swizzle; 6571 6572 int64_t Lane[LANE_NUM]; 6573 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6574 "expected a 2-bit lane id")) { 6575 Imm = QUAD_PERM_ENC; 6576 for (unsigned I = 0; I < LANE_NUM; ++I) { 6577 Imm |= Lane[I] << (LANE_SHIFT * I); 6578 } 6579 return true; 6580 } 6581 return false; 6582 } 6583 6584 bool 6585 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6586 using namespace llvm::AMDGPU::Swizzle; 6587 6588 SMLoc Loc; 6589 int64_t GroupSize; 6590 int64_t LaneIdx; 6591 6592 if (!parseSwizzleOperand(GroupSize, 6593 2, 32, 6594 "group size must be in the interval [2,32]", 6595 Loc)) { 6596 return false; 6597 } 6598 if (!isPowerOf2_64(GroupSize)) { 6599 Error(Loc, "group size must be a power of two"); 6600 return false; 6601 } 6602 if (parseSwizzleOperand(LaneIdx, 6603 0, GroupSize - 1, 6604 "lane id must be in the interval [0,group size - 1]", 6605 Loc)) { 6606 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6607 return true; 6608 } 6609 return false; 6610 } 6611 6612 bool 6613 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6614 using namespace llvm::AMDGPU::Swizzle; 6615 6616 SMLoc Loc; 6617 int64_t GroupSize; 6618 6619 if (!parseSwizzleOperand(GroupSize, 6620 2, 32, 6621 "group size must be in the interval [2,32]", 6622 Loc)) { 6623 return false; 6624 } 6625 if (!isPowerOf2_64(GroupSize)) { 6626 Error(Loc, "group size must be a power of two"); 6627 return false; 6628 } 6629 6630 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6631 return true; 6632 } 6633 6634 bool 6635 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6636 using namespace llvm::AMDGPU::Swizzle; 6637 6638 SMLoc Loc; 6639 int64_t GroupSize; 6640 6641 if (!parseSwizzleOperand(GroupSize, 6642 1, 16, 6643 "group size must be in the interval [1,16]", 6644 Loc)) { 6645 return false; 6646 } 6647 if (!isPowerOf2_64(GroupSize)) { 6648 Error(Loc, "group size must be a power of two"); 6649 return false; 6650 } 6651 6652 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6653 return true; 6654 } 6655 6656 bool 6657 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6658 using namespace llvm::AMDGPU::Swizzle; 6659 6660 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6661 return false; 6662 } 6663 6664 StringRef Ctl; 6665 SMLoc StrLoc = getLoc(); 6666 if (!parseString(Ctl)) { 6667 return false; 6668 } 6669 if (Ctl.size() != BITMASK_WIDTH) { 6670 Error(StrLoc, "expected a 5-character mask"); 6671 return false; 6672 } 6673 6674 unsigned AndMask = 0; 6675 unsigned OrMask = 0; 6676 unsigned XorMask = 0; 6677 6678 for (size_t i = 0; i < Ctl.size(); ++i) { 6679 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6680 switch(Ctl[i]) { 6681 default: 6682 Error(StrLoc, "invalid mask"); 6683 return false; 6684 case '0': 6685 break; 6686 case '1': 6687 OrMask |= Mask; 6688 break; 6689 case 'p': 6690 AndMask |= Mask; 6691 break; 6692 case 'i': 6693 AndMask |= Mask; 6694 XorMask |= Mask; 6695 break; 6696 } 6697 } 6698 6699 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6700 return true; 6701 } 6702 6703 bool 6704 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6705 6706 SMLoc OffsetLoc = getLoc(); 6707 6708 if (!parseExpr(Imm, "a swizzle macro")) { 6709 return false; 6710 } 6711 if (!isUInt<16>(Imm)) { 6712 Error(OffsetLoc, "expected a 16-bit offset"); 6713 return false; 6714 } 6715 return true; 6716 } 6717 6718 bool 6719 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6720 using namespace llvm::AMDGPU::Swizzle; 6721 6722 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6723 6724 SMLoc ModeLoc = getLoc(); 6725 bool Ok = false; 6726 6727 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6728 Ok = parseSwizzleQuadPerm(Imm); 6729 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6730 Ok = parseSwizzleBitmaskPerm(Imm); 6731 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6732 Ok = parseSwizzleBroadcast(Imm); 6733 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6734 Ok = parseSwizzleSwap(Imm); 6735 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6736 Ok = parseSwizzleReverse(Imm); 6737 } else { 6738 Error(ModeLoc, "expected a swizzle mode"); 6739 } 6740 6741 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6742 } 6743 6744 return false; 6745 } 6746 6747 OperandMatchResultTy 6748 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6749 SMLoc S = getLoc(); 6750 int64_t Imm = 0; 6751 6752 if (trySkipId("offset")) { 6753 6754 bool Ok = false; 6755 if (skipToken(AsmToken::Colon, "expected a colon")) { 6756 if (trySkipId("swizzle")) { 6757 Ok = parseSwizzleMacro(Imm); 6758 } else { 6759 Ok = parseSwizzleOffset(Imm); 6760 } 6761 } 6762 6763 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6764 6765 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6766 } else { 6767 // Swizzle "offset" operand is optional. 6768 // If it is omitted, try parsing other optional operands. 6769 return parseOptionalOpr(Operands); 6770 } 6771 } 6772 6773 bool 6774 AMDGPUOperand::isSwizzle() const { 6775 return isImmTy(ImmTySwizzle); 6776 } 6777 6778 //===----------------------------------------------------------------------===// 6779 // VGPR Index Mode 6780 //===----------------------------------------------------------------------===// 6781 6782 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6783 6784 using namespace llvm::AMDGPU::VGPRIndexMode; 6785 6786 if (trySkipToken(AsmToken::RParen)) { 6787 return OFF; 6788 } 6789 6790 int64_t Imm = 0; 6791 6792 while (true) { 6793 unsigned Mode = 0; 6794 SMLoc S = getLoc(); 6795 6796 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6797 if (trySkipId(IdSymbolic[ModeId])) { 6798 Mode = 1 << ModeId; 6799 break; 6800 } 6801 } 6802 6803 if (Mode == 0) { 6804 Error(S, (Imm == 0)?
6805 "expected a VGPR index mode or a closing parenthesis" : 6806 "expected a VGPR index mode"); 6807 return UNDEF; 6808 } 6809 6810 if (Imm & Mode) { 6811 Error(S, "duplicate VGPR index mode"); 6812 return UNDEF; 6813 } 6814 Imm |= Mode; 6815 6816 if (trySkipToken(AsmToken::RParen)) 6817 break; 6818 if (!skipToken(AsmToken::Comma, 6819 "expected a comma or a closing parenthesis")) 6820 return UNDEF; 6821 } 6822 6823 return Imm; 6824 } 6825 6826 OperandMatchResultTy 6827 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6828 6829 using namespace llvm::AMDGPU::VGPRIndexMode; 6830 6831 int64_t Imm = 0; 6832 SMLoc S = getLoc(); 6833 6834 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6835 Imm = parseGPRIdxMacro(); 6836 if (Imm == UNDEF) 6837 return MatchOperand_ParseFail; 6838 } else { 6839 if (getParser().parseAbsoluteExpression(Imm)) 6840 return MatchOperand_ParseFail; 6841 if (Imm < 0 || !isUInt<4>(Imm)) { 6842 Error(S, "invalid immediate: only 4-bit values are legal"); 6843 return MatchOperand_ParseFail; 6844 } 6845 } 6846 6847 Operands.push_back( 6848 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6849 return MatchOperand_Success; 6850 } 6851 6852 bool AMDGPUOperand::isGPRIdxMode() const { 6853 return isImmTy(ImmTyGprIdxMode); 6854 } 6855 6856 //===----------------------------------------------------------------------===// 6857 // sopp branch targets 6858 //===----------------------------------------------------------------------===// 6859 6860 OperandMatchResultTy 6861 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6862 6863 // Make sure we are not parsing something 6864 // that looks like a label or an expression but is not. 6865 // This will improve error messages. 6866 if (isRegister() || isModifier()) 6867 return MatchOperand_NoMatch; 6868 6869 if (!parseExpr(Operands)) 6870 return MatchOperand_ParseFail; 6871 6872 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6873 assert(Opr.isImm() || Opr.isExpr()); 6874 SMLoc Loc = Opr.getStartLoc(); 6875 6876 // Currently we do not support arbitrary expressions as branch targets. 6877 // Only labels and absolute expressions are accepted. 
6878 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6879 Error(Loc, "expected an absolute expression or a label"); 6880 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6881 Error(Loc, "expected a 16-bit signed jump offset"); 6882 } 6883 6884 return MatchOperand_Success; 6885 } 6886 6887 //===----------------------------------------------------------------------===// 6888 // Boolean holding registers 6889 //===----------------------------------------------------------------------===// 6890 6891 OperandMatchResultTy 6892 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6893 return parseReg(Operands); 6894 } 6895 6896 //===----------------------------------------------------------------------===// 6897 // mubuf 6898 //===----------------------------------------------------------------------===// 6899 6900 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6901 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6902 } 6903 6904 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6905 const OperandVector &Operands, 6906 bool IsAtomic, 6907 bool IsLds) { 6908 bool IsLdsOpcode = IsLds; 6909 bool HasLdsModifier = false; 6910 OptionalImmIndexMap OptionalIdx; 6911 unsigned FirstOperandIdx = 1; 6912 bool IsAtomicReturn = false; 6913 6914 if (IsAtomic) { 6915 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6916 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6917 if (!Op.isCPol()) 6918 continue; 6919 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 6920 break; 6921 } 6922 6923 if (!IsAtomicReturn) { 6924 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 6925 if (NewOpc != -1) 6926 Inst.setOpcode(NewOpc); 6927 } 6928 6929 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 6930 SIInstrFlags::IsAtomicRet; 6931 } 6932 6933 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6934 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6935 6936 // Add the register arguments 6937 if (Op.isReg()) { 6938 Op.addRegOperands(Inst, 1); 6939 // Insert a tied src for atomic return dst. 6940 // This cannot be postponed as subsequent calls to 6941 // addImmOperands rely on correct number of MC operands. 6942 if (IsAtomicReturn && i == FirstOperandIdx) 6943 Op.addRegOperands(Inst, 1); 6944 continue; 6945 } 6946 6947 // Handle the case where soffset is an immediate 6948 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6949 Op.addImmOperands(Inst, 1); 6950 continue; 6951 } 6952 6953 HasLdsModifier |= Op.isLDS(); 6954 6955 // Handle tokens like 'offen' which are sometimes hard-coded into the 6956 // asm string. There are no MCInst operands for these. 6957 if (Op.isToken()) { 6958 continue; 6959 } 6960 assert(Op.isImm()); 6961 6962 // Handle optional arguments 6963 OptionalIdx[Op.getImmTy()] = i; 6964 } 6965 6966 // This is a workaround for an llvm quirk which may result in an 6967 // incorrect instruction selection. Lds and non-lds versions of 6968 // MUBUF instructions are identical except that lds versions 6969 // have mandatory 'lds' modifier. However this modifier follows 6970 // optional modifiers and llvm asm matcher regards this 'lds' 6971 // modifier as an optional one. As a result, an lds version 6972 // of opcode may be selected even if it has no 'lds' modifier. 6973 if (IsLdsOpcode && !HasLdsModifier) { 6974 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6975 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
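// Switch to the equivalent non-lds opcode since no 'lds' modifier was parsed.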
6976 Inst.setOpcode(NoLdsOpcode); 6977 IsLdsOpcode = false; 6978 } 6979 } 6980 6981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 6983 6984 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6985 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6986 } 6987 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6988 } 6989 6990 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6991 OptionalImmIndexMap OptionalIdx; 6992 6993 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6994 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6995 6996 // Add the register arguments 6997 if (Op.isReg()) { 6998 Op.addRegOperands(Inst, 1); 6999 continue; 7000 } 7001 7002 // Handle the case where soffset is an immediate 7003 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7004 Op.addImmOperands(Inst, 1); 7005 continue; 7006 } 7007 7008 // Handle tokens like 'offen' which are sometimes hard-coded into the 7009 // asm string. There are no MCInst operands for these. 7010 if (Op.isToken()) { 7011 continue; 7012 } 7013 assert(Op.isImm()); 7014 7015 // Handle optional arguments 7016 OptionalIdx[Op.getImmTy()] = i; 7017 } 7018 7019 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7020 AMDGPUOperand::ImmTyOffset); 7021 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7022 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7023 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7024 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7025 } 7026 7027 //===----------------------------------------------------------------------===// 7028 // mimg 7029 //===----------------------------------------------------------------------===// 7030 7031 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7032 bool IsAtomic) { 7033 unsigned I = 1; 7034 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7035 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7036 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7037 } 7038 7039 if (IsAtomic) { 7040 // Add src, same as dst 7041 assert(Desc.getNumDefs() == 1); 7042 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7043 } 7044 7045 OptionalImmIndexMap OptionalIdx; 7046 7047 for (unsigned E = Operands.size(); I != E; ++I) { 7048 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7049 7050 // Add the register arguments 7051 if (Op.isReg()) { 7052 Op.addRegOperands(Inst, 1); 7053 } else if (Op.isImmModifier()) { 7054 OptionalIdx[Op.getImmTy()] = I; 7055 } else if (!Op.isToken()) { 7056 llvm_unreachable("unexpected operand type"); 7057 } 7058 } 7059 7060 bool IsGFX10Plus = isGFX10Plus(); 7061 7062 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7063 if (IsGFX10Plus) 7064 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7065 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7066 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7067 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7068 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7069 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7070 if (IsGFX10Plus) 
7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7073 if (!IsGFX10Plus) 7074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7076 } 7077 7078 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7079 cvtMIMG(Inst, Operands, true); 7080 } 7081 7082 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7083 OptionalImmIndexMap OptionalIdx; 7084 bool IsAtomicReturn = false; 7085 7086 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7087 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7088 if (!Op.isCPol()) 7089 continue; 7090 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7091 break; 7092 } 7093 7094 if (!IsAtomicReturn) { 7095 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7096 if (NewOpc != -1) 7097 Inst.setOpcode(NewOpc); 7098 } 7099 7100 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7101 SIInstrFlags::IsAtomicRet; 7102 7103 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7104 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7105 7106 // Add the register arguments 7107 if (Op.isReg()) { 7108 Op.addRegOperands(Inst, 1); 7109 if (IsAtomicReturn && i == 1) 7110 Op.addRegOperands(Inst, 1); 7111 continue; 7112 } 7113 7114 // Handle the case where soffset is an immediate 7115 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7116 Op.addImmOperands(Inst, 1); 7117 continue; 7118 } 7119 7120 // Handle tokens like 'offen' which are sometimes hard-coded into the 7121 // asm string. There are no MCInst operands for these. 7122 if (Op.isToken()) { 7123 continue; 7124 } 7125 assert(Op.isImm()); 7126 7127 // Handle optional arguments 7128 OptionalIdx[Op.getImmTy()] = i; 7129 } 7130 7131 if ((int)Inst.getNumOperands() <= 7132 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7134 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7135 } 7136 7137 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7138 const OperandVector &Operands) { 7139 for (unsigned I = 1; I < Operands.size(); ++I) { 7140 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7141 if (Operand.isReg()) 7142 Operand.addRegOperands(Inst, 1); 7143 } 7144 7145 Inst.addOperand(MCOperand::createImm(1)); // a16 7146 } 7147 7148 //===----------------------------------------------------------------------===// 7149 // smrd 7150 //===----------------------------------------------------------------------===// 7151 7152 bool AMDGPUOperand::isSMRDOffset8() const { 7153 return isImm() && isUInt<8>(getImm()); 7154 } 7155 7156 bool AMDGPUOperand::isSMEMOffset() const { 7157 return isImm(); // Offset range is checked later by validator. 7158 } 7159 7160 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7161 // 32-bit literals are only supported on CI and we only want to use them 7162 // when the offset is > 8-bits. 
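// For example, an offset of 0x1000 needs the 32-bit literal form, while 0x80
// still fits the 8-bit encoding.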
7163 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7164 } 7165 7166 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7167 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7168 } 7169 7170 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7171 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7172 } 7173 7174 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7175 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7176 } 7177 7178 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7179 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7180 } 7181 7182 //===----------------------------------------------------------------------===// 7183 // vop3 7184 //===----------------------------------------------------------------------===// 7185 7186 static bool ConvertOmodMul(int64_t &Mul) { 7187 if (Mul != 1 && Mul != 2 && Mul != 4) 7188 return false; 7189 7190 Mul >>= 1; 7191 return true; 7192 } 7193 7194 static bool ConvertOmodDiv(int64_t &Div) { 7195 if (Div == 1) { 7196 Div = 0; 7197 return true; 7198 } 7199 7200 if (Div == 2) { 7201 Div = 3; 7202 return true; 7203 } 7204 7205 return false; 7206 } 7207 7208 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7209 // This is intentional and ensures compatibility with sp3. 7210 // See bug 35397 for details. 7211 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7212 if (BoundCtrl == 0 || BoundCtrl == 1) { 7213 BoundCtrl = 1; 7214 return true; 7215 } 7216 return false; 7217 } 7218 7219 // Note: the order in this table matches the order of operands in AsmString. 7220 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7221 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7222 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7223 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7224 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7225 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7226 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7227 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7228 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7229 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7230 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7231 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7232 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7233 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7234 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7235 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7236 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7237 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7238 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7239 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7240 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7241 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7242 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7243 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7244 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7245 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7246 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7247 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7248 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7249 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7250 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7251 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7252 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7253 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7254 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7255 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7256 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7257 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7258 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7259 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7260 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7261 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7262 }; 7263 7264 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7265 7266 OperandMatchResultTy res = parseOptionalOpr(Operands); 7267 7268 // This is a hack to enable hardcoded mandatory operands which follow 7269 // optional operands. 7270 // 7271 // The current design assumes that all operands after the first optional operand 7272 // are also optional. However, the implementation of some instructions violates 7273 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7274 // 7275 // To alleviate this problem, we have to (implicitly) parse extra operands 7276 // to make sure the autogenerated parser of custom operands never hits hardcoded 7277 // mandatory operands. 7278 7279 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7280 if (res != MatchOperand_Success || 7281 isToken(AsmToken::EndOfStatement)) 7282 break; 7283 7284 trySkipToken(AsmToken::Comma); 7285 res = parseOptionalOpr(Operands); 7286 } 7287 7288 return res; 7289 } 7290 7291 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7292 OperandMatchResultTy res; 7293 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7294 // Try to parse any optional operand here. 7295 if (Op.IsBit) { 7296 res = parseNamedBit(Op.Name, Operands, Op.Type); 7297 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7298 res = parseOModOperand(Operands); 7299 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7300 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7301 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7302 res = parseSDWASel(Operands, Op.Name, Op.Type); 7303 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7304 res = parseSDWADstUnused(Operands); 7305 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7306 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7307 Op.Type == AMDGPUOperand::ImmTyNegLo || 7308 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7309 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7310 Op.ConvertResult); 7311 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7312 res = parseDim(Operands); 7313 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7314 res = parseCPol(Operands); 7315 } else { 7316 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7317 } 7318 if (res != MatchOperand_NoMatch) { 7319 return res; 7320 } 7321 } 7322 return MatchOperand_NoMatch; 7323 } 7324 7325 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7326 StringRef Name = getTokenStr(); 7327 if (Name == "mul") { 7328 return parseIntWithPrefix("mul", Operands, 7329 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7330 } 7331 7332 if (Name == "div") { 7333 return parseIntWithPrefix("div", Operands, 7334 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7335 } 7336 7337 return MatchOperand_NoMatch; 7338
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifier
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
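        // Remember where this optional modifier (e.g. clamp or omod) appeared
        // so it can be appended below in the canonical encoded operand order.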
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  // DPP64 is supported for row_newbcast only.
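  // For 64-bit operands (the first source is a register pair, i.e. it has a
  // sub1 subregister), every other control is rejected by the check below.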
  const MCRegisterInfo *MRI = getMRI();
  if (Operands.size() > 2 && Operands[1]->isReg() &&
      MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
    return false;

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",     {DppCtrl::WAVE_SHL1,          1,  1})
    .Case("wave_rol",     {DppCtrl::WAVE_ROL1,          1,  1})
    .Case("wave_shr",     {DppCtrl::WAVE_SHR1,          1,  1})
    .Case("wave_ror",     {DppCtrl::WAVE_ROR1,          1,  1})
    .Case("row_shl",      {DppCtrl::ROW_SHL0,           1, 15})
    .Case("row_shr",      {DppCtrl::ROW_SHR0,           1, 15})
    .Case("row_ror",      {DppCtrl::ROW_ROR0,           1, 15})
    .Case("row_share",    {DppCtrl::ROW_SHARE_FIRST,    0, 15})
    .Case("row_xmask",    {DppCtrl::ROW_XMASK_FIRST,    0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
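      // Nothing is appended to the MCInst for it; the register is accepted in
      // the assembly text but is not part of the explicit operand list here.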
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it's the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
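      // Concretely: for VOP2 the dst vcc is the one seen after only the vdst
      // has been emitted (1 MCInst operand), and the src2 vcc is seen after
      // vdst + src0 (2 slots) + src1 (2 slots) = 5 operands; for VOPC the
      // leading vcc is seen before any operand has been emitted.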
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks if we were given an immediate operand
  // where the matcher expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }