//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Broad classification of a parsed register operand; refined later into
// concrete register classes.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand. One of four kinds (token,
/// immediate, register, expression); the active kind selects which member of
/// the anonymous union below is valid. The many is*()/add*Operands() members
/// are the predicates and converters invoked by the auto-generated matcher
/// (AMDGPUGenAsmMatcher.inc).
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Back-pointer to the owning parser; used by predicates that need
  // subtarget information (defined out-of-line).
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers (abs/neg for FP, sext for int) attached to a register
  /// or immediate operand. FP and int modifiers are mutually exclusive.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers into the SISrcMods bitfield form used in the
    // MCInst modifier operand.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers into SISrcMods bitfield form.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier family is present (they cannot both be).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Fine-grained type tag for immediate operands; distinguishes the many
  // named instruction modifiers (offsets, DPP/SDWA controls, etc.) that all
  // parse to an integer value.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  // How an immediate will be encoded: as a literal constant or as an inline
  // constant. Set during operand conversion (hence the mutable member below).
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    // Mutable: the encoding kind is decided while adding operands to the
    // MCInst, which happens through const methods.
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Payload; the active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalized 'Is' is inconsistent with isImmKindConst()
  // below; renaming would break external callers, so it is only noted here.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A plain register: register kind with no abs/neg/sext modifiers attached.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for any VGPR register class of any width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One-line predicates for each named immediate modifier; those with range
  // checks additionally validate the parsed value's bit width.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // --- Scalar (SGPR) source predicates: SCSrc* = reg or inline constant,
  // SSrc* = SCSrc or literal/expression. ---

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // The "cannot happen" predicates below exist only to satisfy the generated
  // matcher; the corresponding operand kinds are never matched this way.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // --- Vector (VGPR/SGPR) source predicates: VCSrc* = reg or inline
  // constant, VSrc* = VCSrc or literal. ---

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // --- VGPR-only source predicates (VISrc*), by register width. ---

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  // --- AGPR (accumulator register) source predicates (AISrc*). ---

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  // KImm*: literal constants carried in the instruction's constant field.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Valid only for symbol-reference expressions (see isToken()).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live in the register or immediate payload; only ImmTyNone
  // immediates may carry them.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifier operand first, then the value operand, matching the
  // instruction definitions' operand order.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: human-readable name for each ImmTy value.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // --- Factory functions; each fully initializes the union payload for its
  // kind and the source locations. ---

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is unused in this function body;
  // presumably kept for signature compatibility with callers.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
/// Tracks the highest SGPR/VGPR index used by the current kernel and
/// publishes the running counts as the assembler symbols
/// .kernel.sgpr_count / .kernel.vgpr_count.
class KernelScopeInfo {
  // First register index not yet used; -1 until any register is seen.
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Record use of SGPR i; if it raises the high-water mark, update the
  // .kernel.sgpr_count symbol (count = highest index used + 1).
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Same as usesSgprAt, for VGPRs (also covers AGPRs; see usesRegister).
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Reset the counters to zero-use state and (re)create the count symbols in
  // the given context. The assignments deliberately pass -1 so the
  // "i >= Min" test fires and the symbols are initialized to 0.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  // Record a register use spanning RegWidth dwords starting at DwordRegIndex.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);

  // Handlers for target-specific assembler directives (.amdgcn_target,
  // .amdhsa_kernel, .hsa_code_object_*, .amd_kernel_code_t, ...).
  // Each returns true on error, per MCAsmParserExtension convention.
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register-parsing helpers. RegKind/RegNum/RegWidth describe the parsed
  // register group; the Tokens overloads record consumed tokens so they can
  // be pushed back on failure.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  // GPR usage bookkeeping for the .amdgcn.next_free_*gpr style symbols.
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  // Shared conversion bodies for MUBUF and DS instruction forms.
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  // Maps an optional-operand immediate type to its index in Operands.
  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo
                  &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    // No subtarget features selected: default to the southern-islands set so
    // parsing can proceed.
    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      // Expose the ISA version to assembly source via pre-defined symbols.
      // HSA ABI v3/v4 uses the .amdgcn.* names; older flows use .option.*.
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      // GPR accounting: v3/v4 ABI tracks usage through count symbols; older
      // ABIs use the per-kernel KernelScope tracker instead.
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget capability / generation predicates: thin wrappers over the
  // AMDGPUBaseInfo queries on the current MCSubtargetInfo.
  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  // Feature-bit predicates used throughout operand validation.
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  // SGPR102/103 are usable except on VI and GFX9.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state, driven by mnemonic suffixes (_e32/_e64/_dpp/_sdwa)
  // stripped in parseMnemonicSuffix().
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  // MCTargetAsmParser entry points (see base class for contracts).
  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, int64_t &Int);

  // Parse "prefix:<int>" style optional operands; ConvertResult may rewrite
  // or reject the parsed value.
  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  // Token classification helpers used while deciding between a register,
  // a modifier (abs/neg/sext), and a plain immediate.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy
  parseUfmt(int64_t &Format);
  // MTBUF format parsing: split dfmt/nfmt (pre-GFX10) vs unified format.
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  // DS conversions: cvtDS/cvtDSGds share cvtDSImpl with the gds flag
  // hardcoded off/on respectively.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // One field of a structured operand (sendmsg/hwreg): its source location,
  // resolved id, and whether it was written symbolically / at all.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  // Source-location lookup helpers for diagnostics.
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  // Post-match semantic checks. validateInstruction() is the driver; the
  // helpers each verify one target constraint and report via diagnostics
  // (returning false/true per their individual conventions).
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  // Returns an error message on failure, None on success.
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode)
  const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // "Unsupported instruction" diagnostics: check whether the mnemonic exists
  // for some feature set / variant to produce a better error message.
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  // Low-level lexer utilities: isX() peeks, trySkipX() consumes on match,
  // skipToken() consumes or emits ErrMsg.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  // Parsers for special structured operands (exp target, sendmsg,
  // interpolation slots/attributes, branch targets).
  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  // swizzle() operand parsing: one bounded integer field, then the macro
  // forms (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE).
  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF conversions dispatch to cvtMubufImpl with atomic/lds flags.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default values for optional operands omitted in the source.
  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap
                &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  // MIMG conversions; IsAtomic selects the atomic operand layout.
  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP / DPP8 parsing and conversion.
  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  // SDWA parsing and conversion; BasicInstType selects the VOP class layout.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  // MAI (matrix) instruction optional-operand defaults.
  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;
  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

// Table entry describing one optional operand: its source spelling, the
// immediate type it produces, whether it is a bare bit, and an optional
// value-conversion callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// Map an operand byte size (2/4/8) to the matching IEEE semantics.
// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Map an SI operand-type enumerator to the float semantics a literal for
// that operand should be converted with (32-, 64- or 16-bit).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case
       AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

// Convert FPLiteral in place to VT's semantics. Returns false only when the
// conversion both lost information and overflowed/underflowed; plain
// precision loss is accepted.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// True if Val fits in Size bits under either unsigned or signed
// interpretation, i.e. truncation to Size bits is value-preserving.
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

// Whether this immediate can be encoded as an inline constant for an
// operand of machine type `type` (no literal dword needed).
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g.
    // "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: convert the 64-bit FP literal down first; reject if
    // the conversion overflows/underflows with information loss.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // Narrower operand: the integer literal must fit in the operand width
  // (signed or unsigned) to be considered at all.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

// Whether this immediate may be encoded as a (non-inline) literal dword for
// an operand of machine type `type`.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    // 64-bit operands still encode a 32-bit literal dword.
    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero.
We also 1884 // require that the literal may be losslesly converted to f16. 1885 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1886 (type == MVT::v2i16)? MVT::i16 : 1887 (type == MVT::v2f32)? MVT::f32 : type; 1888 1889 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1890 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1891 } 1892 1893 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1894 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1895 } 1896 1897 bool AMDGPUOperand::isVRegWithInputMods() const { 1898 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1899 // GFX90A allows DPP on 64-bit operands. 1900 (isRegClass(AMDGPU::VReg_64RegClassID) && 1901 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1902 } 1903 1904 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1905 if (AsmParser->isVI()) 1906 return isVReg32(); 1907 else if (AsmParser->isGFX9Plus()) 1908 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1909 else 1910 return false; 1911 } 1912 1913 bool AMDGPUOperand::isSDWAFP16Operand() const { 1914 return isSDWAOperand(MVT::f16); 1915 } 1916 1917 bool AMDGPUOperand::isSDWAFP32Operand() const { 1918 return isSDWAOperand(MVT::f32); 1919 } 1920 1921 bool AMDGPUOperand::isSDWAInt16Operand() const { 1922 return isSDWAOperand(MVT::i16); 1923 } 1924 1925 bool AMDGPUOperand::isSDWAInt32Operand() const { 1926 return isSDWAOperand(MVT::i32); 1927 } 1928 1929 bool AMDGPUOperand::isBoolReg() const { 1930 auto FB = AsmParser->getFeatureBits(); 1931 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1932 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1933 } 1934 1935 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1936 { 1937 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1938 assert(Size == 2 || Size == 4 || Size == 8); 1939 1940 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1941 1942 if 
(Imm.Mods.Abs) { 1943 Val &= ~FpSignMask; 1944 } 1945 if (Imm.Mods.Neg) { 1946 Val ^= FpSignMask; 1947 } 1948 1949 return Val; 1950 } 1951 1952 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1953 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1954 Inst.getNumOperands())) { 1955 addLiteralImmOperand(Inst, Imm.Val, 1956 ApplyModifiers & 1957 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1958 } else { 1959 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1960 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1961 setImmKindNone(); 1962 } 1963 } 1964 1965 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1966 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1967 auto OpNum = Inst.getNumOperands(); 1968 // Check that this operand accepts literals 1969 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1970 1971 if (ApplyModifiers) { 1972 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1973 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1974 Val = applyInputFPModifiers(Val, Size); 1975 } 1976 1977 APInt Literal(64, Val); 1978 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1979 1980 if (Imm.IsFPImm) { // We got fp literal token 1981 switch (OpTy) { 1982 case AMDGPU::OPERAND_REG_IMM_INT64: 1983 case AMDGPU::OPERAND_REG_IMM_FP64: 1984 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1985 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1986 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1987 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1988 AsmParser->hasInv2PiInlineImm())) { 1989 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1990 setImmKindConst(); 1991 return; 1992 } 1993 1994 // Non-inlineable 1995 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1996 // For fp operands we check if low 32 bits are zeros 1997 if (Literal.getLoBits(32) != 0) { 1998 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1999 "Can't encode literal as exact 64-bit floating-point operand. " 2000 "Low 32-bits will be set to zero"); 2001 } 2002 2003 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2004 setImmKindLiteral(); 2005 return; 2006 } 2007 2008 // We don't allow fp literals in 64-bit integer instructions. It is 2009 // unclear how we should encode them. 
This case should be checked earlier 2010 // in predicate methods (isLiteralImm()) 2011 llvm_unreachable("fp literal in 64-bit integer instruction."); 2012 2013 case AMDGPU::OPERAND_REG_IMM_INT32: 2014 case AMDGPU::OPERAND_REG_IMM_FP32: 2015 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2016 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2017 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2018 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2019 case AMDGPU::OPERAND_REG_IMM_INT16: 2020 case AMDGPU::OPERAND_REG_IMM_FP16: 2021 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2022 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2023 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2024 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2025 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2026 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2027 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2028 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2029 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2030 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2031 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2032 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2033 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2034 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2035 bool lost; 2036 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2037 // Convert literal to single precision 2038 FPLiteral.convert(*getOpFltSemantics(OpTy), 2039 APFloat::rmNearestTiesToEven, &lost); 2040 // We allow precision lost but not overflow or underflow. This should be 2041 // checked earlier in isLiteralImm() 2042 2043 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2044 Inst.addOperand(MCOperand::createImm(ImmVal)); 2045 setImmKindLiteral(); 2046 return; 2047 } 2048 default: 2049 llvm_unreachable("invalid operand size"); 2050 } 2051 2052 return; 2053 } 2054 2055 // We got int literal token. 2056 // Only sign extend inline immediates. 
2057 switch (OpTy) { 2058 case AMDGPU::OPERAND_REG_IMM_INT32: 2059 case AMDGPU::OPERAND_REG_IMM_FP32: 2060 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2061 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2062 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2063 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2064 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2065 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2066 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2067 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2068 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2069 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2070 if (isSafeTruncation(Val, 32) && 2071 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2072 AsmParser->hasInv2PiInlineImm())) { 2073 Inst.addOperand(MCOperand::createImm(Val)); 2074 setImmKindConst(); 2075 return; 2076 } 2077 2078 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2079 setImmKindLiteral(); 2080 return; 2081 2082 case AMDGPU::OPERAND_REG_IMM_INT64: 2083 case AMDGPU::OPERAND_REG_IMM_FP64: 2084 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2085 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2086 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2087 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2088 Inst.addOperand(MCOperand::createImm(Val)); 2089 setImmKindConst(); 2090 return; 2091 } 2092 2093 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2094 setImmKindLiteral(); 2095 return; 2096 2097 case AMDGPU::OPERAND_REG_IMM_INT16: 2098 case AMDGPU::OPERAND_REG_IMM_FP16: 2099 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2100 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2102 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2103 if (isSafeTruncation(Val, 16) && 2104 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2105 AsmParser->hasInv2PiInlineImm())) { 2106 Inst.addOperand(MCOperand::createImm(Val)); 2107 setImmKindConst(); 2108 return; 2109 } 2110 2111 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2112 setImmKindLiteral(); 2113 return; 2114 2115 
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2116 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2118 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2119 assert(isSafeTruncation(Val, 16)); 2120 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2121 AsmParser->hasInv2PiInlineImm())); 2122 2123 Inst.addOperand(MCOperand::createImm(Val)); 2124 return; 2125 } 2126 default: 2127 llvm_unreachable("invalid operand size"); 2128 } 2129 } 2130 2131 template <unsigned Bitwidth> 2132 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2133 APInt Literal(64, Imm.Val); 2134 setImmKindNone(); 2135 2136 if (!Imm.IsFPImm) { 2137 // We got int literal token. 2138 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2139 return; 2140 } 2141 2142 bool Lost; 2143 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2144 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2145 APFloat::rmNearestTiesToEven, &Lost); 2146 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2147 } 2148 2149 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2150 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2151 } 2152 2153 static bool isInlineValue(unsigned Reg) { 2154 switch (Reg) { 2155 case AMDGPU::SRC_SHARED_BASE: 2156 case AMDGPU::SRC_SHARED_LIMIT: 2157 case AMDGPU::SRC_PRIVATE_BASE: 2158 case AMDGPU::SRC_PRIVATE_LIMIT: 2159 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2160 return true; 2161 case AMDGPU::SRC_VCCZ: 2162 case AMDGPU::SRC_EXECZ: 2163 case AMDGPU::SRC_SCC: 2164 return true; 2165 case AMDGPU::SGPR_NULL: 2166 return true; 2167 default: 2168 return false; 2169 } 2170 } 2171 2172 bool AMDGPUOperand::isInlineValue() const { 2173 return isRegKind() && ::isInlineValue(getReg()); 2174 } 2175 2176 //===----------------------------------------------------------------------===// 2177 // AsmParser 2178 
//===----------------------------------------------------------------------===//

// Map a register kind plus a width (in 32-bit dwords) to the corresponding
// register class ID, or -1 if no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 6: return AMDGPU::VReg_192RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 3: return AMDGPU::SGPR_96RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 5: return AMDGPU::SGPR_160RegClassID;
      case 6: return AMDGPU::SGPR_192RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 3: return AMDGPU::AReg_96RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 5: return AMDGPU::AReg_160RegClassID;
      case 6: return AMDGPU::AReg_192RegClassID;
      case 8: return AMDGPU::AReg_256RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  // IS_SPECIAL/IS_UNKNOWN (and unsupported widths above) have no class.
  return -1;
}

// Map a special register name to its register number, or NoRegister if the
// name is not recognized. Most names accept both bare and "src_"-prefixed
// spellings.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}

// MCTargetAsmParser hook: parse a register, returning true on failure.
// NOTE(review): RestoreOnFailure is not forwarded to parseRegister() here;
// tryParseRegister() below instead recovers via pending-error clearing —
// confirm this is intended before relying on token restoration.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

// Non-committal variant of ParseRegister: converts a pending parser error
// into MatchOperand_ParseFail and a plain failure into NoMatch.
OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
                                                       SMLoc &StartLoc,
                                                       SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}

// Extend the register list [Reg .. Reg+RegWidth) by one more register Reg1.
// Special registers may only pair their _LO half with the matching _HI half;
// regular registers must have consecutive indices. Emits a diagnostic and
// returns false on violation.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Name prefix plus kind for the "regular" (indexable) register files.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};

// Order matters: longer prefixes must precede shorter ones ("ttmp"/"acc"
// before "a") because getRegularRegInfo() takes the first prefix match.
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

// True for register kinds that are indexed files (as opposed to IS_SPECIAL).
static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

// Find the RegInfo whose name prefixes Str, or nullptr if none does.
static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

// Parse Str as a base-10 register index into Num; true on success.
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}

// Lookahead check (no tokens consumed): does the token pair start a register
// reference of any supported syntax?
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(RegName.size());
    if (!RegSuffix.empty()) {
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(RegSuffix, Num))
        return true;
    } else {
      // A range of registers: r[XX:YY].
      if (NextToken.is(AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

// Translate (kind, first index, width-in-dwords) into a concrete register,
// enforcing SGPR/TTMP alignment. Emits a diagnostic and returns NoRegister
// on any violation.
unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth,
                               SMLoc Loc) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}

// Parse "[XX]" or "[XX:YY]" after a register prefix, producing the first
// index (Num) and element count (Width). Returns false with a diagnostic
// already emitted on malformed input.
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    // Single index: [XX] is the range [XX:XX].
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}

// Try to parse the current identifier as a special register name.
// On success consumes the token (recording it in Tokens for possible
// restoration) and returns the register; otherwise returns NoRegister
// without consuming anything.
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 1;
    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());
    lex(); // skip register name
  }
  return Reg;
}

// Parse a regular register reference: either "vXX" (single 32-bit register)
// or "v[XX:YY]" (a range). Emits a diagnostic and returns NoRegister on error.
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}

// Parse an explicit register list "[s0,s1,s2,s3]": all elements must be
// single 32-bit registers of the same kind with consecutive indices.
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}

// Top-level register parser: dispatches to special-name, regular, or
// list syntax, then validates subtarget availability. Returns false (with a
// pending diagnostic) on failure.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    // The helpers above must have reported a diagnostic already.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}

// Convenience overload: on failure, optionally un-lexes every token the
// parse consumed so the caller can retry a different production.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;

  SmallVector<AsmToken, 1> Tokens;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        getLexer().UnLex(Tokens.pop_back_val());
      }
    }
    return true;
  }
  return false;
}

// Name of the assembler symbol tracking the high-water mark of used GPRs
// for the given kind; None for kinds that are not tracked.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

// Define the GPR-count symbol for RegKind with an initial value of 0.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

// Raise the GPR-count symbol to cover the register range just parsed.
// Returns true on success (including "nothing to do"), false after emitting
// a diagnostic if the symbol was redefined incompatibly.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

// Parse a register and wrap it into an AMDGPUOperand, updating either the
// HSA v3/v4 GPR-count symbols or the legacy kernel-scope tracking.
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}

// Parse an immediate operand: either a (possibly negated) floating-point
// literal token, or an integer/absolute MC expression. Non-absolute
// expressions are kept as expression operands.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  assert(!isRegister());
  assert(!isModifier());

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  // "-1.0" lexes as Minus followed by Real; fold the sign into the literal.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    // FP literals are carried as the raw IEEE double bit pattern.
    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc S = getLoc(); // NOTE: shadows the outer S; both point at the operand start.

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      // Parse only a primary expression so the trailing '|' is left alone.
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }

  // Unreachable: both branches above return.
  return MatchOperand_NoMatch;
}

// Parse a register operand; NoMatch if the lookahead is not a register.
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

// Parse a register if possible, otherwise an immediate — but never consume
// a modifier (those are handled by the *WithInputMods parsers).
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  if (res != MatchOperand_NoMatch) {
    return res;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}

// True if the token pair looks like "abs(", "neg(" or "sext(".
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
    const auto &str = Token.getString();
    return str == "abs" || str == "neg" || str == "sext";
  }
  return false;
}

// True if the token pair looks like "name:" (an opcode modifier with value).
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

// True for named modifiers and the SP3 '|...|' abs syntax.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Beside being contr-intuitive, such use of floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1  // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1  // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for unifomtity
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

// Parse an operand that may carry FP input modifiers in either named form
// (neg(...), abs(...)) or SP3 form (-..., |...|), then attach the modifiers
// to the parsed operand.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2924 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2925 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2926 return MatchOperand_ParseFail; 2927 } 2928 2929 SP3Neg = parseSP3NegModifier(); 2930 2931 Loc = getLoc(); 2932 Neg = trySkipId("neg"); 2933 if (Neg && SP3Neg) { 2934 Error(Loc, "expected register or immediate"); 2935 return MatchOperand_ParseFail; 2936 } 2937 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2938 return MatchOperand_ParseFail; 2939 2940 Abs = trySkipId("abs"); 2941 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2942 return MatchOperand_ParseFail; 2943 2944 Loc = getLoc(); 2945 SP3Abs = trySkipToken(AsmToken::Pipe); 2946 if (Abs && SP3Abs) { 2947 Error(Loc, "expected register or immediate"); 2948 return MatchOperand_ParseFail; 2949 } 2950 2951 OperandMatchResultTy Res; 2952 if (AllowImm) { 2953 Res = parseRegOrImm(Operands, SP3Abs); 2954 } else { 2955 Res = parseReg(Operands); 2956 } 2957 if (Res != MatchOperand_Success) { 2958 return (SP3Neg || Neg || SP3Abs || Abs)? 
        MatchOperand_ParseFail : Res;
  }

  // Expect the matching closing delimiter for whichever modifier syntax
  // was consumed above.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // FP modifiers apply to registers and immediates only, not to
    // relocatable expressions.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse an operand with an optional integer input modifier: 'sext(op)'.
// If AllowImm is false, only registers are accepted.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once 'sext(' has been consumed, an operand is mandatory.
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // Integer modifiers cannot be applied to relocatable expressions.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

// Register-only variants of the parsers above (immediates disallowed).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// Parse either a register or the keyword 'off' (represented as an
// immediate 0 of type ImmTyOff).
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;

}

// Reject matches that contradict an encoding the user forced via a
// mnemonic suffix (_e32/_e64/_dpp/_sdwa).
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Ask the matcher to retry with the e32 form when this VOP3 opcode
  // prefers the 32-bit encoding and none was forced.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit)
      && getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

// All assembler variants that may be tried during matching.
static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

// Name of the forced variant, for diagnostics; empty if none was forced.
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

// Return the first implicitly used SGPR (VCC/FLAT_SCR/M0 family) of Inst,
// or NoRegister. Such implicit reads count toward the constant bus limit.
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: inline-constant rules differ between integer,
    // packed integer, packed FP and scalar FP operand types.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType ==
            AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Maximum number of scalar (constant-bus) values Opcode may read:
// 1 before GFX10; 2 on GFX10+ except the 64-bit shifts listed below.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}

// True if operand OpIdx consumes a constant-bus slot: a non-inline
// immediate, an SGPR other than null, or an expression.
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    auto Reg = MO.getReg();
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    auto PReg = mc2PseudoReg(Reg);
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  } else {
    return true;
  }
}

// Verify that Inst does not read more scalar values than the constant bus
// allows. Implicit SGPR reads, explicit SGPRs and literals all count.
bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize; // Only read after NumLiterals != 0 sets it.

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          // s0, s[0:1]
          // flat_scratch_lo, flat_scratch
          // flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Point the diagnostic at whichever offending operand appears last.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}

// Enforce the EARLY_CLOBBER constraint: vdst must not overlap any source
// register.
bool
AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        Error(getRegLoc(SrcReg, Operands),
              "destination must be different than all sources");
        return false;
      }
    }
  }

  return true;
}

// When the target has no integer-clamp support, the clamp bit of an
// IntClamp instruction must be 0.
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}

// MIMG: vdata register count must match the channels selected by dmask
// (plus one for tfe), halved when packed d16 is in effect.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);

  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
    return true;

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  // Gather4 always returns 4 channels regardless of dmask.
  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}

// MIMG (GFX10+): the supplied address register count must match what the
// base opcode, dimension and a16/g16 settings require.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  if (DimIdx == -1)
    return true; // intersect_ray

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // NSA encodings carry one address register per operand slot.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned VAddrSize =
      IsNSA ?
            SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());

  unsigned AddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  // Non-NSA encodings round the address up to a power-of-2 register tuple.
  if (!IsNSA) {
    if (AddrSize > 8)
      AddrSize = 16;
    else if (AddrSize > 4)
      AddrSize = 8;
  }

  return VAddrSize == AddrSize;
}

// MIMG atomics must use a dmask selecting 1, 2 or 4 channels.
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

// MSAA-only base opcodes require an MSAA surface dimension.
bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  if (!BaseOpcode->MSAA)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  assert(DimIdx != -1);

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);

  return DimInfo->MSAA;
}

// SDWA forms of v_movrels*, which need special src0 handling (see below).
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
    return true;
  default:
    return false;
  }
}

// movrels* opcodes should only allow VGPRS as src0.
// This is specified in .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
3527 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3528 const OperandVector &Operands) { 3529 3530 const unsigned Opc = Inst.getOpcode(); 3531 const MCInstrDesc &Desc = MII.get(Opc); 3532 3533 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3534 return true; 3535 3536 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3537 assert(Src0Idx != -1); 3538 3539 SMLoc ErrLoc; 3540 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3541 if (Src0.isReg()) { 3542 auto Reg = mc2PseudoReg(Src0.getReg()); 3543 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3544 if (!isSGPR(Reg, TRI)) 3545 return true; 3546 ErrLoc = getRegLoc(Reg, Operands); 3547 } else { 3548 ErrLoc = getConstLoc(Operands); 3549 } 3550 3551 Error(ErrLoc, "source operand must be a VGPR"); 3552 return false; 3553 } 3554 3555 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3556 const OperandVector &Operands) { 3557 3558 const unsigned Opc = Inst.getOpcode(); 3559 3560 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3561 return true; 3562 3563 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3564 assert(Src0Idx != -1); 3565 3566 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3567 if (!Src0.isReg()) 3568 return true; 3569 3570 auto Reg = mc2PseudoReg(Src0.getReg()); 3571 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3572 if (isSGPR(Reg, TRI)) { 3573 Error(getRegLoc(Reg, Operands), 3574 "source operand must be either a VGPR or an inline constant"); 3575 return false; 3576 } 3577 3578 return true; 3579 } 3580 3581 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3582 switch (Inst.getOpcode()) { 3583 default: 3584 return true; 3585 case V_DIV_SCALE_F32_gfx6_gfx7: 3586 case V_DIV_SCALE_F32_vi: 3587 case V_DIV_SCALE_F32_gfx10: 3588 case V_DIV_SCALE_F64_gfx6_gfx7: 3589 case V_DIV_SCALE_F64_vi: 3590 case V_DIV_SCALE_F64_gfx10: 3591 break; 3592 } 3593 3594 // TODO: Check that src0 = src1 
or src2. 3595 3596 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3597 AMDGPU::OpName::src2_modifiers, 3598 AMDGPU::OpName::src2_modifiers}) { 3599 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3600 .getImm() & 3601 SISrcMods::ABS) { 3602 return false; 3603 } 3604 } 3605 3606 return true; 3607 } 3608 3609 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3610 3611 const unsigned Opc = Inst.getOpcode(); 3612 const MCInstrDesc &Desc = MII.get(Opc); 3613 3614 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3615 return true; 3616 3617 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3618 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3619 if (isCI() || isSI()) 3620 return false; 3621 } 3622 3623 return true; 3624 } 3625 3626 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3627 const unsigned Opc = Inst.getOpcode(); 3628 const MCInstrDesc &Desc = MII.get(Opc); 3629 3630 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3631 return true; 3632 3633 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3634 if (DimIdx < 0) 3635 return true; 3636 3637 long Imm = Inst.getOperand(DimIdx).getImm(); 3638 if (Imm < 0 || Imm >= 8) 3639 return false; 3640 3641 return true; 3642 } 3643 3644 static bool IsRevOpcode(const unsigned Opcode) 3645 { 3646 switch (Opcode) { 3647 case AMDGPU::V_SUBREV_F32_e32: 3648 case AMDGPU::V_SUBREV_F32_e64: 3649 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3650 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3651 case AMDGPU::V_SUBREV_F32_e32_vi: 3652 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3653 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3654 case AMDGPU::V_SUBREV_F32_e64_vi: 3655 3656 case AMDGPU::V_SUBREV_CO_U32_e32: 3657 case AMDGPU::V_SUBREV_CO_U32_e64: 3658 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3659 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3660 3661 case AMDGPU::V_SUBBREV_U32_e32: 3662 case AMDGPU::V_SUBBREV_U32_e64: 3663 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3664 case 
       AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}

// Check the uses of lds_direct; returns an error message, or None if valid.
Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return None;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (SrcIdx == -1)
      break;
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A())
        return StringRef("lds_direct is not supported on this GPU");

      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return None;
}

// Location of the flat-offset operand for diagnostics; falls back to the
// current parse location.
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}

// Validate the offset modifier of FLAT instructions against the target's
// supported offset width and signedness.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For FLAT segment the offset must be positive;
  // MSB is ignored and forced to zero.
  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
    if (!isIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
      return false;
    }
  } else {
    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
    if (!isUIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      return false;
    }
  }

  return true;
}

SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
  // Start with second operand because SMEM Offset cannot be dst or src0.
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}

// Validate the immediate offset of SMEM (scalar memory) instructions.
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // SI/CI encode SMRD offsets differently; nothing to validate here.
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  const auto &Op = Inst.getOperand(OpNum);
  if (!Op.isImm())
    return true;

  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
    return true;

  Error(getSMEMOffsetLoc(Operands),
        (isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset" : 3889 "expected a 21-bit signed offset"); 3890 3891 return false; 3892 } 3893 3894 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3895 unsigned Opcode = Inst.getOpcode(); 3896 const MCInstrDesc &Desc = MII.get(Opcode); 3897 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3898 return true; 3899 3900 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3901 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3902 3903 const int OpIndices[] = { Src0Idx, Src1Idx }; 3904 3905 unsigned NumExprs = 0; 3906 unsigned NumLiterals = 0; 3907 uint32_t LiteralValue; 3908 3909 for (int OpIdx : OpIndices) { 3910 if (OpIdx == -1) break; 3911 3912 const MCOperand &MO = Inst.getOperand(OpIdx); 3913 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3914 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3915 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3916 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3917 if (NumLiterals == 0 || LiteralValue != Value) { 3918 LiteralValue = Value; 3919 ++NumLiterals; 3920 } 3921 } else if (MO.isExpr()) { 3922 ++NumExprs; 3923 } 3924 } 3925 } 3926 3927 return NumLiterals + NumExprs <= 1; 3928 } 3929 3930 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3931 const unsigned Opc = Inst.getOpcode(); 3932 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3933 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3934 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3935 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3936 3937 if (OpSel & ~3) 3938 return false; 3939 } 3940 return true; 3941 } 3942 3943 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3944 const OperandVector &Operands) { 3945 const unsigned Opc = Inst.getOpcode(); 3946 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3947 if (DppCtrlIdx < 0) 3948 return true; 3949 unsigned DppCtrl = 
      Inst.getOperand(DppCtrlIdx).getImm();

  // Tail of validateDPP (function begins above this chunk): with a DPP
  // control that is not legal for 64-bit operands, reject any instruction
  // whose src0 is a multi-register (64-bit) operand.
  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
    // DPP64 is supported for row_newbcast only.
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    // A 64-bit register operand is detected by the presence of a sub1
    // subregister.
    if (Src0Idx >= 0 &&
        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, "64 bit dpp only supports row_newbcast");
      return false;
    }
  }

  return true;
}

// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  // Wave64 uses the full VCC pair; wave32 uses VCC_LO only.
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}

// VOP3 literal is only allowed in GFX10+ and only one can be used
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
                                          const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // Only VOP3/VOP3P encodings are subject to this check.
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    // Source operands are allocated in order, so the first missing index
    // ends the scan.
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
      continue;

    // Hardware bug workaround: MFMA src2 may not take an inline constant
    // on affected subtargets.
    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
      Error(getConstLoc(Operands),
            "inline constants are not allowed for this operand");
      return false;
    }

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      // Repeated uses of the same literal value share one encoding slot,
      // so only distinct values are counted.
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      // Relocatable expressions always occupy a literal slot.
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands), "only one literal operand is allowed");
    return false;
  }

  return true;
}

// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
                         const MCRegisterInfo *MRI) {
  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
  if (OpIdx < 0)
    return -1;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return -1;

  // For register tuples classify by the first 32-bit subregister.
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  return AGRP32.contains(Reg) ? 1 : 0;
}

// Validate AGPR use in memory instructions: pre-GFX90A targets may not use
// AGPRs at all, and GFX90A requires dst/data operands to agree (all VGPR or
// all AGPR).
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // Only memory instruction encodings are subject to this check.
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                  SIInstrFlags::DS)) == 0)
    return true;

  // DS instructions name their data operand data0; buffer/FLAT/MIMG use
  // vdata.
  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                      : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);

  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    // DS write2-style instructions: both data operands must be the same
    // register kind.
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    // GFX90A: dst and data must both be VGPR or both be AGPR.
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  // Older targets: AGPR loads and stores are not supported.
  return DstAreg < 1 && DataAreg < 1;
}

// GFX90A requires VGPR/AGPR tuples to start at an even register index
// (64-bit alignment).
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    // Only multi-register operands have a sub0 subregister; single
    // registers need no alignment.
    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    // An odd starting index means the tuple is misaligned.
    if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}

// Validate the cache-policy modifier bits (glc/dlc/scc) against the
// instruction kind and subtarget. (Body continues below this chunk
// boundary.)
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // SMRD accepts only glc and dlc.
  if ((TSFlags & (SIInstrFlags::SMRD)) &&
      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)))
{ 4120 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4121 return false; 4122 } 4123 4124 if (isGFX90A() && (CPol & CPol::SCC)) { 4125 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4126 StringRef CStr(S.getPointer()); 4127 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4128 Error(S, "scc is not supported on this GPU"); 4129 return false; 4130 } 4131 4132 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4133 return true; 4134 4135 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4136 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4137 Error(IDLoc, "instruction must use glc"); 4138 return false; 4139 } 4140 } else { 4141 if (CPol & CPol::GLC) { 4142 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4143 StringRef CStr(S.getPointer()); 4144 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4145 Error(S, "instruction must not use glc"); 4146 return false; 4147 } 4148 } 4149 4150 return true; 4151 } 4152 4153 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4154 const SMLoc &IDLoc, 4155 const OperandVector &Operands) { 4156 if (auto ErrMsg = validateLdsDirect(Inst)) { 4157 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4158 return false; 4159 } 4160 if (!validateSOPLiteral(Inst)) { 4161 Error(getLitLoc(Operands), 4162 "only one literal operand is allowed"); 4163 return false; 4164 } 4165 if (!validateVOP3Literal(Inst, Operands)) { 4166 return false; 4167 } 4168 if (!validateConstantBusLimitations(Inst, Operands)) { 4169 return false; 4170 } 4171 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4172 return false; 4173 } 4174 if (!validateIntClampSupported(Inst)) { 4175 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4176 "integer clamping is not supported on this GPU"); 4177 return false; 4178 } 4179 if (!validateOpSel(Inst)) { 4180 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4181 "invalid op_sel operand"); 4182 return false; 4183 } 4184 if 
(!validateDPP(Inst, Operands)) { 4185 return false; 4186 } 4187 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4188 if (!validateMIMGD16(Inst)) { 4189 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4190 "d16 modifier is not supported on this GPU"); 4191 return false; 4192 } 4193 if (!validateMIMGDim(Inst)) { 4194 Error(IDLoc, "dim modifier is required on this GPU"); 4195 return false; 4196 } 4197 if (!validateMIMGMSAA(Inst)) { 4198 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4199 "invalid dim; must be MSAA type"); 4200 return false; 4201 } 4202 if (!validateMIMGDataSize(Inst)) { 4203 Error(IDLoc, 4204 "image data size does not match dmask and tfe"); 4205 return false; 4206 } 4207 if (!validateMIMGAddrSize(Inst)) { 4208 Error(IDLoc, 4209 "image address size does not match dim and a16"); 4210 return false; 4211 } 4212 if (!validateMIMGAtomicDMask(Inst)) { 4213 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4214 "invalid atomic image dmask"); 4215 return false; 4216 } 4217 if (!validateMIMGGatherDMask(Inst)) { 4218 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4219 "invalid image_gather dmask: only one bit must be set"); 4220 return false; 4221 } 4222 if (!validateMovrels(Inst, Operands)) { 4223 return false; 4224 } 4225 if (!validateFlatOffset(Inst, Operands)) { 4226 return false; 4227 } 4228 if (!validateSMEMOffset(Inst, Operands)) { 4229 return false; 4230 } 4231 if (!validateMAIAccWrite(Inst, Operands)) { 4232 return false; 4233 } 4234 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4235 return false; 4236 } 4237 4238 if (!validateAGPRLdSt(Inst)) { 4239 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4240 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4241 : "invalid register class: agpr loads and stores not supported on this GPU" 4242 ); 4243 return false; 4244 } 4245 if (!validateVGPRAlign(Inst)) { 4246 Error(IDLoc, 4247 "invalid register class: vgpr tuples must be 64 bit aligned"); 4248 return false; 4249 } 4250 4251 if (!validateDivScale(Inst)) { 4252 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4253 return false; 4254 } 4255 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4256 return false; 4257 } 4258 4259 return true; 4260 } 4261 4262 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4263 const FeatureBitset &FBS, 4264 unsigned VariantID = 0); 4265 4266 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4267 const FeatureBitset &AvailableFeatures, 4268 unsigned VariantID); 4269 4270 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4271 const FeatureBitset &FBS) { 4272 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4273 } 4274 4275 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4276 const FeatureBitset &FBS, 4277 ArrayRef<unsigned> Variants) { 4278 for (auto Variant : Variants) { 4279 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4280 return true; 4281 } 4282 4283 return false; 4284 } 4285 4286 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4287 const SMLoc &IDLoc) { 4288 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4289 4290 // Check if requested instruction variant is supported. 4291 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4292 return false; 4293 4294 // This instruction is not supported. 4295 // Clear any other pending errors because they are no longer relevant. 4296 getParser().clearPendingErrors(); 4297 4298 // Requested instruction variant is not supported. 4299 // Check if any other variants are supported. 
  StringRef VariantName = getMatchedVariantName();
  // The mnemonic exists, but only under a different assembler variant.
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

// Match the parsed operands against all applicable assembler variants,
// validate, and emit the instruction. Returns true on error (MC parser
// convention).
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most
    // specific status as resulting.
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature <
    // Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      // Diagnostic already emitted by the validator.
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}

// Parse an absolute integer expression into Ret. Returns true on failure
// (note: inverted sense relative to the Parse* directive helpers' callers).
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

// Parse the "<major>, <minor>" pair shared by the HSA code-object
// directives.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

// Parse .amdgcn_target and check the quoted target-id string against the
// one configured for this subtarget. (Continues below this chunk boundary.)
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  SMRange TargetRange =
      SMRange(TargetStart, getTok().getLoc());
  // The directive's target id must match the subtarget exactly.
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}

// Shared "value out of range" diagnostic helper.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}

// Convert raw VGPR/SGPR counts into the granulated block counts encoded in
// the kernel descriptor, applying per-generation rules and the SGPR init
// bug workaround.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    // GFX10+ does not encode an SGPR count.
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Account for VCC, FLAT_SCRATCH and XNACK_MASK reserved registers.
    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Hardware bug workaround: always report a fixed SGPR count.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

// Parse the .amdhsa_kernel directive block: one .amdhsa_* entry per line
// through .end_amdhsa_kernel, accumulating a kernel descriptor. (The long
// directive dispatch chain continues below this chunk boundary.)
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Tracks which .amdhsa_ directives have appeared; repeats are errors.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  uint64_t AccumOffset = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  Optional<bool> EnableWavefrontSize32;

  while (true) {
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the field's declared bit width, then store it.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // Each enabled user SGPR feature reserves a fixed number of SGPRs;
      // the running total is range-checked after the loop.
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // Remembered separately: it also feeds the VGPR block calculation.
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // Stored raw; converted to granulated blocks after the loop.
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      // XNACK reservation is fixed by the target id; only accept a
      // matching value.
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
                                 IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  if (isGFX90A()) {
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    // accum_offset may not exceed the (4-aligned) VGPR allocation.
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return
TokError("accum_offset exceeds total VGPR allocation"); 4775 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4776 (AccumOffset / 4 - 1)); 4777 } 4778 4779 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4780 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4781 ReserveFlatScr); 4782 return false; 4783 } 4784 4785 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4786 uint32_t Major; 4787 uint32_t Minor; 4788 4789 if (ParseDirectiveMajorMinor(Major, Minor)) 4790 return true; 4791 4792 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4793 return false; 4794 } 4795 4796 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4797 uint32_t Major; 4798 uint32_t Minor; 4799 uint32_t Stepping; 4800 StringRef VendorName; 4801 StringRef ArchName; 4802 4803 // If this directive has no arguments, then use the ISA version for the 4804 // targeted GPU. 4805 if (isToken(AsmToken::EndOfStatement)) { 4806 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4807 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4808 ISA.Stepping, 4809 "AMD", "AMDGPU"); 4810 return false; 4811 } 4812 4813 if (ParseDirectiveMajorMinor(Major, Minor)) 4814 return true; 4815 4816 if (!trySkipToken(AsmToken::Comma)) 4817 return TokError("stepping version number required, comma expected"); 4818 4819 if (ParseAsAbsoluteExpression(Stepping)) 4820 return TokError("invalid stepping version"); 4821 4822 if (!trySkipToken(AsmToken::Comma)) 4823 return TokError("vendor name required, comma expected"); 4824 4825 if (!parseString(VendorName, "invalid vendor name")) 4826 return true; 4827 4828 if (!trySkipToken(AsmToken::Comma)) 4829 return TokError("arch name required, comma expected"); 4830 4831 if (!parseString(ArchName, "invalid arch name")) 4832 return true; 4833 4834 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4835 VendorName, ArchName); 4836 return false; 4837 } 

// Parse one "key = value" entry of an .amd_kernel_code_t block into Header,
// then cross-check a few fields against the subtarget's features.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // The wavefront-size related fields must be consistent with the
  // subtarget's wavefront size features.
  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is stored as log2: 5 -> wave32, 6 -> wave64.
  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  // The following compute_pgm_resource_registers bits exist on GFX10+ only.
  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}

// Parse an .amd_kernel_code_t ... .end_amd_kernel_code_t block and emit the
// resulting header.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

// Parse .amdgpu_hsa_kernel <name>: mark the symbol as an HSA kernel and
// start a new kernel scope for register tracking.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);

  KernelScope.initialize(getContext());
  return false;
}

// Parse .amd_amdgpu_isa and verify its target-id string matches the one
// configured via command-line options.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  Lex();

  return false;
}

// Parse an HSA metadata block, choosing the V3+ or V2 directive pair based
// on the code-object ABI. (Continues below this chunk boundary.)
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char
*AssemblerDirectiveEnd; 4957 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4958 isHsaAbiVersion3Or4(&getSTI()) 4959 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4960 HSAMD::V3::AssemblerDirectiveEnd) 4961 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4962 HSAMD::AssemblerDirectiveEnd); 4963 4964 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4965 return Error(getLoc(), 4966 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4967 "not available on non-amdhsa OSes")).str()); 4968 } 4969 4970 std::string HSAMetadataString; 4971 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4972 HSAMetadataString)) 4973 return true; 4974 4975 if (isHsaAbiVersion3Or4(&getSTI())) { 4976 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4977 return Error(getLoc(), "invalid HSA metadata"); 4978 } else { 4979 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4980 return Error(getLoc(), "invalid HSA metadata"); 4981 } 4982 4983 return false; 4984 } 4985 4986 /// Common code to parse out a block of text (typically YAML) between start and 4987 /// end directives. 
4988 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4989 const char *AssemblerDirectiveEnd, 4990 std::string &CollectString) { 4991 4992 raw_string_ostream CollectStream(CollectString); 4993 4994 getLexer().setSkipSpace(false); 4995 4996 bool FoundEnd = false; 4997 while (!isToken(AsmToken::Eof)) { 4998 while (isToken(AsmToken::Space)) { 4999 CollectStream << getTokenStr(); 5000 Lex(); 5001 } 5002 5003 if (trySkipId(AssemblerDirectiveEnd)) { 5004 FoundEnd = true; 5005 break; 5006 } 5007 5008 CollectStream << Parser.parseStringToEndOfStatement() 5009 << getContext().getAsmInfo()->getSeparatorString(); 5010 5011 Parser.eatToEndOfStatement(); 5012 } 5013 5014 getLexer().setSkipSpace(true); 5015 5016 if (isToken(AsmToken::Eof) && !FoundEnd) { 5017 return TokError(Twine("expected directive ") + 5018 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5019 } 5020 5021 CollectStream.flush(); 5022 return false; 5023 } 5024 5025 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5026 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5027 std::string String; 5028 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5029 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5030 return true; 5031 5032 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5033 if (!PALMetadata->setFromString(String)) 5034 return Error(getLoc(), "invalid PAL metadata"); 5035 return false; 5036 } 5037 5038 /// Parse the assembler directive for old linear-format PAL metadata. 
5039 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5040 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5041 return Error(getLoc(), 5042 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5043 "not available on non-amdpal OSes")).str()); 5044 } 5045 5046 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5047 PALMetadata->setLegacy(); 5048 for (;;) { 5049 uint32_t Key, Value; 5050 if (ParseAsAbsoluteExpression(Key)) { 5051 return TokError(Twine("invalid value in ") + 5052 Twine(PALMD::AssemblerDirective)); 5053 } 5054 if (!trySkipToken(AsmToken::Comma)) { 5055 return TokError(Twine("expected an even number of values in ") + 5056 Twine(PALMD::AssemblerDirective)); 5057 } 5058 if (ParseAsAbsoluteExpression(Value)) { 5059 return TokError(Twine("invalid value in ") + 5060 Twine(PALMD::AssemblerDirective)); 5061 } 5062 PALMetadata->setRegister(Key, Value); 5063 if (!trySkipToken(AsmToken::Comma)) 5064 break; 5065 } 5066 return false; 5067 } 5068 5069 /// ParseDirectiveAMDGPULDS 5070 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5071 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5072 if (getParser().checkForValidSection()) 5073 return true; 5074 5075 StringRef Name; 5076 SMLoc NameLoc = getLoc(); 5077 if (getParser().parseIdentifier(Name)) 5078 return TokError("expected identifier in directive"); 5079 5080 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5081 if (parseToken(AsmToken::Comma, "expected ','")) 5082 return true; 5083 5084 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5085 5086 int64_t Size; 5087 SMLoc SizeLoc = getLoc(); 5088 if (getParser().parseAbsoluteExpression(Size)) 5089 return true; 5090 if (Size < 0) 5091 return Error(SizeLoc, "size must be non-negative"); 5092 if (Size > LocalMemorySize) 5093 return Error(SizeLoc, "size is too large"); 5094 5095 int64_t Alignment = 4; 5096 if (trySkipToken(AsmToken::Comma)) { 5097 SMLoc AlignLoc = getLoc(); 5098 if 
(getParser().parseAbsoluteExpression(Alignment)) 5099 return true; 5100 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5101 return Error(AlignLoc, "alignment must be a power of two"); 5102 5103 // Alignment larger than the size of LDS is possible in theory, as long 5104 // as the linker manages to place to symbol at address 0, but we do want 5105 // to make sure the alignment fits nicely into a 32-bit integer. 5106 if (Alignment >= 1u << 31) 5107 return Error(AlignLoc, "alignment is too large"); 5108 } 5109 5110 if (parseToken(AsmToken::EndOfStatement, 5111 "unexpected token in '.amdgpu_lds' directive")) 5112 return true; 5113 5114 Symbol->redefineIfPossible(); 5115 if (!Symbol->isUndefined()) 5116 return Error(NameLoc, "invalid symbol redefinition"); 5117 5118 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5119 return false; 5120 } 5121 5122 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5123 StringRef IDVal = DirectiveID.getString(); 5124 5125 if (isHsaAbiVersion3Or4(&getSTI())) { 5126 if (IDVal == ".amdhsa_kernel") 5127 return ParseDirectiveAMDHSAKernel(); 5128 5129 // TODO: Restructure/combine with PAL metadata directive. 
5130 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5131 return ParseDirectiveHSAMetadata(); 5132 } else { 5133 if (IDVal == ".hsa_code_object_version") 5134 return ParseDirectiveHSACodeObjectVersion(); 5135 5136 if (IDVal == ".hsa_code_object_isa") 5137 return ParseDirectiveHSACodeObjectISA(); 5138 5139 if (IDVal == ".amd_kernel_code_t") 5140 return ParseDirectiveAMDKernelCodeT(); 5141 5142 if (IDVal == ".amdgpu_hsa_kernel") 5143 return ParseDirectiveAMDGPUHsaKernel(); 5144 5145 if (IDVal == ".amd_amdgpu_isa") 5146 return ParseDirectiveISAVersion(); 5147 5148 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5149 return ParseDirectiveHSAMetadata(); 5150 } 5151 5152 if (IDVal == ".amdgcn_target") 5153 return ParseDirectiveAMDGCNTarget(); 5154 5155 if (IDVal == ".amdgpu_lds") 5156 return ParseDirectiveAMDGPULDS(); 5157 5158 if (IDVal == PALMD::AssemblerDirectiveBegin) 5159 return ParseDirectivePALMetadataBegin(); 5160 5161 if (IDVal == PALMD::AssemblerDirective) 5162 return ParseDirectivePALMetadata(); 5163 5164 return true; 5165 } 5166 5167 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5168 unsigned RegNo) { 5169 5170 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5171 R.isValid(); ++R) { 5172 if (*R == RegNo) 5173 return isGFX9Plus(); 5174 } 5175 5176 // GFX10 has 2 more SGPRs 104 and 105. 
5177 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5178 R.isValid(); ++R) { 5179 if (*R == RegNo) 5180 return hasSGPR104_SGPR105(); 5181 } 5182 5183 switch (RegNo) { 5184 case AMDGPU::SRC_SHARED_BASE: 5185 case AMDGPU::SRC_SHARED_LIMIT: 5186 case AMDGPU::SRC_PRIVATE_BASE: 5187 case AMDGPU::SRC_PRIVATE_LIMIT: 5188 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5189 return isGFX9Plus(); 5190 case AMDGPU::TBA: 5191 case AMDGPU::TBA_LO: 5192 case AMDGPU::TBA_HI: 5193 case AMDGPU::TMA: 5194 case AMDGPU::TMA_LO: 5195 case AMDGPU::TMA_HI: 5196 return !isGFX9Plus(); 5197 case AMDGPU::XNACK_MASK: 5198 case AMDGPU::XNACK_MASK_LO: 5199 case AMDGPU::XNACK_MASK_HI: 5200 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5201 case AMDGPU::SGPR_NULL: 5202 return isGFX10Plus(); 5203 default: 5204 break; 5205 } 5206 5207 if (isCI()) 5208 return true; 5209 5210 if (isSI() || isGFX10Plus()) { 5211 // No flat_scr on SI. 5212 // On GFX10 flat scratch is not a valid register operand and can only be 5213 // accessed with s_setreg/s_getreg. 5214 switch (RegNo) { 5215 case AMDGPU::FLAT_SCR: 5216 case AMDGPU::FLAT_SCR_LO: 5217 case AMDGPU::FLAT_SCR_HI: 5218 return false; 5219 default: 5220 return true; 5221 } 5222 } 5223 5224 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5225 // SI/CI have. 5226 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5227 R.isValid(); ++R) { 5228 if (*R == RegNo) 5229 return hasSGPR102_SGPR103(); 5230 } 5231 5232 return true; 5233 } 5234 5235 OperandMatchResultTy 5236 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5237 OperandMode Mode) { 5238 // Try to parse with a custom parser 5239 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5240 5241 // If we successfully parsed the operand or if there as an error parsing, 5242 // we are done. 
5243 // 5244 // If we are parsing after we reach EndOfStatement then this means we 5245 // are appending default values to the Operands list. This is only done 5246 // by custom parser, so we shouldn't continue on to the generic parsing. 5247 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5248 isToken(AsmToken::EndOfStatement)) 5249 return ResTy; 5250 5251 SMLoc RBraceLoc; 5252 SMLoc LBraceLoc = getLoc(); 5253 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5254 unsigned Prefix = Operands.size(); 5255 5256 for (;;) { 5257 auto Loc = getLoc(); 5258 ResTy = parseReg(Operands); 5259 if (ResTy == MatchOperand_NoMatch) 5260 Error(Loc, "expected a register"); 5261 if (ResTy != MatchOperand_Success) 5262 return MatchOperand_ParseFail; 5263 5264 RBraceLoc = getLoc(); 5265 if (trySkipToken(AsmToken::RBrac)) 5266 break; 5267 5268 if (!skipToken(AsmToken::Comma, 5269 "expected a comma or a closing square bracket")) { 5270 return MatchOperand_ParseFail; 5271 } 5272 } 5273 5274 if (Operands.size() - Prefix > 1) { 5275 Operands.insert(Operands.begin() + Prefix, 5276 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5277 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5278 } 5279 5280 return MatchOperand_Success; 5281 } 5282 5283 return parseRegOrImm(Operands); 5284 } 5285 5286 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5287 // Clear any forced encodings from the previous instruction. 
5288 setForcedEncodingSize(0); 5289 setForcedDPP(false); 5290 setForcedSDWA(false); 5291 5292 if (Name.endswith("_e64")) { 5293 setForcedEncodingSize(64); 5294 return Name.substr(0, Name.size() - 4); 5295 } else if (Name.endswith("_e32")) { 5296 setForcedEncodingSize(32); 5297 return Name.substr(0, Name.size() - 4); 5298 } else if (Name.endswith("_dpp")) { 5299 setForcedDPP(true); 5300 return Name.substr(0, Name.size() - 4); 5301 } else if (Name.endswith("_sdwa")) { 5302 setForcedSDWA(true); 5303 return Name.substr(0, Name.size() - 5); 5304 } 5305 return Name; 5306 } 5307 5308 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5309 StringRef Name, 5310 SMLoc NameLoc, OperandVector &Operands) { 5311 // Add the instruction mnemonic 5312 Name = parseMnemonicSuffix(Name); 5313 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5314 5315 bool IsMIMG = Name.startswith("image_"); 5316 5317 while (!trySkipToken(AsmToken::EndOfStatement)) { 5318 OperandMode Mode = OperandMode_Default; 5319 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5320 Mode = OperandMode_NSA; 5321 CPolSeen = 0; 5322 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5323 5324 if (Res != MatchOperand_Success) { 5325 checkUnsupportedInstruction(Name, NameLoc); 5326 if (!Parser.hasPendingError()) { 5327 // FIXME: use real operand location rather than the current location. 5328 StringRef Msg = 5329 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5330 "not a valid operand."; 5331 Error(getLoc(), Msg); 5332 } 5333 while (!trySkipToken(AsmToken::EndOfStatement)) { 5334 lex(); 5335 } 5336 return true; 5337 } 5338 5339 // Eat the comma or space if there is one. 
5340 trySkipToken(AsmToken::Comma); 5341 } 5342 5343 return false; 5344 } 5345 5346 //===----------------------------------------------------------------------===// 5347 // Utility functions 5348 //===----------------------------------------------------------------------===// 5349 5350 OperandMatchResultTy 5351 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5352 5353 if (!trySkipId(Prefix, AsmToken::Colon)) 5354 return MatchOperand_NoMatch; 5355 5356 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5357 } 5358 5359 OperandMatchResultTy 5360 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5361 AMDGPUOperand::ImmTy ImmTy, 5362 bool (*ConvertResult)(int64_t&)) { 5363 SMLoc S = getLoc(); 5364 int64_t Value = 0; 5365 5366 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5367 if (Res != MatchOperand_Success) 5368 return Res; 5369 5370 if (ConvertResult && !ConvertResult(Value)) { 5371 Error(S, "invalid " + StringRef(Prefix) + " value."); 5372 } 5373 5374 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5375 return MatchOperand_Success; 5376 } 5377 5378 OperandMatchResultTy 5379 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5380 OperandVector &Operands, 5381 AMDGPUOperand::ImmTy ImmTy, 5382 bool (*ConvertResult)(int64_t&)) { 5383 SMLoc S = getLoc(); 5384 if (!trySkipId(Prefix, AsmToken::Colon)) 5385 return MatchOperand_NoMatch; 5386 5387 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5388 return MatchOperand_ParseFail; 5389 5390 unsigned Val = 0; 5391 const unsigned MaxSize = 4; 5392 5393 // FIXME: How to verify the number of elements matches the number of src 5394 // operands? 
5395 for (int I = 0; ; ++I) { 5396 int64_t Op; 5397 SMLoc Loc = getLoc(); 5398 if (!parseExpr(Op)) 5399 return MatchOperand_ParseFail; 5400 5401 if (Op != 0 && Op != 1) { 5402 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5403 return MatchOperand_ParseFail; 5404 } 5405 5406 Val |= (Op << I); 5407 5408 if (trySkipToken(AsmToken::RBrac)) 5409 break; 5410 5411 if (I + 1 == MaxSize) { 5412 Error(getLoc(), "expected a closing square bracket"); 5413 return MatchOperand_ParseFail; 5414 } 5415 5416 if (!skipToken(AsmToken::Comma, "expected a comma")) 5417 return MatchOperand_ParseFail; 5418 } 5419 5420 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5421 return MatchOperand_Success; 5422 } 5423 5424 OperandMatchResultTy 5425 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5426 AMDGPUOperand::ImmTy ImmTy) { 5427 int64_t Bit; 5428 SMLoc S = getLoc(); 5429 5430 if (trySkipId(Name)) { 5431 Bit = 1; 5432 } else if (trySkipId("no", Name)) { 5433 Bit = 0; 5434 } else { 5435 return MatchOperand_NoMatch; 5436 } 5437 5438 if (Name == "r128" && !hasMIMG_R128()) { 5439 Error(S, "r128 modifier is not supported on this GPU"); 5440 return MatchOperand_ParseFail; 5441 } 5442 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5443 Error(S, "a16 modifier is not supported on this GPU"); 5444 return MatchOperand_ParseFail; 5445 } 5446 5447 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5448 ImmTy = AMDGPUOperand::ImmTyR128A16; 5449 5450 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5451 return MatchOperand_Success; 5452 } 5453 5454 OperandMatchResultTy 5455 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5456 unsigned CPolOn = 0; 5457 unsigned CPolOff = 0; 5458 SMLoc S = getLoc(); 5459 5460 if (trySkipId("glc")) 5461 CPolOn = AMDGPU::CPol::GLC; 5462 else if (trySkipId("noglc")) 5463 CPolOff = AMDGPU::CPol::GLC; 5464 else if (trySkipId("slc")) 5465 CPolOn = AMDGPU::CPol::SLC; 5466 else if (trySkipId("noslc")) 
5467 CPolOff = AMDGPU::CPol::SLC; 5468 else if (trySkipId("dlc")) 5469 CPolOn = AMDGPU::CPol::DLC; 5470 else if (trySkipId("nodlc")) 5471 CPolOff = AMDGPU::CPol::DLC; 5472 else if (trySkipId("scc")) 5473 CPolOn = AMDGPU::CPol::SCC; 5474 else if (trySkipId("noscc")) 5475 CPolOff = AMDGPU::CPol::SCC; 5476 else 5477 return MatchOperand_NoMatch; 5478 5479 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5480 Error(S, "dlc modifier is not supported on this GPU"); 5481 return MatchOperand_ParseFail; 5482 } 5483 5484 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5485 Error(S, "scc modifier is not supported on this GPU"); 5486 return MatchOperand_ParseFail; 5487 } 5488 5489 if (CPolSeen & (CPolOn | CPolOff)) { 5490 Error(S, "duplicate cache policy modifier"); 5491 return MatchOperand_ParseFail; 5492 } 5493 5494 CPolSeen |= (CPolOn | CPolOff); 5495 5496 for (unsigned I = 1; I != Operands.size(); ++I) { 5497 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5498 if (Op.isCPol()) { 5499 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5500 return MatchOperand_Success; 5501 } 5502 } 5503 5504 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5505 AMDGPUOperand::ImmTyCPol)); 5506 5507 return MatchOperand_Success; 5508 } 5509 5510 static void addOptionalImmOperand( 5511 MCInst& Inst, const OperandVector& Operands, 5512 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5513 AMDGPUOperand::ImmTy ImmT, 5514 int64_t Default = 0) { 5515 auto i = OptionalIdx.find(ImmT); 5516 if (i != OptionalIdx.end()) { 5517 unsigned Idx = i->second; 5518 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5519 } else { 5520 Inst.addOperand(MCOperand::createImm(Default)); 5521 } 5522 } 5523 5524 OperandMatchResultTy 5525 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5526 StringRef &Value, 5527 SMLoc &StringLoc) { 5528 if (!trySkipId(Prefix, AsmToken::Colon)) 5529 return MatchOperand_NoMatch; 5530 5531 StringLoc = getLoc(); 5532 return 
parseId(Value, "expected an identifier") ? MatchOperand_Success 5533 : MatchOperand_ParseFail; 5534 } 5535 5536 //===----------------------------------------------------------------------===// 5537 // MTBUF format 5538 //===----------------------------------------------------------------------===// 5539 5540 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5541 int64_t MaxVal, 5542 int64_t &Fmt) { 5543 int64_t Val; 5544 SMLoc Loc = getLoc(); 5545 5546 auto Res = parseIntWithPrefix(Pref, Val); 5547 if (Res == MatchOperand_ParseFail) 5548 return false; 5549 if (Res == MatchOperand_NoMatch) 5550 return true; 5551 5552 if (Val < 0 || Val > MaxVal) { 5553 Error(Loc, Twine("out of range ", StringRef(Pref))); 5554 return false; 5555 } 5556 5557 Fmt = Val; 5558 return true; 5559 } 5560 5561 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5562 // values to live in a joint format operand in the MCInst encoding. 5563 OperandMatchResultTy 5564 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5565 using namespace llvm::AMDGPU::MTBUFFormat; 5566 5567 int64_t Dfmt = DFMT_UNDEF; 5568 int64_t Nfmt = NFMT_UNDEF; 5569 5570 // dfmt and nfmt can appear in either order, and each is optional. 5571 for (int I = 0; I < 2; ++I) { 5572 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5573 return MatchOperand_ParseFail; 5574 5575 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5576 return MatchOperand_ParseFail; 5577 } 5578 // Skip optional comma between dfmt/nfmt 5579 // but guard against 2 commas following each other. 5580 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5581 !peekToken().is(AsmToken::Comma)) { 5582 trySkipToken(AsmToken::Comma); 5583 } 5584 } 5585 5586 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5587 return MatchOperand_NoMatch; 5588 5589 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5590 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5591 5592 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5593 return MatchOperand_Success; 5594 } 5595 5596 OperandMatchResultTy 5597 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5598 using namespace llvm::AMDGPU::MTBUFFormat; 5599 5600 int64_t Fmt = UFMT_UNDEF; 5601 5602 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5603 return MatchOperand_ParseFail; 5604 5605 if (Fmt == UFMT_UNDEF) 5606 return MatchOperand_NoMatch; 5607 5608 Format = Fmt; 5609 return MatchOperand_Success; 5610 } 5611 5612 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5613 int64_t &Nfmt, 5614 StringRef FormatStr, 5615 SMLoc Loc) { 5616 using namespace llvm::AMDGPU::MTBUFFormat; 5617 int64_t Format; 5618 5619 Format = getDfmt(FormatStr); 5620 if (Format != DFMT_UNDEF) { 5621 Dfmt = Format; 5622 return true; 5623 } 5624 5625 Format = getNfmt(FormatStr, getSTI()); 5626 if (Format != NFMT_UNDEF) { 5627 Nfmt = Format; 5628 return true; 5629 } 5630 5631 Error(Loc, "unsupported format"); 5632 return false; 5633 } 5634 5635 OperandMatchResultTy 5636 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5637 SMLoc FormatLoc, 5638 int64_t &Format) { 5639 using namespace llvm::AMDGPU::MTBUFFormat; 5640 5641 int64_t Dfmt = DFMT_UNDEF; 5642 int64_t Nfmt = NFMT_UNDEF; 5643 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5644 return MatchOperand_ParseFail; 5645 5646 if (trySkipToken(AsmToken::Comma)) { 5647 StringRef Str; 5648 SMLoc Loc = getLoc(); 5649 if (!parseId(Str, "expected a format string") || 5650 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5651 return MatchOperand_ParseFail; 5652 } 5653 if (Dfmt == DFMT_UNDEF) { 5654 Error(Loc, "duplicate numeric format"); 5655 return MatchOperand_ParseFail; 5656 } else if (Nfmt == NFMT_UNDEF) { 5657 Error(Loc, "duplicate data format"); 5658 return MatchOperand_ParseFail; 5659 } 5660 } 5661 5662 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5663 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5664 5665 if (isGFX10Plus()) { 5666 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5667 if (Ufmt == UFMT_UNDEF) { 5668 Error(FormatLoc, "unsupported format"); 5669 return MatchOperand_ParseFail; 5670 } 5671 Format = Ufmt; 5672 } else { 5673 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5674 } 5675 5676 return MatchOperand_Success; 5677 } 5678 5679 OperandMatchResultTy 5680 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5681 SMLoc Loc, 5682 int64_t &Format) { 5683 using namespace llvm::AMDGPU::MTBUFFormat; 5684 5685 auto Id = getUnifiedFormat(FormatStr); 5686 if (Id == UFMT_UNDEF) 5687 return MatchOperand_NoMatch; 5688 5689 if (!isGFX10Plus()) { 5690 Error(Loc, "unified format is not supported on this GPU"); 5691 return MatchOperand_ParseFail; 5692 } 5693 5694 Format = Id; 5695 return MatchOperand_Success; 5696 } 5697 5698 OperandMatchResultTy 5699 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5700 using namespace llvm::AMDGPU::MTBUFFormat; 5701 SMLoc Loc = getLoc(); 5702 5703 if (!parseExpr(Format)) 5704 return MatchOperand_ParseFail; 5705 if (!isValidFormatEncoding(Format, getSTI())) { 5706 Error(Loc, "out of range format"); 5707 return MatchOperand_ParseFail; 5708 } 5709 5710 return MatchOperand_Success; 5711 } 5712 5713 OperandMatchResultTy 5714 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5715 using namespace llvm::AMDGPU::MTBUFFormat; 5716 5717 if (!trySkipId("format", AsmToken::Colon)) 5718 return MatchOperand_NoMatch; 5719 5720 if (trySkipToken(AsmToken::LBrac)) { 5721 StringRef FormatStr; 5722 SMLoc Loc = getLoc(); 5723 if (!parseId(FormatStr, "expected a format string")) 5724 return MatchOperand_ParseFail; 5725 5726 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5727 if (Res == MatchOperand_NoMatch) 5728 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5729 if (Res != MatchOperand_Success) 5730 return Res; 5731 5732 if (!skipToken(AsmToken::RBrac, "expected a closing square 
bracket")) 5733 return MatchOperand_ParseFail; 5734 5735 return MatchOperand_Success; 5736 } 5737 5738 return parseNumericFormat(Format); 5739 } 5740 5741 OperandMatchResultTy 5742 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5743 using namespace llvm::AMDGPU::MTBUFFormat; 5744 5745 int64_t Format = getDefaultFormatEncoding(getSTI()); 5746 OperandMatchResultTy Res; 5747 SMLoc Loc = getLoc(); 5748 5749 // Parse legacy format syntax. 5750 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5751 if (Res == MatchOperand_ParseFail) 5752 return Res; 5753 5754 bool FormatFound = (Res == MatchOperand_Success); 5755 5756 Operands.push_back( 5757 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5758 5759 if (FormatFound) 5760 trySkipToken(AsmToken::Comma); 5761 5762 if (isToken(AsmToken::EndOfStatement)) { 5763 // We are expecting an soffset operand, 5764 // but let matcher handle the error. 5765 return MatchOperand_Success; 5766 } 5767 5768 // Parse soffset. 
5769 Res = parseRegOrImm(Operands); 5770 if (Res != MatchOperand_Success) 5771 return Res; 5772 5773 trySkipToken(AsmToken::Comma); 5774 5775 if (!FormatFound) { 5776 Res = parseSymbolicOrNumericFormat(Format); 5777 if (Res == MatchOperand_ParseFail) 5778 return Res; 5779 if (Res == MatchOperand_Success) { 5780 auto Size = Operands.size(); 5781 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5782 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5783 Op.setImm(Format); 5784 } 5785 return MatchOperand_Success; 5786 } 5787 5788 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5789 Error(getLoc(), "duplicate format"); 5790 return MatchOperand_ParseFail; 5791 } 5792 return MatchOperand_Success; 5793 } 5794 5795 //===----------------------------------------------------------------------===// 5796 // ds 5797 //===----------------------------------------------------------------------===// 5798 5799 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5800 const OperandVector &Operands) { 5801 OptionalImmIndexMap OptionalIdx; 5802 5803 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5804 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5805 5806 // Add the register arguments 5807 if (Op.isReg()) { 5808 Op.addRegOperands(Inst, 1); 5809 continue; 5810 } 5811 5812 // Handle optional arguments 5813 OptionalIdx[Op.getImmTy()] = i; 5814 } 5815 5816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5818 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5819 5820 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5821 } 5822 5823 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5824 bool IsGdsHardcoded) { 5825 OptionalImmIndexMap OptionalIdx; 5826 5827 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5828 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[i]); 5829 5830 // Add the register arguments 5831 if (Op.isReg()) { 5832 Op.addRegOperands(Inst, 1); 5833 continue; 5834 } 5835 5836 if (Op.isToken() && Op.getToken() == "gds") { 5837 IsGdsHardcoded = true; 5838 continue; 5839 } 5840 5841 // Handle optional arguments 5842 OptionalIdx[Op.getImmTy()] = i; 5843 } 5844 5845 AMDGPUOperand::ImmTy OffsetType = 5846 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5847 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5848 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle : 5849 AMDGPUOperand::ImmTyOffset; 5850 5851 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5852 5853 if (!IsGdsHardcoded) { 5854 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5855 } 5856 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5857 } 5858 5859 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5860 OptionalImmIndexMap OptionalIdx; 5861 5862 unsigned OperandIdx[4]; 5863 unsigned EnMask = 0; 5864 int SrcIdx = 0; 5865 5866 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5867 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5868 5869 // Add the register arguments 5870 if (Op.isReg()) { 5871 assert(SrcIdx < 4); 5872 OperandIdx[SrcIdx] = Inst.size(); 5873 Op.addRegOperands(Inst, 1); 5874 ++SrcIdx; 5875 continue; 5876 } 5877 5878 if (Op.isOff()) { 5879 assert(SrcIdx < 4); 5880 OperandIdx[SrcIdx] = Inst.size(); 5881 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5882 ++SrcIdx; 5883 continue; 5884 } 5885 5886 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5887 Op.addImmOperands(Inst, 1); 5888 continue; 5889 } 5890 5891 if (Op.isToken() && Op.getToken() == "done") 5892 continue; 5893 5894 // Handle optional arguments 5895 OptionalIdx[Op.getImmTy()] = i; 5896 } 5897 5898 assert(SrcIdx == 4); 5899 5900 bool Compr = false; 5901 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != 
OptionalIdx.end()) { 5902 Compr = true; 5903 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5904 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5905 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5906 } 5907 5908 for (auto i = 0; i < SrcIdx; ++i) { 5909 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5910 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 5911 } 5912 } 5913 5914 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5915 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5916 5917 Inst.addOperand(MCOperand::createImm(EnMask)); 5918 } 5919 5920 //===----------------------------------------------------------------------===// 5921 // s_waitcnt 5922 //===----------------------------------------------------------------------===// 5923 5924 static bool 5925 encodeCnt( 5926 const AMDGPU::IsaVersion ISA, 5927 int64_t &IntVal, 5928 int64_t CntVal, 5929 bool Saturate, 5930 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5931 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5932 { 5933 bool Failed = false; 5934 5935 IntVal = encode(ISA, IntVal, CntVal); 5936 if (CntVal != decode(ISA, IntVal)) { 5937 if (Saturate) { 5938 IntVal = encode(ISA, IntVal, -1); 5939 } else { 5940 Failed = true; 5941 } 5942 } 5943 return Failed; 5944 } 5945 5946 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5947 5948 SMLoc CntLoc = getLoc(); 5949 StringRef CntName = getTokenStr(); 5950 5951 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5952 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5953 return false; 5954 5955 int64_t CntVal; 5956 SMLoc ValLoc = getLoc(); 5957 if (!parseExpr(CntVal)) 5958 return false; 5959 5960 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5961 5962 bool Failed = true; 5963 bool Sat = CntName.endswith("_sat"); 5964 5965 if (CntName == "vmcnt" || CntName == 
"vmcnt_sat") { 5966 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5967 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5968 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5969 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5970 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5971 } else { 5972 Error(CntLoc, "invalid counter name " + CntName); 5973 return false; 5974 } 5975 5976 if (Failed) { 5977 Error(ValLoc, "too large value for " + CntName); 5978 return false; 5979 } 5980 5981 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5982 return false; 5983 5984 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5985 if (isToken(AsmToken::EndOfStatement)) { 5986 Error(getLoc(), "expected a counter name"); 5987 return false; 5988 } 5989 } 5990 5991 return true; 5992 } 5993 5994 OperandMatchResultTy 5995 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5996 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5997 int64_t Waitcnt = getWaitcntBitMask(ISA); 5998 SMLoc S = getLoc(); 5999 6000 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6001 while (!isToken(AsmToken::EndOfStatement)) { 6002 if (!parseCnt(Waitcnt)) 6003 return MatchOperand_ParseFail; 6004 } 6005 } else { 6006 if (!parseExpr(Waitcnt)) 6007 return MatchOperand_ParseFail; 6008 } 6009 6010 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6011 return MatchOperand_Success; 6012 } 6013 6014 bool 6015 AMDGPUOperand::isSWaitCnt() const { 6016 return isImm(); 6017 } 6018 6019 //===----------------------------------------------------------------------===// 6020 // hwreg 6021 //===----------------------------------------------------------------------===// 6022 6023 bool 6024 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6025 OperandInfoTy &Offset, 6026 OperandInfoTy &Width) { 6027 using namespace 
llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  // hwreg(REG) with no offset/width is a complete specification.
  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

// Range-check the parsed hwreg fields, emitting a diagnostic at the
// offending operand's location on failure. A symbolically named register
// is additionally checked against the current subtarget.
bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(HwReg.Loc,
          "specified hardware register is not supported on this GPU");
    return false;
  }
  if (!isValidHwreg(HwReg.Id)) {
    Error(HwReg.Loc,
          "invalid code of hardware register: only 6-bit values are legal");
    return false;
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

// Parse an s_getreg/s_setreg operand: either a hwreg(...) macro or a raw
// immediate that must fit the 16-bit encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

// Parse the body of a sendmsg(...) specifier: a message id (symbolic or
// numeric), an optional operation id and an optional stream id.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing 
parenthesis");
}

// Check the parsed message against the current subtarget. Strict checks
// apply only when the message was specified symbolically.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  // Symbolic messages must have an operation exactly when they require one.
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

// Parse an s_sendmsg operand: either a sendmsg(...) macro or a raw
// immediate that must fit the 16-bit encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

// Parse an interpolation slot name (p10/p20/p0) into its encoding.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parse an interpolation attribute of the form "attr<N>.<chan>", producing
// two immediate operands: the attribute number and the channel.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  // The last two characters select the channel (".x", ".y", ".z" or ".w").
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  // Strip the channel suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return 
MatchOperand_ParseFail;
  }

  // Attribute numbers occupy 6 bits in the encoding.
  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

// Parse an export target operand (e.g. mrt0, pos0, param5) and verify that
// the target exists and is available on the current subtarget.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
    Error(S, (Id == ET_INVALID) ?
             "invalid exp target" :
             "exp target is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

// True if Token is an identifier with exactly the spelling Id.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

// True if the current token is the identifier Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

// True if the current token has the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

// Consume the current token if it is the identifier Id.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it is the identifier formed by Pref
// immediately followed by Id (e.g. prefix "neg" + "(...)" suffix forms).
bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

// Consume two tokens if the current one is the identifier Id and the next
// has kind Kind (e.g. "hwreg" followed by '(').
bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it has the given kind.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Like trySkipToken, but emits ErrMsg when the expected token is missing.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6402 SMLoc S = getLoc(); 6403 6404 const MCExpr *Expr; 6405 if (Parser.parseExpression(Expr)) 6406 return false; 6407 6408 if (Expr->evaluateAsAbsolute(Imm)) 6409 return true; 6410 6411 if (Expected.empty()) { 6412 Error(S, "expected absolute expression"); 6413 } else { 6414 Error(S, Twine("expected ", Expected) + 6415 Twine(" or an absolute expression")); 6416 } 6417 return false; 6418 } 6419 6420 bool 6421 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6422 SMLoc S = getLoc(); 6423 6424 const MCExpr *Expr; 6425 if (Parser.parseExpression(Expr)) 6426 return false; 6427 6428 int64_t IntVal; 6429 if (Expr->evaluateAsAbsolute(IntVal)) { 6430 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6431 } else { 6432 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6433 } 6434 return true; 6435 } 6436 6437 bool 6438 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6439 if (isToken(AsmToken::String)) { 6440 Val = getToken().getStringContents(); 6441 lex(); 6442 return true; 6443 } else { 6444 Error(getLoc(), ErrMsg); 6445 return false; 6446 } 6447 } 6448 6449 bool 6450 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6451 if (isToken(AsmToken::Identifier)) { 6452 Val = getTokenStr(); 6453 lex(); 6454 return true; 6455 } else { 6456 if (!ErrMsg.empty()) 6457 Error(getLoc(), ErrMsg); 6458 return false; 6459 } 6460 } 6461 6462 AsmToken 6463 AMDGPUAsmParser::getToken() const { 6464 return Parser.getTok(); 6465 } 6466 6467 AsmToken 6468 AMDGPUAsmParser::peekToken() { 6469 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 6470 } 6471 6472 void 6473 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6474 auto TokCount = getLexer().peekTokens(Tokens); 6475 6476 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6477 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6478 } 6479 6480 AsmToken::TokenKind 6481 AMDGPUAsmParser::getTokenKind() const { 6482 return getLexer().getKind(); 6483 } 6484 6485 SMLoc 6486 AMDGPUAsmParser::getLoc() const { 6487 return getToken().getLoc(); 6488 } 6489 6490 StringRef 6491 AMDGPUAsmParser::getTokenStr() const { 6492 return getToken().getString(); 6493 } 6494 6495 void 6496 AMDGPUAsmParser::lex() { 6497 Parser.Lex(); 6498 } 6499 6500 SMLoc 6501 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6502 const OperandVector &Operands) const { 6503 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6504 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6505 if (Test(Op)) 6506 return Op.getStartLoc(); 6507 } 6508 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6509 } 6510 6511 SMLoc 6512 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6513 const OperandVector &Operands) const { 6514 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6515 return getOperandLoc(Test, Operands); 6516 } 6517 6518 SMLoc 6519 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6520 const OperandVector &Operands) const { 6521 auto Test = [=](const AMDGPUOperand& Op) { 6522 return Op.isRegKind() && Op.getReg() == Reg; 6523 }; 6524 return getOperandLoc(Test, Operands); 6525 } 6526 6527 SMLoc 6528 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6529 auto Test = [](const AMDGPUOperand& Op) { 6530 return Op.IsImmKindLiteral() || Op.isExpr(); 6531 }; 6532 return getOperandLoc(Test, Operands); 6533 } 6534 6535 SMLoc 6536 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6537 auto Test = [](const AMDGPUOperand& Op) { 6538 return Op.isImmKindConst(); 6539 }; 
  return getOperandLoc(Test, Operands);
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack the three BITMASK_PERM fields into the ds_swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse one comma-prefixed swizzle macro operand and check it against
// [MinVal, MaxVal]. On success Loc holds the operand's location.
bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
                                     const StringRef ErrMsg,
                                     SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

// Parse OpNum operands sharing the same valid range and diagnostic.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}

// swizzle(QUAD_PERM, lane0, lane1, lane2, lane3)
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    // Pack one 2-bit lane selector per quad lane.
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group_size, lane_id): broadcast one lane to its group.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    // AND keeps the bits that select the group; OR selects the lane.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group_size): reverse lane order within each group,
// implemented as an XOR with (group size - 1).
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group_size): swap adjacent groups, implemented as an XOR
// with the group size itself.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "mask"): a 5-character mask over the lane id bits,
// MSB first; '0' clears the bit, '1' sets it, 'p' preserves it and 'i'
// inverts it.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// A raw swizzle offset, which must fit in 16 bits.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Parse a swizzle(MODE, ...) macro body after "swizzle" has been consumed.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse the ds_swizzle "offset:" operand: either offset:swizzle(...) or a
// plain 16-bit offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok =
false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

// Parse the body of a gpr_idx(...) macro: either an immediately closing
// parenthesis (meaning OFF) or a comma-separated list of distinct index
// modes. Returns the encoded mode mask, or UNDEF after a diagnostic.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name in turn.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // Mention the closing parenthesis only while the list is still empty.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

// Parse an s_set_gpr_idx_on mode operand: gpr_idx(...) or a raw 4-bit
// immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return MatchOperand_ParseFail;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
      return MatchOperand_ParseFail;
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  // Note: Success is returned even after a diagnostic so that parsing can
  // continue and further errors can be reported.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

// Default cache-policy operand (no GLC/SLC/DLC bits set).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}

// Convert parsed MUBUF operands into an MCInst. Handles selection between
// the return and no-return atomic opcode variants (based on the GLC bit)
// and a workaround for lds-vs-non-lds opcode selection.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // The GLC bit on the cache-policy operand selects the returning form.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly updated) opcode's TSFlags.
    IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                      SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

// Convert parsed MTBUF operands into an MCInst, appending the optional
// immediate modifiers in encoding order.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

// Convert parsed MIMG operands into an MCInst. For atomics the destination
// register is also added as a tied source. Optional modifiers are appended
// in encoding order; some are only present on certain generations.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

// Convert parsed SMEM atomic operands into an MCInst. Selects the
// no-return opcode variant when the GLC bit is absent and adds a tied
// source register for the returning form.
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  // The GLC bit on the cache-policy operand selects the returning form.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  // Re-derive the flag from the (possibly updated) opcode's TSFlags.
  IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                    SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Tie the data register as a source for the returning atomic form.
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
7144 continue; 7145 } 7146 7147 // Handle tokens like 'offen' which are sometimes hard-coded into the 7148 // asm string. There are no MCInst operands for these. 7149 if (Op.isToken()) { 7150 continue; 7151 } 7152 assert(Op.isImm()); 7153 7154 // Handle optional arguments 7155 OptionalIdx[Op.getImmTy()] = i; 7156 } 7157 7158 if ((int)Inst.getNumOperands() <= 7159 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7162 } 7163 7164 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7165 const OperandVector &Operands) { 7166 for (unsigned I = 1; I < Operands.size(); ++I) { 7167 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7168 if (Operand.isReg()) 7169 Operand.addRegOperands(Inst, 1); 7170 } 7171 7172 Inst.addOperand(MCOperand::createImm(1)); // a16 7173 } 7174 7175 //===----------------------------------------------------------------------===// 7176 // smrd 7177 //===----------------------------------------------------------------------===// 7178 7179 bool AMDGPUOperand::isSMRDOffset8() const { 7180 return isImm() && isUInt<8>(getImm()); 7181 } 7182 7183 bool AMDGPUOperand::isSMEMOffset() const { 7184 return isImm(); // Offset range is checked later by validator. 7185 } 7186 7187 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7188 // 32-bit literals are only supported on CI and we only want to use them 7189 // when the offset is > 8-bits. 
7190 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7191 } 7192 7193 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7194 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7195 } 7196 7197 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7198 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7199 } 7200 7201 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7202 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7203 } 7204 7205 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7206 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7207 } 7208 7209 //===----------------------------------------------------------------------===// 7210 // vop3 7211 //===----------------------------------------------------------------------===// 7212 7213 static bool ConvertOmodMul(int64_t &Mul) { 7214 if (Mul != 1 && Mul != 2 && Mul != 4) 7215 return false; 7216 7217 Mul >>= 1; 7218 return true; 7219 } 7220 7221 static bool ConvertOmodDiv(int64_t &Div) { 7222 if (Div == 1) { 7223 Div = 0; 7224 return true; 7225 } 7226 7227 if (Div == 2) { 7228 Div = 3; 7229 return true; 7230 } 7231 7232 return false; 7233 } 7234 7235 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7236 // This is intentional and ensures compatibility with sp3. 7237 // See bug 35397 for details. 7238 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7239 if (BoundCtrl == 0 || BoundCtrl == 1) { 7240 BoundCtrl = 1; 7241 return true; 7242 } 7243 return false; 7244 } 7245 7246 // Note: the order in this table matches the order of operands in AsmString. 
// Table of all optional operands the parser recognizes, in AsmString order
// (see the note above the table). Each entry gives the operand's asm name,
// its immediate type, whether it is a bare bit (no ":value" suffix), and an
// optional value-conversion callback. Note "d16" appears twice by design:
// once in the buffer-operand position and once in the MIMG position.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

// Initialize the target ID and, for HSA ABI v3/v4, emit the .amdgcn_target
// directive at the start of the file. No-op for r600 or when there is no
// target streamer.
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3Or4(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

// Parse one optional operand, then speculatively parse up to
// MAX_OPR_LOOKAHEAD more to step over hardcoded mandatory operands that
// follow optional ones (see the comment below).
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}

// Try each entry of AMDGPUOptionalOperandTable in order, dispatching to the
// specialized parser for operand kinds with bespoke syntax (omod, sdwa sels,
// op_sel-style arrays, dim, cpol) and to the generic named-bit/int parsers
// otherwise. Returns the first non-NoMatch result.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
      res = parseCPol(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

// Parse "mul:N" or "div:N" into a single omod immediate via the respective
// value-conversion callbacks.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

// VOP3 with op_sel: convert as VOP3P, then move the dst op_sel bit (stored at
// bit index SrcNum of the parsed op_sel) into src0_modifiers' DST_OP_SEL.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many src operands this opcode has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

// True if operand OpNum of Desc is an input-modifiers operand immediately
// followed by an untied register-class operand.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

// Convert VOP3 interpolation instructions: interp slot/attr/chan become plain
// immediates; high/clamp/omod are appended when the opcode encodes them.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

// Core VOP3 conversion. Sources carry FP input modifiers iff the opcode has
// src0_modifiers; clamp/omod are appended when encoded; for MAC/FMAC forms
// the tied src2 (= dst) and a zero src2_modifiers are inserted explicitly.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

// VOP3P conversion: duplicate dst into vdst_in when present, append the
// packed-math modifiers (op_sel, op_sel_hi, neg_lo/neg_hi), then fold their
// per-source bits into the srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed math defaults op_sel_hi to all-ones (-1); unpacked to 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Bit J of each packed modifier belongs to source J's modifiers operand.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

// A 9-bit dpp_ctrl immediate is valid only if it falls into one of the
// architecturally-defined control ranges/values below.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

// MAI (matrix) modifier range checks: blgp/cbsz are 3-bit, abid is 4-bit.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

// Accepts any immediate representable in 16 bits, signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

// Parse a dim suffix ("1D", "2D_ARRAY", "SQ_RSRC_IMG_..."), re-joining a
// leading integer token with the following identifier, and translate it to
// the hardware dim encoding. Returns false without reporting an error on
// malformed input.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // Reject if there is whitespace between the integer and the identifier.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

// Parse a GFX10+ "dim:<suffix>" operand into an ImmTyDim immediate.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

// Parse GFX10+ "dpp8:[s0,...,s7]" into a single packed immediate with eight
// 3-bit lane selectors.
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

// True if the named dpp_ctrl variant exists on the current subtarget.
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

// Parse "quad_perm:[a,b,c,d]" (each 2 bits) into its packed encoding, or -1
// (with an error reported) on malformed input.
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

// Parse the numeric argument of a "<ctrl>:N" dpp control, validate it against
// the per-control [Lo, Hi] range, and fold it into the control's base
// encoding. row_bcast (15/31) is handled separately outside the table.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,        1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,        1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,        1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,        1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,         1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,         1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,         1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST,  0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST,  0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

// Parse a full dpp_ctrl operand: a bare mirror control, or "<ctrl>:<args>"
// dispatched to the perm/sel helpers above.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Defaults for omitted DPP modifiers: masks enable all rows/banks (0xf).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

// Convert DPP/DPP8 instructions: replicate tied operands, drop the "vcc"
// token of VOP2b forms, then either pack dpp8 + fi (DPP8) or append the
// row/bank masks, bound_ctrl and fi modifiers (DPP16).
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

// Parse "<prefix>:BYTE_n/WORD_n/DWORD" into an SDWA select immediate of the
// given type.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

// Parse "dst_unused:UNUSED_PAD/SEXT/PRESERVE" into an SDWA dst_unused
// immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Thin per-encoding wrappers over the common SDWA converter below.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

// Common SDWA conversion; continues past the end of this chunk.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E;
++I) { 8095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8096 if (SkipVcc && !SkippedVcc && Op.isReg() && 8097 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8098 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8099 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8100 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8101 // Skip VCC only if we didn't skip it on previous iteration. 8102 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8103 if (BasicInstType == SIInstrFlags::VOP2 && 8104 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8105 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8106 SkippedVcc = true; 8107 continue; 8108 } else if (BasicInstType == SIInstrFlags::VOPC && 8109 Inst.getNumOperands() == 0) { 8110 SkippedVcc = true; 8111 continue; 8112 } 8113 } 8114 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8115 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8116 } else if (Op.isImm()) { 8117 // Handle optional arguments 8118 OptionalIdx[Op.getImmTy()] = I; 8119 } else { 8120 llvm_unreachable("Invalid operand type"); 8121 } 8122 SkippedVcc = false; 8123 } 8124 8125 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8126 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8127 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8128 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8129 switch (BasicInstType) { 8130 case SIInstrFlags::VOP1: 8131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8132 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8134 } 8135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8136 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8137 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8138 break; 8139 8140 case SIInstrFlags::VOP2: 8141 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8142 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8144 } 8145 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8146 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8147 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8148 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8149 break; 8150 8151 case SIInstrFlags::VOPC: 8152 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8153 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8154 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8156 break; 8157 8158 default: 8159 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8160 } 8161 } 8162 8163 // special case v_mac_{f16, f32}: 8164 // it has src2 register operand that is tied to dst operand 8165 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8166 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8167 auto it = Inst.begin(); 8168 std::advance( 8169 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8170 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8171 } 8172 } 8173 8174 //===----------------------------------------------------------------------===// 8175 // mAI 8176 //===----------------------------------------------------------------------===// 8177 8178 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8179 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8180 } 8181 8182 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8183 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8184 } 8185 8186 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8187 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8188 } 8189 8190 /// Force static initialization. 8191 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8192 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8193 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8194 } 8195 8196 #define GET_REGISTER_MATCHER 8197 #define GET_MATCHER_IMPLEMENTATION 8198 #define GET_MNEMONIC_SPELL_CHECKER 8199 #define GET_MNEMONIC_CHECKER 8200 #include "AMDGPUGenAsmMatcher.inc" 8201 8202 // This fuction should be defined after auto-generated include so that we have 8203 // MatchClassKind enum defined 8204 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8205 unsigned Kind) { 8206 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8207 // But MatchInstructionImpl() expects to meet token and fails to validate 8208 // operand. 
This method checks if we are given immediate operand but expect to 8209 // get corresponding token. 8210 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8211 switch (Kind) { 8212 case MCK_addr64: 8213 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8214 case MCK_gds: 8215 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8216 case MCK_lds: 8217 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8218 case MCK_idxen: 8219 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8220 case MCK_offen: 8221 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8222 case MCK_SSrcB32: 8223 // When operands have expression values, they will return true for isToken, 8224 // because it is not possible to distinguish between a token and an 8225 // expression at parse time. MatchInstructionImpl() will always try to 8226 // match an operand as a token, when isToken returns true, and when the 8227 // name of the expression is not a valid token, the match will fail, 8228 // so we need to handle it here. 8229 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8230 case MCK_SSrcF32: 8231 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8232 case MCK_SoppBrTarget: 8233 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8234 case MCK_VReg32OrOff: 8235 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8236 case MCK_InterpSlot: 8237 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 8238 case MCK_Attr: 8239 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8240 case MCK_AttrChan: 8241 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8242 case MCK_ImmSMEMOffset: 8243 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8244 case MCK_SReg_64: 8245 case MCK_SReg_64_XEXEC: 8246 // Null is defined as a 32-bit register but 8247 // it should also be enabled with 64-bit operands. 
8248 // The following code enables it for SReg_64 operands 8249 // used as source and destination. Remaining source 8250 // operands are handled in isInlinableImm. 8251 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8252 default: 8253 return Match_InvalidOperand; 8254 } 8255 } 8256 8257 //===----------------------------------------------------------------------===// 8258 // endpgm 8259 //===----------------------------------------------------------------------===// 8260 8261 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8262 SMLoc S = getLoc(); 8263 int64_t Imm = 0; 8264 8265 if (!parseExpr(Imm)) { 8266 // The operand is optional, if not present default to 0 8267 Imm = 0; 8268 } 8269 8270 if (!isUInt<16>(Imm)) { 8271 Error(S, "expected a 16-bit value"); 8272 return MatchOperand_ParseFail; 8273 } 8274 8275 Operands.push_back( 8276 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8277 return MatchOperand_Success; 8278 } 8279 8280 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8281