1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 88 return Operand; 89 } 90 91 int64_t getModifiersOperand() const { 92 assert(!(hasFPModifiers() && hasIntModifiers()) 93 && "fp and int modifiers should not be used simultaneously"); 94 if (hasFPModifiers()) { 95 return getFPModifiersOperand(); 96 } else if (hasIntModifiers()) { 97 return getIntModifiersOperand(); 98 } else { 99 return 0; 100 } 101 } 102 103 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 104 }; 105 106 enum ImmTy { 107 ImmTyNone, 108 ImmTyGDS, 109 ImmTyLDS, 110 ImmTyOffen, 111 ImmTyIdxen, 112 ImmTyAddr64, 113 ImmTyOffset, 114 ImmTyInstOffset, 115 ImmTyOffset0, 116 ImmTyOffset1, 117 ImmTyCPol, 118 ImmTySWZ, 119 ImmTyTFE, 120 ImmTyD16, 121 ImmTyClampSI, 122 ImmTyOModSI, 123 ImmTyDPP8, 124 ImmTyDppCtrl, 125 ImmTyDppRowMask, 126 ImmTyDppBankMask, 127 ImmTyDppBoundCtrl, 128 ImmTyDppFi, 129 ImmTySdwaDstSel, 130 ImmTySdwaSrc0Sel, 131 ImmTySdwaSrc1Sel, 132 ImmTySdwaDstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTySwizzle, 155 ImmTyGprIdxMode, 156 ImmTyHigh, 157 ImmTyBLGP, 158 ImmTyCBSZ, 159 ImmTyABID, 160 ImmTyEndpgm, 161 }; 162 163 enum ImmKindTy { 164 ImmKindTyNone, 165 ImmKindTyLiteral, 166 ImmKindTyConst, 167 }; 168 169 private: 170 struct TokOp { 171 const char *Data; 172 unsigned Length; 173 }; 174 175 struct ImmOp { 176 int64_t Val; 177 ImmTy Type; 178 bool IsFPImm; 179 mutable ImmKindTy Kind; 180 Modifiers Mods; 181 }; 182 183 struct RegOp { 184 unsigned RegNo; 185 Modifiers Mods; 186 }; 187 188 union { 189 TokOp Tok; 190 ImmOp Imm; 191 RegOp Reg; 192 const MCExpr *Expr; 193 }; 194 195 public: 196 bool isToken() const override { 197 if (Kind == Token) 198 return true; 199 200 // When parsing operands, we can't always tell if something was meant to be 201 // a token, like 'gds', or an expression that references a global variable. 202 // In this case, we assume the string is an expression, and if we need to 203 // interpret is a token, then we treat the symbol name as the token. 
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and 342 // forced value of the GLC operand. 343 bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); } 344 bool isSWZ() const { return isImmTy(ImmTySWZ); } 345 bool isTFE() const { return isImmTy(ImmTyTFE); } 346 bool isD16() const { return isImmTy(ImmTyD16); } 347 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 348 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 349 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 350 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 351 bool isFI() const { return isImmTy(ImmTyDppFi); } 352 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 353 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 354 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 355 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 356 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 357 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 358 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 359 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 360 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 361 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 362 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 363 bool isHigh() const { return isImmTy(ImmTyHigh); } 364 365 bool isMod() const { 366 return isClampSI() || isOModSI(); 367 } 368 369 bool isRegOrImm() const { 370 return isReg() || isImm(); 371 } 372 373 bool isRegClass(unsigned RCID) const; 374 375 bool isInlineValue() const; 376 377 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 378 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 379 } 380 381 bool isSCSrcB16() const { 382 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 383 } 384 385 bool isSCSrcV2B16() const { 386 return isSCSrcB16(); 387 } 388 389 bool isSCSrcB32() const { 390 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 391 } 392 393 bool isSCSrcB64() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 395 } 396 397 bool isBoolReg() const; 398 399 bool isSCSrcF16() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 401 } 402 403 bool isSCSrcV2F16() const { 404 return isSCSrcF16(); 405 } 406 407 bool isSCSrcF32() const { 408 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 409 } 410 411 bool isSCSrcF64() const { 412 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 413 } 414 415 bool isSSrcB32() const { 416 return isSCSrcB32() || 
isLiteralImm(MVT::i32) || isExpr(); 417 } 418 419 bool isSSrcB16() const { 420 return isSCSrcB16() || isLiteralImm(MVT::i16); 421 } 422 423 bool isSSrcV2B16() const { 424 llvm_unreachable("cannot happen"); 425 return isSSrcB16(); 426 } 427 428 bool isSSrcB64() const { 429 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 430 // See isVSrc64(). 431 return isSCSrcB64() || isLiteralImm(MVT::i64); 432 } 433 434 bool isSSrcF32() const { 435 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 436 } 437 438 bool isSSrcF64() const { 439 return isSCSrcB64() || isLiteralImm(MVT::f64); 440 } 441 442 bool isSSrcF16() const { 443 return isSCSrcB16() || isLiteralImm(MVT::f16); 444 } 445 446 bool isSSrcV2F16() const { 447 llvm_unreachable("cannot happen"); 448 return isSSrcF16(); 449 } 450 451 bool isSSrcV2FP32() const { 452 llvm_unreachable("cannot happen"); 453 return isSSrcF32(); 454 } 455 456 bool isSCSrcV2FP32() const { 457 llvm_unreachable("cannot happen"); 458 return isSCSrcF32(); 459 } 460 461 bool isSSrcV2INT32() const { 462 llvm_unreachable("cannot happen"); 463 return isSSrcB32(); 464 } 465 466 bool isSCSrcV2INT32() const { 467 llvm_unreachable("cannot happen"); 468 return isSCSrcB32(); 469 } 470 471 bool isSSrcOrLdsB32() const { 472 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 473 isLiteralImm(MVT::i32) || isExpr(); 474 } 475 476 bool isVCSrcB32() const { 477 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 478 } 479 480 bool isVCSrcB64() const { 481 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 482 } 483 484 bool isVCSrcB16() const { 485 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 486 } 487 488 bool isVCSrcV2B16() const { 489 return isVCSrcB16(); 490 } 491 492 bool isVCSrcF32() const { 493 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 494 } 495 496 bool isVCSrcF64() const { 497 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 498 } 499 500 bool isVCSrcF16() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 502 } 503 504 bool isVCSrcV2F16() const { 505 return isVCSrcF16(); 506 } 507 508 bool isVSrcB32() const { 509 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 510 } 511 512 bool isVSrcB64() const { 513 return isVCSrcF64() || isLiteralImm(MVT::i64); 514 } 515 516 bool isVSrcB16() const { 517 return isVCSrcB16() || isLiteralImm(MVT::i16); 518 } 519 520 bool isVSrcV2B16() const { 521 return isVSrcB16() || isLiteralImm(MVT::v2i16); 522 } 523 524 bool isVCSrcV2FP32() const { 525 return isVCSrcF64(); 526 } 527 528 bool isVSrcV2FP32() const { 529 return isVSrcF64() || isLiteralImm(MVT::v2f32); 530 } 531 532 bool isVCSrcV2INT32() const { 533 return isVCSrcB64(); 534 } 535 536 bool isVSrcV2INT32() const { 537 return isVSrcB64() || isLiteralImm(MVT::v2i32); 538 } 539 540 bool isVSrcF32() const { 541 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 542 } 543 544 bool isVSrcF64() const { 545 return isVCSrcF64() || isLiteralImm(MVT::f64); 546 } 547 548 bool isVSrcF16() const { 549 return isVCSrcF16() || isLiteralImm(MVT::f16); 550 } 551 552 bool isVSrcV2F16() const { 553 return isVSrcF16() || isLiteralImm(MVT::v2f16); 554 } 555 556 bool isVISrcB32() const { 557 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 558 } 559 560 bool isVISrcB16() const { 561 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 562 } 563 564 bool isVISrcV2B16() const { 565 return isVISrcB16(); 566 } 567 568 bool 
isVISrcF32() const { 569 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 570 } 571 572 bool isVISrcF16() const { 573 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 574 } 575 576 bool isVISrcV2F16() const { 577 return isVISrcF16() || isVISrcB32(); 578 } 579 580 bool isVISrc_64B64() const { 581 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 582 } 583 584 bool isVISrc_64F64() const { 585 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 586 } 587 588 bool isVISrc_64V2FP32() const { 589 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 590 } 591 592 bool isVISrc_64V2INT32() const { 593 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 594 } 595 596 bool isVISrc_256B64() const { 597 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 598 } 599 600 bool isVISrc_256F64() const { 601 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 602 } 603 604 bool isVISrc_128B16() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 606 } 607 608 bool isVISrc_128V2B16() const { 609 return isVISrc_128B16(); 610 } 611 612 bool isVISrc_128B32() const { 613 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 614 } 615 616 bool isVISrc_128F32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 618 } 619 620 bool isVISrc_256V2FP32() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 622 } 623 624 bool isVISrc_256V2INT32() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 626 } 627 628 bool isVISrc_512B32() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 630 } 631 632 bool isVISrc_512B16() const { 633 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 634 } 635 636 bool isVISrc_512V2B16() const { 637 return isVISrc_512B16(); 638 } 639 640 bool isVISrc_512F32() const { 641 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 642 } 643 644 bool isVISrc_512F16() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 646 } 647 648 bool isVISrc_512V2F16() const { 649 return isVISrc_512F16() || isVISrc_512B32(); 650 } 651 652 bool isVISrc_1024B32() const { 653 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 654 } 655 656 bool isVISrc_1024B16() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 658 } 659 660 bool isVISrc_1024V2B16() const { 661 return isVISrc_1024B16(); 662 } 663 664 bool isVISrc_1024F32() const { 665 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 666 } 667 668 bool isVISrc_1024F16() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 670 } 671 672 bool isVISrc_1024V2F16() const { 673 return isVISrc_1024F16() || isVISrc_1024B32(); 674 } 675 676 bool isAISrcB32() const { 677 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 678 } 679 680 bool isAISrcB16() const { 681 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 682 } 683 684 bool isAISrcV2B16() const { 685 return isAISrcB16(); 686 } 687 688 bool isAISrcF32() const { 689 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 690 } 691 692 bool isAISrcF16() const { 693 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 694 } 695 696 bool isAISrcV2F16() const { 697 return isAISrcF16() || isAISrcB32(); 698 } 699 700 bool isAISrc_64B64() const { 701 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 702 } 703 704 bool isAISrc_64F64() const { 705 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 706 } 707 708 bool isAISrc_128B32() const { 709 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 710 } 711 712 bool isAISrc_128B16() const { 713 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 714 } 715 716 bool isAISrc_128V2B16() const { 717 return isAISrc_128B16(); 718 } 719 720 bool isAISrc_128F32() const { 721 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 722 } 723 724 bool isAISrc_128F16() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 726 } 727 728 bool isAISrc_128V2F16() const { 729 return isAISrc_128F16() || isAISrc_128B32(); 730 } 731 732 bool isVISrc_128F16() const { 733 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 734 } 735 736 bool isVISrc_128V2F16() const { 737 return isVISrc_128F16() || isVISrc_128B32(); 738 } 739 740 bool isAISrc_256B64() const { 741 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 742 } 743 744 bool isAISrc_256F64() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 746 } 747 748 bool isAISrc_512B32() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 750 } 751 752 bool isAISrc_512B16() const { 753 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 754 } 755 756 bool isAISrc_512V2B16() const { 757 return isAISrc_512B16(); 758 } 759 760 bool isAISrc_512F32() const { 761 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 762 } 763 764 bool isAISrc_512F16() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 766 } 767 768 bool isAISrc_512V2F16() const { 769 return isAISrc_512F16() || isAISrc_512B32(); 770 } 771 772 bool isAISrc_1024B32() const { 773 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 774 } 775 776 bool isAISrc_1024B16() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 778 } 779 780 bool isAISrc_1024V2B16() const { 781 return isAISrc_1024B16(); 782 } 783 784 bool isAISrc_1024F32() const { 785 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 786 } 787 788 bool isAISrc_1024F16() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 790 } 791 792 bool isAISrc_1024V2F16() const { 793 return isAISrc_1024F16() || isAISrc_1024B32(); 794 } 795 796 bool isKImmFP32() const { 797 return isLiteralImm(MVT::f32); 798 } 799 800 bool isKImmFP16() const { 801 return isLiteralImm(MVT::f16); 802 } 803 804 bool isMem() const override { 805 return false; 806 } 807 808 bool isExpr() const { 809 return Kind == Expression; 810 } 811 812 bool isSoppBrTarget() const { 813 return isExpr() || isImm(); 814 } 815 816 bool isSWaitCnt() const; 817 bool isHwreg() const; 818 bool isSendMsg() const; 819 bool isSwizzle() const; 820 bool isSMRDOffset8() const; 821 bool isSMEMOffset() const; 822 bool isSMRDLiteralOffset() const; 823 bool isDPP8() const; 824 bool isDPPCtrl() const; 825 bool isBLGP() const; 826 bool isCBSZ() const; 827 bool isABID() const; 828 bool isGPRIdxMode() const; 829 bool isS16Imm() const; 830 bool isU16Imm() const; 831 bool isEndpgm() const; 832 833 StringRef getExpressionAsToken() const { 834 assert(isExpr()); 835 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 836 return S->getSymbol().getName(); 837 } 838 839 StringRef getToken() const { 840 assert(isToken()); 841 
842 if (Kind == Expression) 843 return getExpressionAsToken(); 844 845 return StringRef(Tok.Data, Tok.Length); 846 } 847 848 int64_t getImm() const { 849 assert(isImm()); 850 return Imm.Val; 851 } 852 853 void setImm(int64_t Val) { 854 assert(isImm()); 855 Imm.Val = Val; 856 } 857 858 ImmTy getImmTy() const { 859 assert(isImm()); 860 return Imm.Type; 861 } 862 863 unsigned getReg() const override { 864 assert(isRegKind()); 865 return Reg.RegNo; 866 } 867 868 SMLoc getStartLoc() const override { 869 return StartLoc; 870 } 871 872 SMLoc getEndLoc() const override { 873 return EndLoc; 874 } 875 876 SMRange getLocRange() const { 877 return SMRange(StartLoc, EndLoc); 878 } 879 880 Modifiers getModifiers() const { 881 assert(isRegKind() || isImmTy(ImmTyNone)); 882 return isRegKind() ? Reg.Mods : Imm.Mods; 883 } 884 885 void setModifiers(Modifiers Mods) { 886 assert(isRegKind() || isImmTy(ImmTyNone)); 887 if (isRegKind()) 888 Reg.Mods = Mods; 889 else 890 Imm.Mods = Mods; 891 } 892 893 bool hasModifiers() const { 894 return getModifiers().hasModifiers(); 895 } 896 897 bool hasFPModifiers() const { 898 return getModifiers().hasFPModifiers(); 899 } 900 901 bool hasIntModifiers() const { 902 return getModifiers().hasIntModifiers(); 903 } 904 905 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 906 907 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 908 909 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 910 911 template <unsigned Bitwidth> 912 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 913 914 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 915 addKImmFPOperands<16>(Inst, N); 916 } 917 918 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 919 addKImmFPOperands<32>(Inst, N); 920 } 921 922 void addRegOperands(MCInst &Inst, unsigned N) const; 923 924 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 925 addRegOperands(Inst, N); 926 } 927 928 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 929 if (isRegKind()) 930 addRegOperands(Inst, N); 931 else if (isExpr()) 932 Inst.addOperand(MCOperand::createExpr(Expr)); 933 else 934 addImmOperands(Inst, N); 935 } 936 937 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 938 Modifiers Mods = getModifiers(); 939 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 940 if (isRegKind()) { 941 addRegOperands(Inst, N); 942 } else { 943 addImmOperands(Inst, N, false); 944 } 945 } 946 947 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 948 assert(!hasIntModifiers()); 949 addRegOrImmWithInputModsOperands(Inst, N); 950 } 951 952 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 953 assert(!hasFPModifiers()); 954 addRegOrImmWithInputModsOperands(Inst, N); 955 } 956 957 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 958 Modifiers Mods = getModifiers(); 959 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 960 assert(isRegKind()); 961 addRegOperands(Inst, N); 962 } 963 964 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 965 assert(!hasIntModifiers()); 966 addRegWithInputModsOperands(Inst, N); 967 } 968 969 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 970 assert(!hasFPModifiers()); 971 addRegWithInputModsOperands(Inst, N); 972 } 973 974 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 975 if (isImm()) 976 addImmOperands(Inst, N); 977 else { 978 assert(isExpr()); 
979 Inst.addOperand(MCOperand::createExpr(Expr)); 980 } 981 } 982 983 static void printImmTy(raw_ostream& OS, ImmTy Type) { 984 switch (Type) { 985 case ImmTyNone: OS << "None"; break; 986 case ImmTyGDS: OS << "GDS"; break; 987 case ImmTyLDS: OS << "LDS"; break; 988 case ImmTyOffen: OS << "Offen"; break; 989 case ImmTyIdxen: OS << "Idxen"; break; 990 case ImmTyAddr64: OS << "Addr64"; break; 991 case ImmTyOffset: OS << "Offset"; break; 992 case ImmTyInstOffset: OS << "InstOffset"; break; 993 case ImmTyOffset0: OS << "Offset0"; break; 994 case ImmTyOffset1: OS << "Offset1"; break; 995 case ImmTyCPol: OS << "CPol"; break; 996 case ImmTySWZ: OS << "SWZ"; break; 997 case ImmTyTFE: OS << "TFE"; break; 998 case ImmTyD16: OS << "D16"; break; 999 case ImmTyFORMAT: OS << "FORMAT"; break; 1000 case ImmTyClampSI: OS << "ClampSI"; break; 1001 case ImmTyOModSI: OS << "OModSI"; break; 1002 case ImmTyDPP8: OS << "DPP8"; break; 1003 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1004 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1005 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1006 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1007 case ImmTyDppFi: OS << "FI"; break; 1008 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1009 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1010 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1011 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1012 case ImmTyDMask: OS << "DMask"; break; 1013 case ImmTyDim: OS << "Dim"; break; 1014 case ImmTyUNorm: OS << "UNorm"; break; 1015 case ImmTyDA: OS << "DA"; break; 1016 case ImmTyR128A16: OS << "R128A16"; break; 1017 case ImmTyA16: OS << "A16"; break; 1018 case ImmTyLWE: OS << "LWE"; break; 1019 case ImmTyOff: OS << "Off"; break; 1020 case ImmTyExpTgt: OS << "ExpTgt"; break; 1021 case ImmTyExpCompr: OS << "ExpCompr"; break; 1022 case ImmTyExpVM: OS << "ExpVM"; break; 1023 case ImmTyHwreg: OS << "Hwreg"; break; 1024 case ImmTySendMsg: OS << "SendMsg"; break; 1025 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1026 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1027 case ImmTyAttrChan: OS << "AttrChan"; break; 1028 case ImmTyOpSel: OS << "OpSel"; break; 1029 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1030 case ImmTyNegLo: OS << "NegLo"; break; 1031 case ImmTyNegHi: OS << "NegHi"; break; 1032 case ImmTySwizzle: OS << "Swizzle"; break; 1033 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1034 case ImmTyHigh: OS << "High"; break; 1035 case ImmTyBLGP: OS << "BLGP"; break; 1036 case ImmTyCBSZ: OS << "CBSZ"; break; 1037 case ImmTyABID: OS << "ABID"; break; 1038 case ImmTyEndpgm: OS << "Endpgm"; break; 1039 } 1040 } 1041 1042 void print(raw_ostream &OS) const override { 1043 switch (Kind) { 1044 case Register: 1045 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1046 break; 1047 case Immediate: 1048 OS << '<' << getImm(); 1049 if (getImmTy() != ImmTyNone) { 1050 OS << " type: "; printImmTy(OS, getImmTy()); 1051 } 1052 OS << " mods: " << Imm.Mods << '>'; 1053 break; 1054 case Token: 1055 OS << '\'' << getToken() << '\''; 1056 break; 1057 case Expression: 1058 OS << "<expr " << *Expr << '>'; 1059 break; 1060 } 1061 } 1062 1063 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1064 int64_t Val, SMLoc Loc, 1065 ImmTy Type = ImmTyNone, 1066 bool IsFPImm = false) { 1067 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1068 Op->Imm.Val = Val; 1069 Op->Imm.IsFPImm = IsFPImm; 1070 Op->Imm.Kind = ImmKindTyNone; 1071 Op->Imm.Type = Type; 1072 Op->Imm.Mods = Modifiers(); 
1073 Op->StartLoc = Loc; 1074 Op->EndLoc = Loc; 1075 return Op; 1076 } 1077 1078 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1079 StringRef Str, SMLoc Loc, 1080 bool HasExplicitEncodingSize = true) { 1081 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1082 Res->Tok.Data = Str.data(); 1083 Res->Tok.Length = Str.size(); 1084 Res->StartLoc = Loc; 1085 Res->EndLoc = Loc; 1086 return Res; 1087 } 1088 1089 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1090 unsigned RegNo, SMLoc S, 1091 SMLoc E) { 1092 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1093 Op->Reg.RegNo = RegNo; 1094 Op->Reg.Mods = Modifiers(); 1095 Op->StartLoc = S; 1096 Op->EndLoc = E; 1097 return Op; 1098 } 1099 1100 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1101 const class MCExpr *Expr, SMLoc S) { 1102 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1103 Op->Expr = Expr; 1104 Op->StartLoc = S; 1105 Op->EndLoc = S; 1106 return Op; 1107 } 1108 }; 1109 1110 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1111 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1112 return OS; 1113 } 1114 1115 //===----------------------------------------------------------------------===// 1116 // AsmParser 1117 //===----------------------------------------------------------------------===// 1118 1119 // Holds info related to the current kernel, e.g. count of SGPRs used. 1120 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1121 // .amdgpu_hsa_kernel or at EOF. 1122 class KernelScopeInfo { 1123 int SgprIndexUnusedMin = -1; 1124 int VgprIndexUnusedMin = -1; 1125 MCContext *Ctx = nullptr; 1126 1127 void usesSgprAt(int i) { 1128 if (i >= SgprIndexUnusedMin) { 1129 SgprIndexUnusedMin = ++i; 1130 if (Ctx) { 1131 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1132 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1133 } 1134 } 1135 } 1136 1137 void usesVgprAt(int i) { 1138 if (i >= VgprIndexUnusedMin) { 1139 VgprIndexUnusedMin = ++i; 1140 if (Ctx) { 1141 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1142 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1143 } 1144 } 1145 } 1146 1147 public: 1148 KernelScopeInfo() = default; 1149 1150 void initialize(MCContext &Context) { 1151 Ctx = &Context; 1152 usesSgprAt(SgprIndexUnusedMin = -1); 1153 usesVgprAt(VgprIndexUnusedMin = -1); 1154 } 1155 1156 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1157 switch (RegKind) { 1158 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1159 case IS_AGPR: // fall through 1160 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1161 default: break; 1162 } 1163 } 1164 }; 1165 1166 class AMDGPUAsmParser : public MCTargetAsmParser { 1167 MCAsmParser &Parser; 1168 1169 // Number of extra operands parsed after the first optional operand. 1170 // This may be necessary to skip hardcoded mandatory operands. 
1171 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1172 1173 unsigned ForcedEncodingSize = 0; 1174 bool ForcedDPP = false; 1175 bool ForcedSDWA = false; 1176 KernelScopeInfo KernelScope; 1177 unsigned CPolSeen; 1178 1179 /// @name Auto-generated Match Functions 1180 /// { 1181 1182 #define GET_ASSEMBLER_HEADER 1183 #include "AMDGPUGenAsmMatcher.inc" 1184 1185 /// } 1186 1187 private: 1188 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1189 bool OutOfRangeError(SMRange Range); 1190 /// Calculate VGPR/SGPR blocks required for given target, reserved 1191 /// registers, and user-specified NextFreeXGPR values. 1192 /// 1193 /// \param Features [in] Target features, used for bug corrections. 1194 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1195 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1196 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1197 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1198 /// descriptor field, if valid. 1199 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1200 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1201 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1202 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1203 /// \param VGPRBlocks [out] Result VGPR block count. 1204 /// \param SGPRBlocks [out] Result SGPR block count. 1205 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1206 bool FlatScrUsed, bool XNACKUsed, 1207 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1208 SMRange VGPRRange, unsigned NextFreeSGPR, 1209 SMRange SGPRRange, unsigned &VGPRBlocks, 1210 unsigned &SGPRBlocks); 1211 bool ParseDirectiveAMDGCNTarget(); 1212 bool ParseDirectiveAMDHSAKernel(); 1213 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1214 bool ParseDirectiveHSACodeObjectVersion(); 1215 bool ParseDirectiveHSACodeObjectISA(); 1216 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1217 bool ParseDirectiveAMDKernelCodeT(); 1218 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1219 bool ParseDirectiveAMDGPUHsaKernel(); 1220 1221 bool ParseDirectiveISAVersion(); 1222 bool ParseDirectiveHSAMetadata(); 1223 bool ParseDirectivePALMetadataBegin(); 1224 bool ParseDirectivePALMetadata(); 1225 bool ParseDirectiveAMDGPULDS(); 1226 1227 /// Common code to parse out a block of text (typically YAML) between start and 1228 /// end directives. 
1229 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1230 const char *AssemblerDirectiveEnd, 1231 std::string &CollectString); 1232 1233 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1234 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1235 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1236 unsigned &RegNum, unsigned &RegWidth, 1237 bool RestoreOnFailure = false); 1238 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1239 unsigned &RegNum, unsigned &RegWidth, 1240 SmallVectorImpl<AsmToken> &Tokens); 1241 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1242 unsigned &RegWidth, 1243 SmallVectorImpl<AsmToken> &Tokens); 1244 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1245 unsigned &RegWidth, 1246 SmallVectorImpl<AsmToken> &Tokens); 1247 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1248 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1249 bool ParseRegRange(unsigned& Num, unsigned& Width); 1250 unsigned getRegularReg(RegisterKind RegKind, 1251 unsigned RegNum, 1252 unsigned RegWidth, 1253 SMLoc Loc); 1254 1255 bool isRegister(); 1256 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1257 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1258 void initializeGprCountSymbol(RegisterKind RegKind); 1259 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1260 unsigned RegWidth); 1261 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1262 bool IsAtomic, bool IsLds = false); 1263 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1264 bool IsGdsHardcoded); 1265 1266 public: 1267 enum AMDGPUMatchResultTy { 1268 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1269 }; 1270 enum OperandMode { 1271 OperandMode_Default, 1272 OperandMode_NSA, 1273 }; 1274 1275 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1276 1277 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1278 const MCInstrInfo &MII, 1279 const MCTargetOptions &Options) 1280 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1281 MCAsmParserExtension::Initialize(Parser); 1282 1283 if (getFeatureBits().none()) { 1284 // Set default features. 1285 copySTI().ToggleFeature("southern-islands"); 1286 } 1287 1288 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1289 1290 { 1291 // TODO: make those pre-defined variables read-only. 1292 // Currently there is none suitable machinery in the core llvm-mc for this. 1293 // MCSymbol::isRedefinable is intended for another purpose, and 1294 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
1295 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1296 MCContext &Ctx = getContext(); 1297 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1298 MCSymbol *Sym = 1299 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1303 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1304 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1305 } else { 1306 MCSymbol *Sym = 1307 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1311 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1312 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1313 } 1314 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1315 initializeGprCountSymbol(IS_VGPR); 1316 initializeGprCountSymbol(IS_SGPR); 1317 } else 1318 KernelScope.initialize(getContext()); 1319 } 1320 } 1321 1322 bool hasXNACK() const { 1323 return AMDGPU::hasXNACK(getSTI()); 1324 } 1325 1326 bool hasMIMG_R128() const { 1327 return AMDGPU::hasMIMG_R128(getSTI()); 1328 } 1329 1330 bool hasPackedD16() const { 1331 return AMDGPU::hasPackedD16(getSTI()); 1332 } 1333 1334 bool hasGFX10A16() const { 1335 return AMDGPU::hasGFX10A16(getSTI()); 1336 } 1337 1338 bool isSI() const { 1339 return AMDGPU::isSI(getSTI()); 1340 } 1341 1342 bool isCI() const { 1343 return AMDGPU::isCI(getSTI()); 1344 } 1345 1346 bool isVI() const { 1347 return AMDGPU::isVI(getSTI()); 1348 } 1349 1350 bool isGFX9() const { 1351 return AMDGPU::isGFX9(getSTI()); 1352 } 1353 1354 bool isGFX90A() const { 1355 return AMDGPU::isGFX90A(getSTI()); 1356 } 1357 1358 bool isGFX9Plus() const { 1359 return AMDGPU::isGFX9Plus(getSTI()); 1360 } 1361 1362 bool isGFX10() const { 1363 return AMDGPU::isGFX10(getSTI()); 1364 } 1365 1366 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1367 1368 bool isGFX10_BEncoding() const { 1369 return AMDGPU::isGFX10_BEncoding(getSTI()); 1370 } 1371 1372 bool hasInv2PiInlineImm() const { 1373 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1374 } 1375 1376 bool hasFlatOffsets() const { 1377 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1378 } 1379 1380 bool hasSGPR102_SGPR103() const { 1381 return !isVI() && !isGFX9(); 1382 } 1383 1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1385 1386 bool hasIntClamp() const { 1387 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1388 } 1389 1390 AMDGPUTargetStreamer &getTargetStreamer() { 1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1392 return static_cast<AMDGPUTargetStreamer &>(TS); 1393 } 1394 1395 const MCRegisterInfo *getMRI() const { 1396 // We need this const_cast because for some reason getContext() is not const 1397 // in MCAsmParser. 
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1399 } 1400 1401 const MCInstrInfo *getMII() const { 1402 return &MII; 1403 } 1404 1405 const FeatureBitset &getFeatureBits() const { 1406 return getSTI().getFeatureBits(); 1407 } 1408 1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1412 1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1415 bool isForcedDPP() const { return ForcedDPP; } 1416 bool isForcedSDWA() const { return ForcedSDWA; } 1417 ArrayRef<unsigned> getMatchedVariants() const; 1418 StringRef getMatchedVariantName() const; 1419 1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1422 bool RestoreOnFailure); 1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1425 SMLoc &EndLoc) override; 1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1428 unsigned Kind) override; 1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1430 OperandVector &Operands, MCStreamer &Out, 1431 uint64_t &ErrorInfo, 1432 bool MatchingInlineAsm) override; 1433 bool ParseDirective(AsmToken DirectiveID) override; 1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1435 OperandMode Mode = OperandMode_Default); 1436 StringRef parseMnemonicSuffix(StringRef Name); 1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1438 SMLoc NameLoc, OperandVector &Operands) override; 1439 //bool ProcessInstruction(MCInst &Inst); 1440 1441 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1442 1443 OperandMatchResultTy 1444 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t &) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseOperandArrayWithPrefix(const char *Prefix, 1450 OperandVector &Operands, 1451 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1452 bool (*ConvertResult)(int64_t&) = nullptr); 1453 1454 OperandMatchResultTy 1455 parseNamedBit(StringRef Name, OperandVector &Operands, 1456 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1457 OperandMatchResultTy parseCPol(OperandVector &Operands); 1458 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1459 StringRef &Value, 1460 SMLoc &StringLoc); 1461 1462 bool isModifier(); 1463 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1464 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1465 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1466 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1467 bool parseSP3NegModifier(); 1468 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1469 OperandMatchResultTy parseReg(OperandVector &Operands); 1470 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1471 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1472 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1473 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1474 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1475 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1476 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1477 OperandMatchResultTy parseUfmt(int64_t &Format); 1478 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1479 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1480 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1481 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1482 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1483 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1484 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1485 1486 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1487 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1488 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1489 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1490 1491 bool parseCnt(int64_t &IntVal); 1492 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1493 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1494 1495 private: 1496 struct OperandInfoTy { 1497 SMLoc Loc; 1498 int64_t Id; 1499 bool IsSymbolic = false; 1500 bool IsDefined = false; 1501 1502 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1503 }; 1504 1505 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1506 bool validateSendMsg(const OperandInfoTy &Msg, 1507 const OperandInfoTy &Op, 1508 const OperandInfoTy &Stream); 1509 1510 bool parseHwregBody(OperandInfoTy &HwReg, 1511 OperandInfoTy &Offset, 1512 OperandInfoTy &Width); 1513 bool validateHwreg(const OperandInfoTy &HwReg, 1514 const OperandInfoTy &Offset, 1515 const OperandInfoTy &Width); 1516 1517 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1518 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1519 1520 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1521 const OperandVector &Operands) const; 1522 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1523 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1524 SMLoc getLitLoc(const OperandVector &Operands) const; 1525 SMLoc getConstLoc(const OperandVector &Operands) const; 1526 1527 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1528 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1529 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1530 bool validateSOPLiteral(const MCInst &Inst) const; 1531 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1532 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateIntClampSupported(const MCInst &Inst); 1534 bool validateMIMGAtomicDMask(const MCInst &Inst); 1535 bool validateMIMGGatherDMask(const MCInst &Inst); 1536 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateMIMGDataSize(const MCInst &Inst); 1538 bool validateMIMGAddrSize(const 
MCInst &Inst); 1539 bool validateMIMGD16(const MCInst &Inst); 1540 bool validateMIMGDim(const MCInst &Inst); 1541 bool validateMIMGMSAA(const MCInst &Inst); 1542 bool validateOpSel(const MCInst &Inst); 1543 bool validateVccOperand(unsigned Reg) const; 1544 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1545 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1546 bool validateAGPRLdSt(const MCInst &Inst) const; 1547 bool validateVGPRAlign(const MCInst &Inst) const; 1548 bool validateDivScale(const MCInst &Inst); 1549 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1550 const SMLoc &IDLoc); 1551 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1552 unsigned getConstantBusLimit(unsigned Opcode) const; 1553 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1554 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1555 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1556 1557 bool isSupportedMnemo(StringRef Mnemo, 1558 const FeatureBitset &FBS); 1559 bool isSupportedMnemo(StringRef Mnemo, 1560 const FeatureBitset &FBS, 1561 ArrayRef<unsigned> Variants); 1562 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1563 1564 bool isId(const StringRef Id) const; 1565 bool isId(const AsmToken &Token, const StringRef Id) const; 1566 bool isToken(const AsmToken::TokenKind Kind) const; 1567 bool trySkipId(const StringRef Id); 1568 bool trySkipId(const StringRef Pref, const StringRef Id); 1569 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1570 bool trySkipToken(const AsmToken::TokenKind Kind); 1571 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1572 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1573 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1574 1575 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1576 AsmToken::TokenKind getTokenKind() const; 1577 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1578 bool parseExpr(OperandVector &Operands); 1579 StringRef getTokenStr() const; 1580 AsmToken peekToken(); 1581 AsmToken getToken() const; 1582 SMLoc getLoc() const; 1583 void lex(); 1584 1585 public: 1586 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1587 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1588 1589 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1590 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1591 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1592 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1593 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1594 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1595 1596 bool parseSwizzleOperand(int64_t &Op, 1597 const unsigned MinVal, 1598 const unsigned MaxVal, 1599 const StringRef ErrMsg, 1600 SMLoc &Loc); 1601 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1602 const unsigned MinVal, 1603 const unsigned MaxVal, 1604 const StringRef ErrMsg); 1605 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1606 bool parseSwizzleOffset(int64_t &Imm); 1607 bool parseSwizzleMacro(int64_t &Imm); 1608 bool parseSwizzleQuadPerm(int64_t &Imm); 1609 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1610 bool parseSwizzleBroadcast(int64_t &Imm); 1611 bool parseSwizzleSwap(int64_t &Imm); 1612 bool parseSwizzleReverse(int64_t &Imm); 1613 1614 OperandMatchResultTy 
parseGPRIdxMode(OperandVector &Operands); 1615 int64_t parseGPRIdxMacro(); 1616 1617 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1618 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1619 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1620 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1621 1622 AMDGPUOperand::Ptr defaultCPol() const; 1623 AMDGPUOperand::Ptr defaultCPol_GLC1() const; 1624 1625 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1626 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1627 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1628 AMDGPUOperand::Ptr defaultFlatOffset() const; 1629 1630 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1631 1632 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1633 OptionalImmIndexMap &OptionalIdx); 1634 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1635 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1636 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1637 1638 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1639 1640 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1641 bool IsAtomic = false); 1642 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1643 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1644 1645 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1646 1647 bool parseDimId(unsigned &Encoding); 1648 OperandMatchResultTy parseDim(OperandVector &Operands); 1649 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1650 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1651 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1652 int64_t parseDPPCtrlSel(StringRef Ctrl); 1653 int64_t parseDPPCtrlPerm(); 1654 AMDGPUOperand::Ptr defaultRowMask() const; 1655 AMDGPUOperand::Ptr defaultBankMask() const; 1656 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1657 AMDGPUOperand::Ptr defaultFI() const; 1658 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1659 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1660 1661 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1662 AMDGPUOperand::ImmTy Type); 1663 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1664 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1665 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1666 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1667 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1668 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1669 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1670 uint64_t BasicInstType, 1671 bool SkipDstVcc = false, 1672 bool SkipSrcVcc = false); 1673 1674 AMDGPUOperand::Ptr defaultBLGP() const; 1675 AMDGPUOperand::Ptr defaultCBSZ() const; 1676 AMDGPUOperand::Ptr defaultABID() const; 1677 1678 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1679 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1680 }; 1681 1682 struct OptionalOperand { 1683 const char *Name; 1684 AMDGPUOperand::ImmTy Type; 1685 bool IsBit; 1686 bool (*ConvertResult)(int64_t&); 1687 }; 1688 1689 } // end anonymous namespace 1690 1691 // May be called with integer type with equivalent bitwidth. 
1692 static const fltSemantics *getFltSemantics(unsigned Size) { 1693 switch (Size) { 1694 case 4: 1695 return &APFloat::IEEEsingle(); 1696 case 8: 1697 return &APFloat::IEEEdouble(); 1698 case 2: 1699 return &APFloat::IEEEhalf(); 1700 default: 1701 llvm_unreachable("unsupported fp type"); 1702 } 1703 } 1704 1705 static const fltSemantics *getFltSemantics(MVT VT) { 1706 return getFltSemantics(VT.getSizeInBits() / 8); 1707 } 1708 1709 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1710 switch (OperandType) { 1711 case AMDGPU::OPERAND_REG_IMM_INT32: 1712 case AMDGPU::OPERAND_REG_IMM_FP32: 1713 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1714 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1715 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1716 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1717 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1718 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1719 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1720 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1721 return &APFloat::IEEEsingle(); 1722 case AMDGPU::OPERAND_REG_IMM_INT64: 1723 case AMDGPU::OPERAND_REG_IMM_FP64: 1724 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1725 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1727 return &APFloat::IEEEdouble(); 1728 case AMDGPU::OPERAND_REG_IMM_INT16: 1729 case AMDGPU::OPERAND_REG_IMM_FP16: 1730 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1731 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1732 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1733 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1734 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1735 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1737 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1738 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1739 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1740 return &APFloat::IEEEhalf(); 1741 default: 1742 llvm_unreachable("unsupported fp type"); 1743 } 1744 } 1745 1746 //===----------------------------------------------------------------------===// 1747 // Operand 1748 //===----------------------------------------------------------------------===// 1749 1750 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1751 bool Lost; 1752 1753 // Convert literal to single precision 1754 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1755 APFloat::rmNearestTiesToEven, 1756 &Lost); 1757 // We allow precision lost but not overflow or underflow 1758 if (Status != APFloat::opOK && 1759 Lost && 1760 ((Status & APFloat::opOverflow) != 0 || 1761 (Status & APFloat::opUnderflow) != 0)) { 1762 return false; 1763 } 1764 1765 return true; 1766 } 1767 1768 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1769 return isUIntN(Size, Val) || isIntN(Size, Val); 1770 } 1771 1772 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1773 if (VT.getScalarType() == MVT::i16) { 1774 // FP immediate values are broken. 1775 return isInlinableIntLiteral(Val); 1776 } 1777 1778 // f16/v2f16 operands work correctly for all values. 1779 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1780 } 1781 1782 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1783 1784 // This is a hack to enable named inline values like 1785 // shared_base with both 32-bit and 64-bit operands. 1786 // Note that these values are defined as 1787 // 32-bit operands only. 1788 if (isInlineValue()) { 1789 return true; 1790 } 1791 1792 if (!isImmTy(ImmTyNone)) { 1793 // Only plain immediates are inlinable (e.g. 
"clamp" attribute is not) 1794 return false; 1795 } 1796 // TODO: We should avoid using host float here. It would be better to 1797 // check the float bit values which is what a few other places do. 1798 // We've had bot failures before due to weird NaN support on mips hosts. 1799 1800 APInt Literal(64, Imm.Val); 1801 1802 if (Imm.IsFPImm) { // We got fp literal token 1803 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1804 return AMDGPU::isInlinableLiteral64(Imm.Val, 1805 AsmParser->hasInv2PiInlineImm()); 1806 } 1807 1808 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1809 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1810 return false; 1811 1812 if (type.getScalarSizeInBits() == 16) { 1813 return isInlineableLiteralOp16( 1814 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1815 type, AsmParser->hasInv2PiInlineImm()); 1816 } 1817 1818 // Check if single precision literal is inlinable 1819 return AMDGPU::isInlinableLiteral32( 1820 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1821 AsmParser->hasInv2PiInlineImm()); 1822 } 1823 1824 // We got int literal token. 1825 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1826 return AMDGPU::isInlinableLiteral64(Imm.Val, 1827 AsmParser->hasInv2PiInlineImm()); 1828 } 1829 1830 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1831 return false; 1832 } 1833 1834 if (type.getScalarSizeInBits() == 16) { 1835 return isInlineableLiteralOp16( 1836 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1837 type, AsmParser->hasInv2PiInlineImm()); 1838 } 1839 1840 return AMDGPU::isInlinableLiteral32( 1841 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1842 AsmParser->hasInv2PiInlineImm()); 1843 } 1844 1845 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1846 // Check that this immediate can be added as literal 1847 if (!isImmTy(ImmTyNone)) { 1848 return false; 1849 } 1850 1851 if (!Imm.IsFPImm) { 1852 // We got int literal token. 1853 1854 if (type == MVT::f64 && hasFPModifiers()) { 1855 // Cannot apply fp modifiers to int literals preserving the same semantics 1856 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1857 // disable these cases. 1858 return false; 1859 } 1860 1861 unsigned Size = type.getSizeInBits(); 1862 if (Size == 64) 1863 Size = 32; 1864 1865 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1866 // types. 1867 return isSafeTruncation(Imm.Val, Size); 1868 } 1869 1870 // We got fp literal token 1871 if (type == MVT::f64) { // Expected 64-bit fp operand 1872 // We would set low 64-bits of literal to zeroes but we accept this literals 1873 return true; 1874 } 1875 1876 if (type == MVT::i64) { // Expected 64-bit int operand 1877 // We don't allow fp literals in 64-bit integer instructions. It is 1878 // unclear how we should encode them. 1879 return false; 1880 } 1881 1882 // We allow fp literals with f16x2 operands assuming that the specified 1883 // literal goes into the lower half and the upper half is zero. We also 1884 // require that the literal may be losslesly converted to f16. 1885 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1886 (type == MVT::v2i16)? MVT::i16 : 1887 (type == MVT::v2f32)? 
MVT::f32 : type; 1888 1889 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1890 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1891 } 1892 1893 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1894 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1895 } 1896 1897 bool AMDGPUOperand::isVRegWithInputMods() const { 1898 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1899 // GFX90A allows DPP on 64-bit operands. 1900 (isRegClass(AMDGPU::VReg_64RegClassID) && 1901 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1902 } 1903 1904 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1905 if (AsmParser->isVI()) 1906 return isVReg32(); 1907 else if (AsmParser->isGFX9Plus()) 1908 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1909 else 1910 return false; 1911 } 1912 1913 bool AMDGPUOperand::isSDWAFP16Operand() const { 1914 return isSDWAOperand(MVT::f16); 1915 } 1916 1917 bool AMDGPUOperand::isSDWAFP32Operand() const { 1918 return isSDWAOperand(MVT::f32); 1919 } 1920 1921 bool AMDGPUOperand::isSDWAInt16Operand() const { 1922 return isSDWAOperand(MVT::i16); 1923 } 1924 1925 bool AMDGPUOperand::isSDWAInt32Operand() const { 1926 return isSDWAOperand(MVT::i32); 1927 } 1928 1929 bool AMDGPUOperand::isBoolReg() const { 1930 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1931 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1932 } 1933 1934 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1935 { 1936 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1937 assert(Size == 2 || Size == 4 || Size == 8); 1938 1939 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1940 1941 if (Imm.Mods.Abs) { 1942 Val &= ~FpSignMask; 1943 } 1944 if (Imm.Mods.Neg) { 1945 Val ^= FpSignMask; 1946 } 1947 1948 return Val; 1949 } 1950 1951 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1952 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1953 Inst.getNumOperands())) { 1954 addLiteralImmOperand(Inst, Imm.Val, 1955 ApplyModifiers & 1956 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1957 } else { 1958 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1959 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1960 setImmKindNone(); 1961 } 1962 } 1963 1964 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1965 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1966 auto OpNum = Inst.getNumOperands(); 1967 // Check that this operand accepts literals 1968 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1969 1970 if (ApplyModifiers) { 1971 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1972 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1973 Val = applyInputFPModifiers(Val, Size); 1974 } 1975 1976 APInt Literal(64, Val); 1977 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1978 1979 if (Imm.IsFPImm) { // We got fp literal token 1980 switch (OpTy) { 1981 case AMDGPU::OPERAND_REG_IMM_INT64: 1982 case AMDGPU::OPERAND_REG_IMM_FP64: 1983 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1984 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1985 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1986 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1987 AsmParser->hasInv2PiInlineImm())) { 1988 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1989 setImmKindConst(); 1990 return; 1991 } 1992 1993 // Non-inlineable 1994 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1995 // For fp operands we check if low 32 bits are zeros 1996 if (Literal.getLoBits(32) != 0) { 1997 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1998 "Can't encode literal as exact 64-bit floating-point operand. " 1999 "Low 32-bits will be set to zero"); 2000 } 2001 2002 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2003 setImmKindLiteral(); 2004 return; 2005 } 2006 2007 // We don't allow fp literals in 64-bit integer instructions. It is 2008 // unclear how we should encode them. This case should be checked earlier 2009 // in predicate methods (isLiteralImm()) 2010 llvm_unreachable("fp literal in 64-bit integer instruction."); 2011 2012 case AMDGPU::OPERAND_REG_IMM_INT32: 2013 case AMDGPU::OPERAND_REG_IMM_FP32: 2014 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2015 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2016 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2017 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2018 case AMDGPU::OPERAND_REG_IMM_INT16: 2019 case AMDGPU::OPERAND_REG_IMM_FP16: 2020 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2021 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2022 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2023 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2024 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2025 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2026 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2027 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2028 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2029 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2030 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2031 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2032 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2033 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2034 bool lost; 2035 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2036 // Convert literal to single precision 2037 FPLiteral.convert(*getOpFltSemantics(OpTy), 2038 APFloat::rmNearestTiesToEven, &lost); 2039 // We allow precision lost but not overflow or underflow. This should be 2040 // checked earlier in isLiteralImm() 2041 2042 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2043 Inst.addOperand(MCOperand::createImm(ImmVal)); 2044 setImmKindLiteral(); 2045 return; 2046 } 2047 default: 2048 llvm_unreachable("invalid operand size"); 2049 } 2050 2051 return; 2052 } 2053 2054 // We got int literal token. 2055 // Only sign extend inline immediates. 
2056 switch (OpTy) { 2057 case AMDGPU::OPERAND_REG_IMM_INT32: 2058 case AMDGPU::OPERAND_REG_IMM_FP32: 2059 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2060 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2061 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2062 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2063 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2064 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2065 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2067 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2069 if (isSafeTruncation(Val, 32) && 2070 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2071 AsmParser->hasInv2PiInlineImm())) { 2072 Inst.addOperand(MCOperand::createImm(Val)); 2073 setImmKindConst(); 2074 return; 2075 } 2076 2077 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2078 setImmKindLiteral(); 2079 return; 2080 2081 case AMDGPU::OPERAND_REG_IMM_INT64: 2082 case AMDGPU::OPERAND_REG_IMM_FP64: 2083 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2084 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2085 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2086 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2087 Inst.addOperand(MCOperand::createImm(Val)); 2088 setImmKindConst(); 2089 return; 2090 } 2091 2092 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2093 setImmKindLiteral(); 2094 return; 2095 2096 case AMDGPU::OPERAND_REG_IMM_INT16: 2097 case AMDGPU::OPERAND_REG_IMM_FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2099 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2102 if (isSafeTruncation(Val, 16) && 2103 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2104 AsmParser->hasInv2PiInlineImm())) { 2105 Inst.addOperand(MCOperand::createImm(Val)); 2106 setImmKindConst(); 2107 return; 2108 } 2109 2110 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2111 setImmKindLiteral(); 2112 return; 2113 2114 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2115 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2118 assert(isSafeTruncation(Val, 16)); 2119 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2120 AsmParser->hasInv2PiInlineImm())); 2121 2122 Inst.addOperand(MCOperand::createImm(Val)); 2123 return; 2124 } 2125 default: 2126 llvm_unreachable("invalid operand size"); 2127 } 2128 } 2129 2130 template <unsigned Bitwidth> 2131 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2132 APInt Literal(64, Imm.Val); 2133 setImmKindNone(); 2134 2135 if (!Imm.IsFPImm) { 2136 // We got int literal token. 
2137 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2138 return; 2139 } 2140 2141 bool Lost; 2142 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2143 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2144 APFloat::rmNearestTiesToEven, &Lost); 2145 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2146 } 2147 2148 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2149 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2150 } 2151 2152 static bool isInlineValue(unsigned Reg) { 2153 switch (Reg) { 2154 case AMDGPU::SRC_SHARED_BASE: 2155 case AMDGPU::SRC_SHARED_LIMIT: 2156 case AMDGPU::SRC_PRIVATE_BASE: 2157 case AMDGPU::SRC_PRIVATE_LIMIT: 2158 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2159 return true; 2160 case AMDGPU::SRC_VCCZ: 2161 case AMDGPU::SRC_EXECZ: 2162 case AMDGPU::SRC_SCC: 2163 return true; 2164 case AMDGPU::SGPR_NULL: 2165 return true; 2166 default: 2167 return false; 2168 } 2169 } 2170 2171 bool AMDGPUOperand::isInlineValue() const { 2172 return isRegKind() && ::isInlineValue(getReg()); 2173 } 2174 2175 //===----------------------------------------------------------------------===// 2176 // AsmParser 2177 //===----------------------------------------------------------------------===// 2178 2179 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2180 if (Is == IS_VGPR) { 2181 switch (RegWidth) { 2182 default: return -1; 2183 case 1: return AMDGPU::VGPR_32RegClassID; 2184 case 2: return AMDGPU::VReg_64RegClassID; 2185 case 3: return AMDGPU::VReg_96RegClassID; 2186 case 4: return AMDGPU::VReg_128RegClassID; 2187 case 5: return AMDGPU::VReg_160RegClassID; 2188 case 6: return AMDGPU::VReg_192RegClassID; 2189 case 8: return AMDGPU::VReg_256RegClassID; 2190 case 16: return AMDGPU::VReg_512RegClassID; 2191 case 32: return AMDGPU::VReg_1024RegClassID; 2192 } 2193 } else if (Is == IS_TTMP) { 2194 switch (RegWidth) { 2195 default: return -1; 2196 case 1: return AMDGPU::TTMP_32RegClassID; 2197 case 2: return AMDGPU::TTMP_64RegClassID; 2198 case 4: return AMDGPU::TTMP_128RegClassID; 2199 case 8: return AMDGPU::TTMP_256RegClassID; 2200 case 16: return AMDGPU::TTMP_512RegClassID; 2201 } 2202 } else if (Is == IS_SGPR) { 2203 switch (RegWidth) { 2204 default: return -1; 2205 case 1: return AMDGPU::SGPR_32RegClassID; 2206 case 2: return AMDGPU::SGPR_64RegClassID; 2207 case 3: return AMDGPU::SGPR_96RegClassID; 2208 case 4: return AMDGPU::SGPR_128RegClassID; 2209 case 5: return AMDGPU::SGPR_160RegClassID; 2210 case 6: return AMDGPU::SGPR_192RegClassID; 2211 case 8: return AMDGPU::SGPR_256RegClassID; 2212 case 16: return AMDGPU::SGPR_512RegClassID; 2213 } 2214 } else if (Is == IS_AGPR) { 2215 switch (RegWidth) { 2216 default: return -1; 2217 case 1: return AMDGPU::AGPR_32RegClassID; 2218 case 2: return AMDGPU::AReg_64RegClassID; 2219 case 3: return AMDGPU::AReg_96RegClassID; 2220 case 4: return AMDGPU::AReg_128RegClassID; 2221 case 5: return AMDGPU::AReg_160RegClassID; 2222 case 6: return AMDGPU::AReg_192RegClassID; 2223 case 8: return AMDGPU::AReg_256RegClassID; 2224 case 16: return AMDGPU::AReg_512RegClassID; 2225 case 32: return AMDGPU::AReg_1024RegClassID; 2226 } 2227 } 2228 return -1; 2229 } 2230 2231 static unsigned getSpecialRegForName(StringRef RegName) { 2232 return StringSwitch<unsigned>(RegName) 2233 .Case("exec", AMDGPU::EXEC) 2234 .Case("vcc", AMDGPU::VCC) 2235 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2236 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2237 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2238 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2239 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2240 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2241 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2242 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2243 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2244 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2245 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2246 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2247 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2248 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2249 .Case("m0", AMDGPU::M0) 2250 .Case("vccz", AMDGPU::SRC_VCCZ) 2251 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2252 .Case("execz", AMDGPU::SRC_EXECZ) 2253 .Case("src_execz", AMDGPU::SRC_EXECZ) 2254 .Case("scc", AMDGPU::SRC_SCC) 2255 .Case("src_scc", AMDGPU::SRC_SCC) 2256 .Case("tba", AMDGPU::TBA) 2257 .Case("tma", AMDGPU::TMA) 2258 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2259 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2260 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2261 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2262 .Case("vcc_lo", AMDGPU::VCC_LO) 2263 .Case("vcc_hi", AMDGPU::VCC_HI) 2264 .Case("exec_lo", AMDGPU::EXEC_LO) 2265 .Case("exec_hi", AMDGPU::EXEC_HI) 2266 .Case("tma_lo", AMDGPU::TMA_LO) 2267 .Case("tma_hi", AMDGPU::TMA_HI) 2268 .Case("tba_lo", AMDGPU::TBA_LO) 2269 .Case("tba_hi", AMDGPU::TBA_HI) 2270 .Case("pc", AMDGPU::PC_REG) 2271 .Case("null", AMDGPU::SGPR_NULL) 2272 .Default(AMDGPU::NoRegister); 2273 } 2274 2275 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2276 SMLoc &EndLoc, bool RestoreOnFailure) { 2277 auto R = parseRegister(); 2278 if (!R) return true; 2279 assert(R->isReg()); 2280 RegNo = R->getReg(); 2281 StartLoc = R->getStartLoc(); 2282 EndLoc = R->getEndLoc(); 2283 return false; 2284 } 2285 2286 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2287 SMLoc &EndLoc) { 2288 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2289 } 2290 2291 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2292 SMLoc &StartLoc, 2293 SMLoc &EndLoc) { 2294 bool Result = 2295 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2296 bool PendingErrors = getParser().hasPendingError(); 2297 getParser().clearPendingErrors(); 2298 if (PendingErrors) 2299 return MatchOperand_ParseFail; 2300 if (Result) 2301 return MatchOperand_NoMatch; 2302 return MatchOperand_Success; 2303 } 2304 2305 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2306 RegisterKind RegKind, unsigned Reg1, 2307 SMLoc Loc) { 2308 switch (RegKind) { 2309 case IS_SPECIAL: 2310 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2311 Reg = AMDGPU::EXEC; 2312 RegWidth = 2; 2313 return true; 2314 } 2315 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2316 Reg = AMDGPU::FLAT_SCR; 2317 RegWidth = 2; 2318 return true; 2319 } 2320 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2321 Reg = AMDGPU::XNACK_MASK; 2322 RegWidth = 2; 2323 return true; 2324 } 2325 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2326 Reg = AMDGPU::VCC; 2327 RegWidth = 2; 2328 return true; 2329 } 2330 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2331 Reg = AMDGPU::TBA; 2332 RegWidth = 2; 2333 return true; 2334 } 2335 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2336 Reg = AMDGPU::TMA; 2337 
RegWidth = 2; 2338 return true; 2339 } 2340 Error(Loc, "register does not fit in the list"); 2341 return false; 2342 case IS_VGPR: 2343 case IS_SGPR: 2344 case IS_AGPR: 2345 case IS_TTMP: 2346 if (Reg1 != Reg + RegWidth) { 2347 Error(Loc, "registers in a list must have consecutive indices"); 2348 return false; 2349 } 2350 RegWidth++; 2351 return true; 2352 default: 2353 llvm_unreachable("unexpected register kind"); 2354 } 2355 } 2356 2357 struct RegInfo { 2358 StringLiteral Name; 2359 RegisterKind Kind; 2360 }; 2361 2362 static constexpr RegInfo RegularRegisters[] = { 2363 {{"v"}, IS_VGPR}, 2364 {{"s"}, IS_SGPR}, 2365 {{"ttmp"}, IS_TTMP}, 2366 {{"acc"}, IS_AGPR}, 2367 {{"a"}, IS_AGPR}, 2368 }; 2369 2370 static bool isRegularReg(RegisterKind Kind) { 2371 return Kind == IS_VGPR || 2372 Kind == IS_SGPR || 2373 Kind == IS_TTMP || 2374 Kind == IS_AGPR; 2375 } 2376 2377 static const RegInfo* getRegularRegInfo(StringRef Str) { 2378 for (const RegInfo &Reg : RegularRegisters) 2379 if (Str.startswith(Reg.Name)) 2380 return &Reg; 2381 return nullptr; 2382 } 2383 2384 static bool getRegNum(StringRef Str, unsigned& Num) { 2385 return !Str.getAsInteger(10, Num); 2386 } 2387 2388 bool 2389 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2390 const AsmToken &NextToken) const { 2391 2392 // A list of consecutive registers: [s0,s1,s2,s3] 2393 if (Token.is(AsmToken::LBrac)) 2394 return true; 2395 2396 if (!Token.is(AsmToken::Identifier)) 2397 return false; 2398 2399 // A single register like s0 or a range of registers like s[0:1] 2400 2401 StringRef Str = Token.getString(); 2402 const RegInfo *Reg = getRegularRegInfo(Str); 2403 if (Reg) { 2404 StringRef RegName = Reg->Name; 2405 StringRef RegSuffix = Str.substr(RegName.size()); 2406 if (!RegSuffix.empty()) { 2407 unsigned Num; 2408 // A single register with an index: rXX 2409 if (getRegNum(RegSuffix, Num)) 2410 return true; 2411 } else { 2412 // A range of registers: r[XX:YY]. 2413 if (NextToken.is(AsmToken::LBrac)) 2414 return true; 2415 } 2416 } 2417 2418 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2419 } 2420 2421 bool 2422 AMDGPUAsmParser::isRegister() 2423 { 2424 return isRegister(getToken(), peekToken()); 2425 } 2426 2427 unsigned 2428 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2429 unsigned RegNum, 2430 unsigned RegWidth, 2431 SMLoc Loc) { 2432 2433 assert(isRegularReg(RegKind)); 2434 2435 unsigned AlignSize = 1; 2436 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2437 // SGPR and TTMP registers must be aligned. 2438 // Max required alignment is 4 dwords. 
2439 AlignSize = std::min(RegWidth, 4u); 2440 } 2441 2442 if (RegNum % AlignSize != 0) { 2443 Error(Loc, "invalid register alignment"); 2444 return AMDGPU::NoRegister; 2445 } 2446 2447 unsigned RegIdx = RegNum / AlignSize; 2448 int RCID = getRegClass(RegKind, RegWidth); 2449 if (RCID == -1) { 2450 Error(Loc, "invalid or unsupported register size"); 2451 return AMDGPU::NoRegister; 2452 } 2453 2454 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2455 const MCRegisterClass RC = TRI->getRegClass(RCID); 2456 if (RegIdx >= RC.getNumRegs()) { 2457 Error(Loc, "register index is out of range"); 2458 return AMDGPU::NoRegister; 2459 } 2460 2461 return RC.getRegister(RegIdx); 2462 } 2463 2464 bool 2465 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2466 int64_t RegLo, RegHi; 2467 if (!skipToken(AsmToken::LBrac, "missing register index")) 2468 return false; 2469 2470 SMLoc FirstIdxLoc = getLoc(); 2471 SMLoc SecondIdxLoc; 2472 2473 if (!parseExpr(RegLo)) 2474 return false; 2475 2476 if (trySkipToken(AsmToken::Colon)) { 2477 SecondIdxLoc = getLoc(); 2478 if (!parseExpr(RegHi)) 2479 return false; 2480 } else { 2481 RegHi = RegLo; 2482 } 2483 2484 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2485 return false; 2486 2487 if (!isUInt<32>(RegLo)) { 2488 Error(FirstIdxLoc, "invalid register index"); 2489 return false; 2490 } 2491 2492 if (!isUInt<32>(RegHi)) { 2493 Error(SecondIdxLoc, "invalid register index"); 2494 return false; 2495 } 2496 2497 if (RegLo > RegHi) { 2498 Error(FirstIdxLoc, "first register index should not exceed second index"); 2499 return false; 2500 } 2501 2502 Num = static_cast<unsigned>(RegLo); 2503 Width = (RegHi - RegLo) + 1; 2504 return true; 2505 } 2506 2507 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2508 unsigned &RegNum, unsigned &RegWidth, 2509 SmallVectorImpl<AsmToken> &Tokens) { 2510 assert(isToken(AsmToken::Identifier)); 2511 unsigned Reg = getSpecialRegForName(getTokenStr()); 2512 if (Reg) { 2513 RegNum = 0; 2514 RegWidth = 1; 2515 RegKind = IS_SPECIAL; 2516 Tokens.push_back(getToken()); 2517 lex(); // skip register name 2518 } 2519 return Reg; 2520 } 2521 2522 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2523 unsigned &RegNum, unsigned &RegWidth, 2524 SmallVectorImpl<AsmToken> &Tokens) { 2525 assert(isToken(AsmToken::Identifier)); 2526 StringRef RegName = getTokenStr(); 2527 auto Loc = getLoc(); 2528 2529 const RegInfo *RI = getRegularRegInfo(RegName); 2530 if (!RI) { 2531 Error(Loc, "invalid register name"); 2532 return AMDGPU::NoRegister; 2533 } 2534 2535 Tokens.push_back(getToken()); 2536 lex(); // skip register name 2537 2538 RegKind = RI->Kind; 2539 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2540 if (!RegSuffix.empty()) { 2541 // Single 32-bit register: vXX. 2542 if (!getRegNum(RegSuffix, RegNum)) { 2543 Error(Loc, "invalid register index"); 2544 return AMDGPU::NoRegister; 2545 } 2546 RegWidth = 1; 2547 } else { 2548 // Range of registers: v[XX:YY]. ":YY" is optional. 
2549 if (!ParseRegRange(RegNum, RegWidth)) 2550 return AMDGPU::NoRegister; 2551 } 2552 2553 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2554 } 2555 2556 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2557 unsigned &RegWidth, 2558 SmallVectorImpl<AsmToken> &Tokens) { 2559 unsigned Reg = AMDGPU::NoRegister; 2560 auto ListLoc = getLoc(); 2561 2562 if (!skipToken(AsmToken::LBrac, 2563 "expected a register or a list of registers")) { 2564 return AMDGPU::NoRegister; 2565 } 2566 2567 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2568 2569 auto Loc = getLoc(); 2570 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2571 return AMDGPU::NoRegister; 2572 if (RegWidth != 1) { 2573 Error(Loc, "expected a single 32-bit register"); 2574 return AMDGPU::NoRegister; 2575 } 2576 2577 for (; trySkipToken(AsmToken::Comma); ) { 2578 RegisterKind NextRegKind; 2579 unsigned NextReg, NextRegNum, NextRegWidth; 2580 Loc = getLoc(); 2581 2582 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2583 NextRegNum, NextRegWidth, 2584 Tokens)) { 2585 return AMDGPU::NoRegister; 2586 } 2587 if (NextRegWidth != 1) { 2588 Error(Loc, "expected a single 32-bit register"); 2589 return AMDGPU::NoRegister; 2590 } 2591 if (NextRegKind != RegKind) { 2592 Error(Loc, "registers in a list must be of the same kind"); 2593 return AMDGPU::NoRegister; 2594 } 2595 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2596 return AMDGPU::NoRegister; 2597 } 2598 2599 if (!skipToken(AsmToken::RBrac, 2600 "expected a comma or a closing square bracket")) { 2601 return AMDGPU::NoRegister; 2602 } 2603 2604 if (isRegularReg(RegKind)) 2605 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2606 2607 return Reg; 2608 } 2609 2610 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2611 unsigned &RegNum, unsigned &RegWidth, 2612 SmallVectorImpl<AsmToken> &Tokens) { 2613 auto Loc = getLoc(); 2614 Reg = AMDGPU::NoRegister; 2615 2616 if (isToken(AsmToken::Identifier)) { 2617 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2618 if (Reg == AMDGPU::NoRegister) 2619 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2620 } else { 2621 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2622 } 2623 2624 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2625 if (Reg == AMDGPU::NoRegister) { 2626 assert(Parser.hasPendingError()); 2627 return false; 2628 } 2629 2630 if (!subtargetHasRegister(*TRI, Reg)) { 2631 if (Reg == AMDGPU::SGPR_NULL) { 2632 Error(Loc, "'null' operand is not supported on this GPU"); 2633 } else { 2634 Error(Loc, "register not available on this GPU"); 2635 } 2636 return false; 2637 } 2638 2639 return true; 2640 } 2641 2642 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2643 unsigned &RegNum, unsigned &RegWidth, 2644 bool RestoreOnFailure /*=false*/) { 2645 Reg = AMDGPU::NoRegister; 2646 2647 SmallVector<AsmToken, 1> Tokens; 2648 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2649 if (RestoreOnFailure) { 2650 while (!Tokens.empty()) { 2651 getLexer().UnLex(Tokens.pop_back_val()); 2652 } 2653 } 2654 return true; 2655 } 2656 return false; 2657 } 2658 2659 Optional<StringRef> 2660 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2661 switch (RegKind) { 2662 case IS_VGPR: 2663 return StringRef(".amdgcn.next_free_vgpr"); 2664 case IS_SGPR: 2665 return StringRef(".amdgcn.next_free_sgpr"); 2666 default: 2667 return None; 2668 } 2669 } 2670 2671 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2672 auto SymbolName = getGprCountSymbolName(RegKind); 2673 assert(SymbolName && "initializing invalid register kind"); 2674 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2675 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2676 } 2677 2678 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2679 unsigned DwordRegIndex, 2680 unsigned RegWidth) { 2681 // Symbols are only defined for GCN targets 2682 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2683 return true; 2684 2685 auto SymbolName = getGprCountSymbolName(RegKind); 2686 if (!SymbolName) 2687 return true; 2688 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2689 2690 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2691 int64_t OldCount; 2692 2693 if (!Sym->isVariable()) 2694 return !Error(getLoc(), 2695 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2696 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2697 return !Error( 2698 getLoc(), 2699 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2700 2701 if (OldCount <= NewMax) 2702 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2703 2704 return true; 2705 } 2706 2707 std::unique_ptr<AMDGPUOperand> 2708 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2709 const auto &Tok = getToken(); 2710 SMLoc StartLoc = Tok.getLoc(); 2711 SMLoc EndLoc = Tok.getEndLoc(); 2712 RegisterKind RegKind; 2713 unsigned Reg, RegNum, RegWidth; 2714 2715 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2716 return nullptr; 2717 } 2718 if (isHsaAbiVersion3(&getSTI())) { 2719 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2720 return nullptr; 2721 } else 2722 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2723 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2724 } 2725 2726 OperandMatchResultTy 2727 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2728 // TODO: add syntactic sugar for 1/(2*PI) 2729 2730 assert(!isRegister()); 2731 assert(!isModifier()); 2732 2733 const auto& Tok = getToken(); 2734 const auto& NextTok = peekToken(); 2735 bool IsReal = Tok.is(AsmToken::Real); 2736 SMLoc S = getLoc(); 2737 bool Negate = false; 2738 2739 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2740 lex(); 2741 IsReal = true; 2742 Negate = true; 2743 } 2744 2745 if (IsReal) { 2746 // Floating-point expressions are not supported. 2747 // Can only allow floating-point literals with an 2748 // optional sign. 2749 2750 StringRef Num = getTokenStr(); 2751 lex(); 2752 2753 APFloat RealVal(APFloat::IEEEdouble()); 2754 auto roundMode = APFloat::rmNearestTiesToEven; 2755 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2756 return MatchOperand_ParseFail; 2757 } 2758 if (Negate) 2759 RealVal.changeSign(); 2760 2761 Operands.push_back( 2762 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2763 AMDGPUOperand::ImmTyNone, true)); 2764 2765 return MatchOperand_Success; 2766 2767 } else { 2768 int64_t IntVal; 2769 const MCExpr *Expr; 2770 SMLoc S = getLoc(); 2771 2772 if (HasSP3AbsModifier) { 2773 // This is a workaround for handling expressions 2774 // as arguments of SP3 'abs' modifier, for example: 2775 // |1.0| 2776 // |-1| 2777 // |1+x| 2778 // This syntax is not compatible with syntax of standard 2779 // MC expressions (due to the trailing '|'). 
2780 SMLoc EndLoc; 2781 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2782 return MatchOperand_ParseFail; 2783 } else { 2784 if (Parser.parseExpression(Expr)) 2785 return MatchOperand_ParseFail; 2786 } 2787 2788 if (Expr->evaluateAsAbsolute(IntVal)) { 2789 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2790 } else { 2791 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2792 } 2793 2794 return MatchOperand_Success; 2795 } 2796 2797 return MatchOperand_NoMatch; 2798 } 2799 2800 OperandMatchResultTy 2801 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2802 if (!isRegister()) 2803 return MatchOperand_NoMatch; 2804 2805 if (auto R = parseRegister()) { 2806 assert(R->isReg()); 2807 Operands.push_back(std::move(R)); 2808 return MatchOperand_Success; 2809 } 2810 return MatchOperand_ParseFail; 2811 } 2812 2813 OperandMatchResultTy 2814 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2815 auto res = parseReg(Operands); 2816 if (res != MatchOperand_NoMatch) { 2817 return res; 2818 } else if (isModifier()) { 2819 return MatchOperand_NoMatch; 2820 } else { 2821 return parseImm(Operands, HasSP3AbsMod); 2822 } 2823 } 2824 2825 bool 2826 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2827 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2828 const auto &str = Token.getString(); 2829 return str == "abs" || str == "neg" || str == "sext"; 2830 } 2831 return false; 2832 } 2833 2834 bool 2835 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2836 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2837 } 2838 2839 bool 2840 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2841 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2842 } 2843 2844 bool 2845 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2846 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2847 } 2848 2849 // Check if this is an operand modifier or an opcode modifier 2850 // which may look like an expression but it is not. We should 2851 // avoid parsing these modifiers as expressions. Currently 2852 // recognized sequences are: 2853 // |...| 2854 // abs(...) 2855 // neg(...) 2856 // sext(...) 2857 // -reg 2858 // -|...| 2859 // -abs(...) 2860 // name:... 2861 // Note that simple opcode modifiers like 'gds' may be parsed as 2862 // expressions; this is a special case. See getExpressionAsToken. 2863 // 2864 bool 2865 AMDGPUAsmParser::isModifier() { 2866 2867 AsmToken Tok = getToken(); 2868 AsmToken NextToken[2]; 2869 peekTokens(NextToken); 2870 2871 return isOperandModifier(Tok, NextToken[0]) || 2872 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2873 isOpcodeModifierWithVal(Tok, NextToken[0]); 2874 } 2875 2876 // Check if the current token is an SP3 'neg' modifier. 2877 // Currently this modifier is allowed in the following context: 2878 // 2879 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2880 // 2. Before an 'abs' modifier: -abs(...) 2881 // 3. Before an SP3 'abs' modifier: -|...| 2882 // 2883 // In all other cases "-" is handled as a part 2884 // of an expression that follows the sign. 
2885 // 2886 // Note: When "-" is followed by an integer literal, 2887 // this is interpreted as integer negation rather 2888 // than a floating-point NEG modifier applied to the literal. 2889 // Besides being counter-intuitive, such use of a floating-point 2890 // NEG modifier would have resulted in a different meaning 2891 // of integer literals used with VOP1/2/C and VOP3, 2892 // for example: 2893 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2894 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2895 // Negative fp literals with a preceding "-" are 2896 // handled likewise for uniformity. 2897 // 2898 bool 2899 AMDGPUAsmParser::parseSP3NegModifier() { 2900 2901 AsmToken NextToken[2]; 2902 peekTokens(NextToken); 2903 2904 if (isToken(AsmToken::Minus) && 2905 (isRegister(NextToken[0], NextToken[1]) || 2906 NextToken[0].is(AsmToken::Pipe) || 2907 isId(NextToken[0], "abs"))) { 2908 lex(); 2909 return true; 2910 } 2911 2912 return false; 2913 } 2914 2915 OperandMatchResultTy 2916 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2917 bool AllowImm) { 2918 bool Neg, SP3Neg; 2919 bool Abs, SP3Abs; 2920 SMLoc Loc; 2921 2922 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2923 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2924 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2925 return MatchOperand_ParseFail; 2926 } 2927 2928 SP3Neg = parseSP3NegModifier(); 2929 2930 Loc = getLoc(); 2931 Neg = trySkipId("neg"); 2932 if (Neg && SP3Neg) { 2933 Error(Loc, "expected register or immediate"); 2934 return MatchOperand_ParseFail; 2935 } 2936 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2937 return MatchOperand_ParseFail; 2938 2939 Abs = trySkipId("abs"); 2940 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2941 return MatchOperand_ParseFail; 2942 2943 Loc = getLoc(); 2944 SP3Abs = trySkipToken(AsmToken::Pipe); 2945 if (Abs && SP3Abs) { 2946 Error(Loc, "expected register or immediate"); 2947 return MatchOperand_ParseFail; 2948 } 2949 2950 OperandMatchResultTy Res; 2951 if (AllowImm) { 2952 Res = parseRegOrImm(Operands, SP3Abs); 2953 } else { 2954 Res = parseReg(Operands); 2955 } 2956 if (Res != MatchOperand_Success) { 2957 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2958 } 2959 2960 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2961 return MatchOperand_ParseFail; 2962 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2963 return MatchOperand_ParseFail; 2964 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2965 return MatchOperand_ParseFail; 2966 2967 AMDGPUOperand::Modifiers Mods; 2968 Mods.Abs = Abs || SP3Abs; 2969 Mods.Neg = Neg || SP3Neg; 2970 2971 if (Mods.hasFPModifiers()) { 2972 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2973 if (Op.isExpr()) { 2974 Error(Op.getStartLoc(), "expected an absolute expression"); 2975 return MatchOperand_ParseFail; 2976 } 2977 Op.setModifiers(Mods); 2978 } 2979 return MatchOperand_Success; 2980 } 2981 2982 OperandMatchResultTy 2983 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2984 bool AllowImm) { 2985 bool Sext = trySkipId("sext"); 2986 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2987 return MatchOperand_ParseFail; 2988 2989 OperandMatchResultTy Res; 2990 if (AllowImm) { 2991 Res = parseRegOrImm(Operands); 2992 } else { 2993 Res = parseReg(Operands); 2994 } 2995 if (Res != MatchOperand_Success) { 2996 return Sext? MatchOperand_ParseFail : Res; 2997 } 2998 2999 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3000 return MatchOperand_ParseFail; 3001 3002 AMDGPUOperand::Modifiers Mods; 3003 Mods.Sext = Sext; 3004 3005 if (Mods.hasIntModifiers()) { 3006 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3007 if (Op.isExpr()) { 3008 Error(Op.getStartLoc(), "expected an absolute expression"); 3009 return MatchOperand_ParseFail; 3010 } 3011 Op.setModifiers(Mods); 3012 } 3013 3014 return MatchOperand_Success; 3015 } 3016 3017 OperandMatchResultTy 3018 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3019 return parseRegOrImmWithFPInputMods(Operands, false); 3020 } 3021 3022 OperandMatchResultTy 3023 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3024 return parseRegOrImmWithIntInputMods(Operands, false); 3025 } 3026 3027 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3028 auto Loc = getLoc(); 3029 if (trySkipId("off")) { 3030 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3031 AMDGPUOperand::ImmTyOff, false)); 3032 return MatchOperand_Success; 3033 } 3034 3035 if (!isRegister()) 3036 return MatchOperand_NoMatch; 3037 3038 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3039 if (Reg) { 3040 Operands.push_back(std::move(Reg)); 3041 return MatchOperand_Success; 3042 } 3043 3044 return MatchOperand_ParseFail; 3045 3046 } 3047 3048 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3049 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3050 3051 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3052 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3053 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3054 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3055 return Match_InvalidOperand; 3056 3057 if ((TSFlags & SIInstrFlags::VOP3) && 3058 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3059 getForcedEncodingSize() != 64) 3060 return Match_PreferE32; 3061 3062 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3063 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3064 // v_mac_f32/16 allow only dst_sel == DWORD; 3065 auto OpNum = 3066 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3067 const auto &Op = Inst.getOperand(OpNum); 3068 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3069 return Match_InvalidOperand; 3070 } 3071 } 3072 3073 return Match_Success; 3074 } 3075 3076 static ArrayRef<unsigned> getAllVariants() { 3077 static const unsigned Variants[] = { 3078 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3079 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3080 }; 3081 3082 return makeArrayRef(Variants); 3083 } 3084 3085 // Which asm variants we should check 3086 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3087 if (getForcedEncodingSize() == 32) { 3088 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3089 return makeArrayRef(Variants); 3090 } 3091 3092 if (isForcedVOP3()) { 3093 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3094 return makeArrayRef(Variants); 3095 } 3096 3097 if (isForcedSDWA()) { 3098 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3099 AMDGPUAsmVariants::SDWA9}; 3100 return makeArrayRef(Variants); 3101 } 3102 3103 if (isForcedDPP()) { 3104 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3105 return makeArrayRef(Variants); 3106 } 3107 3108 return getAllVariants(); 3109 } 3110 3111 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3112 if (getForcedEncodingSize() == 32) 3113 return "e32"; 3114 3115 if (isForcedVOP3()) 3116 return "e64"; 3117 3118 if (isForcedSDWA()) 3119 return "sdwa"; 3120 3121 if (isForcedDPP()) 3122 return "dpp"; 3123 3124 return ""; 3125 } 3126 3127 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3128 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3129 const unsigned Num = Desc.getNumImplicitUses(); 3130 for (unsigned i = 0; i < Num; ++i) { 3131 unsigned Reg = Desc.ImplicitUses[i]; 3132 switch (Reg) { 3133 case AMDGPU::FLAT_SCR: 3134 case AMDGPU::VCC: 3135 case AMDGPU::VCC_LO: 3136 case AMDGPU::VCC_HI: 3137 case AMDGPU::M0: 3138 return Reg; 3139 default: 3140 break; 3141 } 3142 } 3143 return AMDGPU::NoRegister; 3144 } 3145 3146 // NB: This code is correct only when used to check constant 3147 // bus limitations because GFX7 supports no f16 inline constants. 3148 // Note that there are no cases when a GFX7 opcode violates 3149 // constant bus limitations due to the use of an f16 constant.
3150 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3151 unsigned OpIdx) const { 3152 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3153 3154 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3155 return false; 3156 } 3157 3158 const MCOperand &MO = Inst.getOperand(OpIdx); 3159 3160 int64_t Val = MO.getImm(); 3161 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3162 3163 switch (OpSize) { // expected operand size 3164 case 8: 3165 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3166 case 4: 3167 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3168 case 2: { 3169 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3170 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3171 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3172 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3173 return AMDGPU::isInlinableIntLiteral(Val); 3174 3175 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3176 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3177 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3178 return AMDGPU::isInlinableIntLiteralV216(Val); 3179 3180 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3181 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3182 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3183 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3184 3185 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3186 } 3187 default: 3188 llvm_unreachable("invalid operand size"); 3189 } 3190 } 3191 3192 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3193 if (!isGFX10Plus()) 3194 return 1; 3195 3196 switch (Opcode) { 3197 // 64-bit shift instructions can use only one scalar value input 3198 case AMDGPU::V_LSHLREV_B64_e64: 3199 case AMDGPU::V_LSHLREV_B64_gfx10: 3200 case AMDGPU::V_LSHRREV_B64_e64: 3201 case AMDGPU::V_LSHRREV_B64_gfx10: 3202 case AMDGPU::V_ASHRREV_I64_e64: 3203 case AMDGPU::V_ASHRREV_I64_gfx10: 3204 case AMDGPU::V_LSHL_B64_e64: 3205 case AMDGPU::V_LSHR_B64_e64: 3206 case AMDGPU::V_ASHR_I64_e64: 3207 return 1; 3208 default: 3209 return 2; 3210 } 3211 } 3212 3213 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3214 const MCOperand &MO = Inst.getOperand(OpIdx); 3215 if (MO.isImm()) { 3216 return !isInlineConstant(Inst, OpIdx); 3217 } else if (MO.isReg()) { 3218 auto Reg = MO.getReg(); 3219 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3220 auto PReg = mc2PseudoReg(Reg); 3221 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3222 } else { 3223 return true; 3224 } 3225 } 3226 3227 bool 3228 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3229 const OperandVector &Operands) { 3230 const unsigned Opcode = Inst.getOpcode(); 3231 const MCInstrDesc &Desc = MII.get(Opcode); 3232 unsigned LastSGPR = AMDGPU::NoRegister; 3233 unsigned ConstantBusUseCount = 0; 3234 unsigned NumLiterals = 0; 3235 unsigned LiteralSize; 3236 3237 if (Desc.TSFlags & 3238 (SIInstrFlags::VOPC | 3239 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3240 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3241 SIInstrFlags::SDWA)) { 3242 // Check special imm operands (used by madmk, etc) 3243 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3244 ++ConstantBusUseCount; 3245 } 3246 3247 SmallDenseSet<unsigned> SGPRsUsed; 3248 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3249 if (SGPRUsed != AMDGPU::NoRegister) { 3250 SGPRsUsed.insert(SGPRUsed); 3251 ++ConstantBusUseCount; 3252 } 3253 3254 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3255 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3256 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3257 3258 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3259 3260 for (int OpIdx : OpIndices) { 3261 if (OpIdx == -1) break; 3262 3263 const MCOperand &MO = Inst.getOperand(OpIdx); 3264 if (usesConstantBus(Inst, OpIdx)) { 3265 if (MO.isReg()) { 3266 LastSGPR = mc2PseudoReg(MO.getReg()); 3267 // Pairs of registers with partial intersections like these: 3268 // s0, s[0:1] 3269 // flat_scratch_lo, flat_scratch 3270 // flat_scratch_lo, flat_scratch_hi 3271 // are theoretically valid but they are disabled anyway. 3272 // Note that this code mimics SIInstrInfo::verifyInstruction. 3273 if (!SGPRsUsed.count(LastSGPR)) { 3274 SGPRsUsed.insert(LastSGPR); 3275 ++ConstantBusUseCount; 3276 } 3277 } else { // Expression or a literal 3278 3279 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3280 continue; // special operand like VINTERP attr_chan 3281 3282 // An instruction may use only one literal. 3283 // This has been validated in a previous step. 3284 // See validateVOP3Literal. 3285 // This literal may be used as more than one operand. 3286 // If all these operands are of the same size, 3287 // this literal counts as one scalar value. 3288 // Otherwise it counts as 2 scalar values. 3289 // See "GFX10 Shader Programming", section 3.6.2.3. 3290 3291 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3292 if (Size < 4) Size = 4; 3293 3294 if (NumLiterals == 0) { 3295 NumLiterals = 1; 3296 LiteralSize = Size; 3297 } else if (LiteralSize != Size) { 3298 NumLiterals = 2; 3299 } 3300 } 3301 } 3302 } 3303 } 3304 ConstantBusUseCount += NumLiterals; 3305 3306 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3307 return true; 3308 3309 SMLoc LitLoc = getLitLoc(Operands); 3310 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3311 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3312 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3313 return false; 3314 } 3315 3316 bool 3317 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3318 const OperandVector &Operands) { 3319 const unsigned Opcode = Inst.getOpcode(); 3320 const MCInstrDesc &Desc = MII.get(Opcode); 3321 3322 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3323 if (DstIdx == -1 || 3324 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3325 return true; 3326 } 3327 3328 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3329 3330 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3331 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3332 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3333 3334 assert(DstIdx != -1); 3335 const MCOperand &Dst = Inst.getOperand(DstIdx); 3336 assert(Dst.isReg()); 3337 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3338 3339 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3340 3341 for (int SrcIdx : SrcIndices) { 3342 if (SrcIdx == -1) break; 3343 const MCOperand &Src = Inst.getOperand(SrcIdx); 3344 if (Src.isReg()) { 3345 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3346 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3347 Error(getRegLoc(SrcReg, Operands), 3348 "destination must be different than all sources"); 3349 return false; 3350 } 3351 } 3352 } 3353 3354 return true; 3355 } 3356 3357 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3358 3359 const unsigned Opc = Inst.getOpcode(); 3360 const MCInstrDesc &Desc = MII.get(Opc); 3361 3362 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3363 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3364 assert(ClampIdx != -1); 3365 return Inst.getOperand(ClampIdx).getImm() == 0; 3366 } 3367 3368 return true; 3369 } 3370 3371 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3372 3373 const unsigned Opc = Inst.getOpcode(); 3374 const MCInstrDesc &Desc = MII.get(Opc); 3375 3376 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3377 return true; 3378 3379 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3380 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3381 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3382 3383 assert(VDataIdx != -1); 3384 3385 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3386 return true; 3387 3388 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3389 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3390 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3391 if (DMask == 0) 3392 DMask = 1; 3393 3394 unsigned DataSize = 3395 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3396 if (hasPackedD16()) { 3397 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3398 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3399 DataSize = (DataSize + 1) / 2; 3400 } 3401 3402 return (VDataSize / 4) == DataSize + TFESize; 3403 } 3404 3405 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3406 const unsigned Opc = Inst.getOpcode(); 3407 const MCInstrDesc &Desc = MII.get(Opc); 3408 3409 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3410 return true; 3411 3412 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3413 3414 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3415 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3416 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3417 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3418 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3419 3420 assert(VAddr0Idx != -1); 3421 assert(SrsrcIdx != -1); 3422 assert(SrsrcIdx > VAddr0Idx); 3423 3424 if (DimIdx == -1) 3425 return true; // intersect_ray 3426 3427 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3428 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3429 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3430 unsigned VAddrSize = 3431 IsNSA ? SrsrcIdx - VAddr0Idx 3432 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3433 3434 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3435 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3436 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3437 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3438 if (!IsNSA) { 3439 if (AddrSize > 8) 3440 AddrSize = 16; 3441 else if (AddrSize > 4) 3442 AddrSize = 8; 3443 } 3444 3445 return VAddrSize == AddrSize; 3446 } 3447 3448 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3449 3450 const unsigned Opc = Inst.getOpcode(); 3451 const MCInstrDesc &Desc = MII.get(Opc); 3452 3453 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3454 return true; 3455 if (!Desc.mayLoad() || !Desc.mayStore()) 3456 return true; // Not atomic 3457 3458 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3459 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3460 3461 // This is an incomplete check because image_atomic_cmpswap 3462 // may only use 0x3 and 0xf while other atomic operations 3463 // may use 0x1 and 0x3. However these limitations are 3464 // verified when we check that dmask matches dst size. 3465 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3466 } 3467 3468 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3469 3470 const unsigned Opc = Inst.getOpcode(); 3471 const MCInstrDesc &Desc = MII.get(Opc); 3472 3473 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3474 return true; 3475 3476 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3477 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3478 3479 // GATHER4 instructions use dmask in a different fashion compared to 3480 // other MIMG instructions. The only useful DMASK values are 3481 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3482 // (red,red,red,red) etc.) The ISA document doesn't mention 3483 // this. 
3484 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3485 } 3486 3487 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3488 const unsigned Opc = Inst.getOpcode(); 3489 const MCInstrDesc &Desc = MII.get(Opc); 3490 3491 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3492 return true; 3493 3494 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3495 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3496 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3497 3498 if (!BaseOpcode->MSAA) 3499 return true; 3500 3501 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3502 assert(DimIdx != -1); 3503 3504 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3505 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3506 3507 return DimInfo->MSAA; 3508 } 3509 3510 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3511 { 3512 switch (Opcode) { 3513 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3514 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3515 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3516 return true; 3517 default: 3518 return false; 3519 } 3520 } 3521 3522 // movrels* opcodes should only allow VGPRS as src0. 3523 // This is specified in .td description for vop1/vop3, 3524 // but sdwa is handled differently. See isSDWAOperand. 3525 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3526 const OperandVector &Operands) { 3527 3528 const unsigned Opc = Inst.getOpcode(); 3529 const MCInstrDesc &Desc = MII.get(Opc); 3530 3531 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3532 return true; 3533 3534 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3535 assert(Src0Idx != -1); 3536 3537 SMLoc ErrLoc; 3538 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3539 if (Src0.isReg()) { 3540 auto Reg = mc2PseudoReg(Src0.getReg()); 3541 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3542 if (!isSGPR(Reg, TRI)) 3543 return true; 3544 ErrLoc = getRegLoc(Reg, Operands); 3545 } else { 3546 ErrLoc = getConstLoc(Operands); 3547 } 3548 3549 Error(ErrLoc, "source operand must be a VGPR"); 3550 return false; 3551 } 3552 3553 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3554 const OperandVector &Operands) { 3555 3556 const unsigned Opc = Inst.getOpcode(); 3557 3558 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3559 return true; 3560 3561 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3562 assert(Src0Idx != -1); 3563 3564 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3565 if (!Src0.isReg()) 3566 return true; 3567 3568 auto Reg = mc2PseudoReg(Src0.getReg()); 3569 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3570 if (isSGPR(Reg, TRI)) { 3571 Error(getRegLoc(Reg, Operands), 3572 "source operand must be either a VGPR or an inline constant"); 3573 return false; 3574 } 3575 3576 return true; 3577 } 3578 3579 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3580 switch (Inst.getOpcode()) { 3581 default: 3582 return true; 3583 case V_DIV_SCALE_F32_gfx6_gfx7: 3584 case V_DIV_SCALE_F32_vi: 3585 case V_DIV_SCALE_F32_gfx10: 3586 case V_DIV_SCALE_F64_gfx6_gfx7: 3587 case V_DIV_SCALE_F64_vi: 3588 case V_DIV_SCALE_F64_gfx10: 3589 break; 3590 } 3591 3592 // TODO: Check that src0 = src1 or src2. 
3593 3594 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3595 AMDGPU::OpName::src2_modifiers, 3596 AMDGPU::OpName::src2_modifiers}) { 3597 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3598 .getImm() & 3599 SISrcMods::ABS) { 3600 return false; 3601 } 3602 } 3603 3604 return true; 3605 } 3606 3607 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3608 3609 const unsigned Opc = Inst.getOpcode(); 3610 const MCInstrDesc &Desc = MII.get(Opc); 3611 3612 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3613 return true; 3614 3615 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3616 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3617 if (isCI() || isSI()) 3618 return false; 3619 } 3620 3621 return true; 3622 } 3623 3624 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3625 const unsigned Opc = Inst.getOpcode(); 3626 const MCInstrDesc &Desc = MII.get(Opc); 3627 3628 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3629 return true; 3630 3631 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3632 if (DimIdx < 0) 3633 return true; 3634 3635 long Imm = Inst.getOperand(DimIdx).getImm(); 3636 if (Imm < 0 || Imm >= 8) 3637 return false; 3638 3639 return true; 3640 } 3641 3642 static bool IsRevOpcode(const unsigned Opcode) 3643 { 3644 switch (Opcode) { 3645 case AMDGPU::V_SUBREV_F32_e32: 3646 case AMDGPU::V_SUBREV_F32_e64: 3647 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3648 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3649 case AMDGPU::V_SUBREV_F32_e32_vi: 3650 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3651 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3652 case AMDGPU::V_SUBREV_F32_e64_vi: 3653 3654 case AMDGPU::V_SUBREV_CO_U32_e32: 3655 case AMDGPU::V_SUBREV_CO_U32_e64: 3656 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3657 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3658 3659 case AMDGPU::V_SUBBREV_U32_e32: 3660 case AMDGPU::V_SUBBREV_U32_e64: 3661 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3662 case AMDGPU::V_SUBBREV_U32_e32_vi: 3663 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3664 case AMDGPU::V_SUBBREV_U32_e64_vi: 3665 3666 case AMDGPU::V_SUBREV_U32_e32: 3667 case AMDGPU::V_SUBREV_U32_e64: 3668 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3669 case AMDGPU::V_SUBREV_U32_e32_vi: 3670 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3671 case AMDGPU::V_SUBREV_U32_e64_vi: 3672 3673 case AMDGPU::V_SUBREV_F16_e32: 3674 case AMDGPU::V_SUBREV_F16_e64: 3675 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3676 case AMDGPU::V_SUBREV_F16_e32_vi: 3677 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3678 case AMDGPU::V_SUBREV_F16_e64_vi: 3679 3680 case AMDGPU::V_SUBREV_U16_e32: 3681 case AMDGPU::V_SUBREV_U16_e64: 3682 case AMDGPU::V_SUBREV_U16_e32_vi: 3683 case AMDGPU::V_SUBREV_U16_e64_vi: 3684 3685 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3686 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3687 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3688 3689 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3690 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3691 3692 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3693 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3694 3695 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3696 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3697 3698 case AMDGPU::V_LSHRREV_B32_e32: 3699 case AMDGPU::V_LSHRREV_B32_e64: 3700 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3701 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3702 case AMDGPU::V_LSHRREV_B32_e32_vi: 3703 case AMDGPU::V_LSHRREV_B32_e64_vi: 3704 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3705 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3706 3707 case AMDGPU::V_ASHRREV_I32_e32: 3708 case 
AMDGPU::V_ASHRREV_I32_e64: 3709 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3710 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3711 case AMDGPU::V_ASHRREV_I32_e32_vi: 3712 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3713 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3714 case AMDGPU::V_ASHRREV_I32_e64_vi: 3715 3716 case AMDGPU::V_LSHLREV_B32_e32: 3717 case AMDGPU::V_LSHLREV_B32_e64: 3718 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3719 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3720 case AMDGPU::V_LSHLREV_B32_e32_vi: 3721 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3722 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3723 case AMDGPU::V_LSHLREV_B32_e64_vi: 3724 3725 case AMDGPU::V_LSHLREV_B16_e32: 3726 case AMDGPU::V_LSHLREV_B16_e64: 3727 case AMDGPU::V_LSHLREV_B16_e32_vi: 3728 case AMDGPU::V_LSHLREV_B16_e64_vi: 3729 case AMDGPU::V_LSHLREV_B16_gfx10: 3730 3731 case AMDGPU::V_LSHRREV_B16_e32: 3732 case AMDGPU::V_LSHRREV_B16_e64: 3733 case AMDGPU::V_LSHRREV_B16_e32_vi: 3734 case AMDGPU::V_LSHRREV_B16_e64_vi: 3735 case AMDGPU::V_LSHRREV_B16_gfx10: 3736 3737 case AMDGPU::V_ASHRREV_I16_e32: 3738 case AMDGPU::V_ASHRREV_I16_e64: 3739 case AMDGPU::V_ASHRREV_I16_e32_vi: 3740 case AMDGPU::V_ASHRREV_I16_e64_vi: 3741 case AMDGPU::V_ASHRREV_I16_gfx10: 3742 3743 case AMDGPU::V_LSHLREV_B64_e64: 3744 case AMDGPU::V_LSHLREV_B64_gfx10: 3745 case AMDGPU::V_LSHLREV_B64_vi: 3746 3747 case AMDGPU::V_LSHRREV_B64_e64: 3748 case AMDGPU::V_LSHRREV_B64_gfx10: 3749 case AMDGPU::V_LSHRREV_B64_vi: 3750 3751 case AMDGPU::V_ASHRREV_I64_e64: 3752 case AMDGPU::V_ASHRREV_I64_gfx10: 3753 case AMDGPU::V_ASHRREV_I64_vi: 3754 3755 case AMDGPU::V_PK_LSHLREV_B16: 3756 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3757 case AMDGPU::V_PK_LSHLREV_B16_vi: 3758 3759 case AMDGPU::V_PK_LSHRREV_B16: 3760 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3761 case AMDGPU::V_PK_LSHRREV_B16_vi: 3762 case AMDGPU::V_PK_ASHRREV_I16: 3763 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3764 case AMDGPU::V_PK_ASHRREV_I16_vi: 3765 return true; 3766 default: 3767 return false; 3768 } 3769 } 3770 3771 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3772 3773 using namespace SIInstrFlags; 3774 const unsigned Opcode = Inst.getOpcode(); 3775 const MCInstrDesc &Desc = MII.get(Opcode); 3776 3777 // lds_direct register is defined so that it can be used 3778 // with 9-bit operands only. Ignore encodings which do not accept these. 
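  // Illustrative example (assuming a subtarget that provides lds_direct):
  // "v_mov_b32 v0, lds_direct" places lds_direct in src0 and passes these
  // checks, whereas lds_direct in src1/src2, in an SDWA form, or with a
  // *rev* opcode is diagnosed below.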
3779 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3780 if ((Desc.TSFlags & Enc) == 0) 3781 return None; 3782 3783 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3784 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3785 if (SrcIdx == -1) 3786 break; 3787 const auto &Src = Inst.getOperand(SrcIdx); 3788 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3789 3790 if (isGFX90A()) 3791 return StringRef("lds_direct is not supported on this GPU"); 3792 3793 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3794 return StringRef("lds_direct cannot be used with this instruction"); 3795 3796 if (SrcName != OpName::src0) 3797 return StringRef("lds_direct may be used as src0 only"); 3798 } 3799 } 3800 3801 return None; 3802 } 3803 3804 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3805 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3806 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3807 if (Op.isFlatOffset()) 3808 return Op.getStartLoc(); 3809 } 3810 return getLoc(); 3811 } 3812 3813 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3814 const OperandVector &Operands) { 3815 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3816 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3817 return true; 3818 3819 auto Opcode = Inst.getOpcode(); 3820 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3821 assert(OpNum != -1); 3822 3823 const auto &Op = Inst.getOperand(OpNum); 3824 if (!hasFlatOffsets() && Op.getImm() != 0) { 3825 Error(getFlatOffsetLoc(Operands), 3826 "flat offset modifier is not supported on this GPU"); 3827 return false; 3828 } 3829 3830 // For FLAT segment the offset must be positive; 3831 // MSB is ignored and forced to zero. 3832 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3833 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3834 if (!isIntN(OffsetSize, Op.getImm())) { 3835 Error(getFlatOffsetLoc(Operands), 3836 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3837 return false; 3838 } 3839 } else { 3840 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3841 if (!isUIntN(OffsetSize, Op.getImm())) { 3842 Error(getFlatOffsetLoc(Operands), 3843 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3844 return false; 3845 } 3846 } 3847 3848 return true; 3849 } 3850 3851 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3852 // Start with second operand because SMEM Offset cannot be dst or src0. 
3853 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3854 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3855 if (Op.isSMEMOffset()) 3856 return Op.getStartLoc(); 3857 } 3858 return getLoc(); 3859 } 3860 3861 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3862 const OperandVector &Operands) { 3863 if (isCI() || isSI()) 3864 return true; 3865 3866 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3867 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3868 return true; 3869 3870 auto Opcode = Inst.getOpcode(); 3871 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3872 if (OpNum == -1) 3873 return true; 3874 3875 const auto &Op = Inst.getOperand(OpNum); 3876 if (!Op.isImm()) 3877 return true; 3878 3879 uint64_t Offset = Op.getImm(); 3880 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3881 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3882 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3883 return true; 3884 3885 Error(getSMEMOffsetLoc(Operands), 3886 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3887 "expected a 21-bit signed offset"); 3888 3889 return false; 3890 } 3891 3892 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3893 unsigned Opcode = Inst.getOpcode(); 3894 const MCInstrDesc &Desc = MII.get(Opcode); 3895 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3896 return true; 3897 3898 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3899 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3900 3901 const int OpIndices[] = { Src0Idx, Src1Idx }; 3902 3903 unsigned NumExprs = 0; 3904 unsigned NumLiterals = 0; 3905 uint32_t LiteralValue; 3906 3907 for (int OpIdx : OpIndices) { 3908 if (OpIdx == -1) break; 3909 3910 const MCOperand &MO = Inst.getOperand(OpIdx); 3911 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3912 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3913 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3914 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3915 if (NumLiterals == 0 || LiteralValue != Value) { 3916 LiteralValue = Value; 3917 ++NumLiterals; 3918 } 3919 } else if (MO.isExpr()) { 3920 ++NumExprs; 3921 } 3922 } 3923 } 3924 3925 return NumLiterals + NumExprs <= 1; 3926 } 3927 3928 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3929 const unsigned Opc = Inst.getOpcode(); 3930 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3931 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3932 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3933 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3934 3935 if (OpSel & ~3) 3936 return false; 3937 } 3938 return true; 3939 } 3940 3941 // Check if VCC register matches wavefront size 3942 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3943 auto FB = getFeatureBits(); 3944 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3945 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3946 } 3947 3948 // VOP3 literal is only allowed in GFX10+ and only one can be used 3949 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3950 const OperandVector &Operands) { 3951 unsigned Opcode = Inst.getOpcode(); 3952 const MCInstrDesc &Desc = MII.get(Opcode); 3953 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3954 return true; 3955 3956 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3957 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3958 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3959 3960 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3961 3962 unsigned NumExprs = 0; 3963 unsigned NumLiterals = 0; 3964 uint32_t LiteralValue; 3965 3966 for (int OpIdx : OpIndices) { 3967 if (OpIdx == -1) break; 3968 3969 const MCOperand &MO = Inst.getOperand(OpIdx); 3970 if (!MO.isImm() && !MO.isExpr()) 3971 continue; 3972 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3973 continue; 3974 3975 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3976 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3977 Error(getConstLoc(Operands), 3978 "inline constants are not allowed for this operand"); 3979 return false; 3980 } 3981 3982 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3983 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3984 if (NumLiterals == 0 || LiteralValue != Value) { 3985 LiteralValue = Value; 3986 ++NumLiterals; 3987 } 3988 } else if (MO.isExpr()) { 3989 ++NumExprs; 3990 } 3991 } 3992 NumLiterals += NumExprs; 3993 3994 if (!NumLiterals) 3995 return true; 3996 3997 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3998 Error(getLitLoc(Operands), "literal operands are not supported"); 3999 return false; 4000 } 4001 4002 if (NumLiterals > 1) { 4003 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4004 return false; 4005 } 4006 4007 return true; 4008 } 4009 4010 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4011 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4012 const MCRegisterInfo *MRI) { 4013 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4014 if (OpIdx < 0) 4015 return -1; 4016 4017 const MCOperand &Op = Inst.getOperand(OpIdx); 4018 if (!Op.isReg()) 4019 return -1; 4020 4021 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4022 auto Reg = Sub ? Sub : Op.getReg(); 4023 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4024 return AGRP32.contains(Reg) ? 1 : 0; 4025 } 4026 4027 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4028 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4029 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4030 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4031 SIInstrFlags::DS)) == 0) 4032 return true; 4033 4034 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4035 : AMDGPU::OpName::vdata; 4036 4037 const MCRegisterInfo *MRI = getMRI(); 4038 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4039 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4040 4041 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4042 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4043 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4044 return false; 4045 } 4046 4047 auto FB = getFeatureBits(); 4048 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4049 if (DataAreg < 0 || DstAreg < 0) 4050 return true; 4051 return DstAreg == DataAreg; 4052 } 4053 4054 return DstAreg < 1 && DataAreg < 1; 4055 } 4056 4057 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4058 auto FB = getFeatureBits(); 4059 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4060 return true; 4061 4062 const MCRegisterInfo *MRI = getMRI(); 4063 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4064 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4065 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4066 const MCOperand &Op = Inst.getOperand(I); 4067 if (!Op.isReg()) 4068 continue; 4069 4070 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4071 if (!Sub) 4072 continue; 4073 4074 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4075 return false; 4076 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4077 return false; 4078 } 4079 4080 return true; 4081 } 4082 4083 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4084 const OperandVector &Operands, 4085 const SMLoc &IDLoc) { 4086 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4087 AMDGPU::OpName::cpol); 4088 if (CPolPos == -1) 4089 return true; 4090 4091 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4092 4093 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4094 if ((TSFlags & (SIInstrFlags::SMRD)) && 4095 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4096 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4097 return false; 4098 } 4099 4100 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4101 return true; 4102 4103 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4104 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4105 Error(IDLoc, "instruction must use glc"); 4106 return false; 4107 } 4108 } else { 4109 if (CPol & CPol::GLC) { 4110 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4111 StringRef CStr(S.getPointer()); 4112 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4113 Error(S, "instruction must not use glc"); 4114 return false; 4115 } 4116 } 4117 4118 if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) { 4119 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4120 StringRef CStr(S.getPointer()); 4121 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4122 Error(S, "instruction must not use scc"); 4123 return false; 4124 } 4125 4126 return true; 4127 } 4128 4129 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4130 const SMLoc &IDLoc, 4131 const OperandVector &Operands) { 4132 if (auto ErrMsg = validateLdsDirect(Inst)) { 4133 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4134 return false; 4135 } 4136 if (!validateSOPLiteral(Inst)) { 4137 Error(getLitLoc(Operands), 4138 "only one literal operand is allowed"); 4139 return false; 4140 } 4141 if (!validateVOP3Literal(Inst, Operands)) { 4142 return false; 4143 } 4144 if 
(!validateConstantBusLimitations(Inst, Operands)) { 4145 return false; 4146 } 4147 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4148 return false; 4149 } 4150 if (!validateIntClampSupported(Inst)) { 4151 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4152 "integer clamping is not supported on this GPU"); 4153 return false; 4154 } 4155 if (!validateOpSel(Inst)) { 4156 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4157 "invalid op_sel operand"); 4158 return false; 4159 } 4160 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4161 if (!validateMIMGD16(Inst)) { 4162 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4163 "d16 modifier is not supported on this GPU"); 4164 return false; 4165 } 4166 if (!validateMIMGDim(Inst)) { 4167 Error(IDLoc, "dim modifier is required on this GPU"); 4168 return false; 4169 } 4170 if (!validateMIMGMSAA(Inst)) { 4171 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4172 "invalid dim; must be MSAA type"); 4173 return false; 4174 } 4175 if (!validateMIMGDataSize(Inst)) { 4176 Error(IDLoc, 4177 "image data size does not match dmask and tfe"); 4178 return false; 4179 } 4180 if (!validateMIMGAddrSize(Inst)) { 4181 Error(IDLoc, 4182 "image address size does not match dim and a16"); 4183 return false; 4184 } 4185 if (!validateMIMGAtomicDMask(Inst)) { 4186 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4187 "invalid atomic image dmask"); 4188 return false; 4189 } 4190 if (!validateMIMGGatherDMask(Inst)) { 4191 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4192 "invalid image_gather dmask: only one bit must be set"); 4193 return false; 4194 } 4195 if (!validateMovrels(Inst, Operands)) { 4196 return false; 4197 } 4198 if (!validateFlatOffset(Inst, Operands)) { 4199 return false; 4200 } 4201 if (!validateSMEMOffset(Inst, Operands)) { 4202 return false; 4203 } 4204 if (!validateMAIAccWrite(Inst, Operands)) { 4205 return false; 4206 } 4207 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4208 return false; 4209 } 4210 4211 if (!validateAGPRLdSt(Inst)) { 4212 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4213 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4214 : "invalid register class: agpr loads and stores not supported on this GPU" 4215 ); 4216 return false; 4217 } 4218 if (!validateVGPRAlign(Inst)) { 4219 Error(IDLoc, 4220 "invalid register class: vgpr tuples must be 64 bit aligned"); 4221 return false; 4222 } 4223 4224 if (!validateDivScale(Inst)) { 4225 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4226 return false; 4227 } 4228 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4229 return false; 4230 } 4231 4232 return true; 4233 } 4234 4235 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4236 const FeatureBitset &FBS, 4237 unsigned VariantID = 0); 4238 4239 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4240 const FeatureBitset &AvailableFeatures, 4241 unsigned VariantID); 4242 4243 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4244 const FeatureBitset &FBS) { 4245 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4246 } 4247 4248 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4249 const FeatureBitset &FBS, 4250 ArrayRef<unsigned> Variants) { 4251 for (auto Variant : Variants) { 4252 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4253 return true; 4254 } 4255 4256 return false; 4257 } 4258 4259 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4260 const SMLoc &IDLoc) { 4261 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4262 4263 // Check if requested instruction variant is supported. 4264 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4265 return false; 4266 4267 // This instruction is not supported. 4268 // Clear any other pending errors because they are no longer relevant. 4269 getParser().clearPendingErrors(); 4270 4271 // Requested instruction variant is not supported. 4272 // Check if any other variants are supported. 4273 StringRef VariantName = getMatchedVariantName(); 4274 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4275 return Error(IDLoc, 4276 Twine(VariantName, 4277 " variant of this instruction is not supported")); 4278 } 4279 4280 // Finally check if this instruction is supported on any other GPU. 4281 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4282 return Error(IDLoc, "instruction not supported on this GPU"); 4283 } 4284 4285 // Instruction not supported on any GPU. Probably a typo. 4286 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4287 return Error(IDLoc, "invalid instruction" + Suggestion); 4288 } 4289 4290 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4291 OperandVector &Operands, 4292 MCStreamer &Out, 4293 uint64_t &ErrorInfo, 4294 bool MatchingInlineAsm) { 4295 MCInst Inst; 4296 unsigned Result = Match_Success; 4297 for (auto Variant : getMatchedVariants()) { 4298 uint64_t EI; 4299 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4300 Variant); 4301 // We order match statuses from least to most specific. 
We use most specific 4302 // status as resulting 4303 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4304 if ((R == Match_Success) || 4305 (R == Match_PreferE32) || 4306 (R == Match_MissingFeature && Result != Match_PreferE32) || 4307 (R == Match_InvalidOperand && Result != Match_MissingFeature 4308 && Result != Match_PreferE32) || 4309 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4310 && Result != Match_MissingFeature 4311 && Result != Match_PreferE32)) { 4312 Result = R; 4313 ErrorInfo = EI; 4314 } 4315 if (R == Match_Success) 4316 break; 4317 } 4318 4319 if (Result == Match_Success) { 4320 if (!validateInstruction(Inst, IDLoc, Operands)) { 4321 return true; 4322 } 4323 Inst.setLoc(IDLoc); 4324 Out.emitInstruction(Inst, getSTI()); 4325 return false; 4326 } 4327 4328 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4329 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4330 return true; 4331 } 4332 4333 switch (Result) { 4334 default: break; 4335 case Match_MissingFeature: 4336 // It has been verified that the specified instruction 4337 // mnemonic is valid. A match was found but it requires 4338 // features which are not supported on this GPU. 4339 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4340 4341 case Match_InvalidOperand: { 4342 SMLoc ErrorLoc = IDLoc; 4343 if (ErrorInfo != ~0ULL) { 4344 if (ErrorInfo >= Operands.size()) { 4345 return Error(IDLoc, "too few operands for instruction"); 4346 } 4347 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4348 if (ErrorLoc == SMLoc()) 4349 ErrorLoc = IDLoc; 4350 } 4351 return Error(ErrorLoc, "invalid operand for instruction"); 4352 } 4353 4354 case Match_PreferE32: 4355 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4356 "should be encoded as e32"); 4357 case Match_MnemonicFail: 4358 llvm_unreachable("Invalid instructions should have been handled already"); 4359 } 4360 llvm_unreachable("Implement any new match types added!"); 4361 } 4362 4363 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4364 int64_t Tmp = -1; 4365 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4366 return true; 4367 } 4368 if (getParser().parseAbsoluteExpression(Tmp)) { 4369 return true; 4370 } 4371 Ret = static_cast<uint32_t>(Tmp); 4372 return false; 4373 } 4374 4375 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4376 uint32_t &Minor) { 4377 if (ParseAsAbsoluteExpression(Major)) 4378 return TokError("invalid major version"); 4379 4380 if (!trySkipToken(AsmToken::Comma)) 4381 return TokError("minor version number required, comma expected"); 4382 4383 if (ParseAsAbsoluteExpression(Minor)) 4384 return TokError("invalid minor version"); 4385 4386 return false; 4387 } 4388 4389 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4390 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4391 return TokError("directive only supported for amdgcn architecture"); 4392 4393 std::string Target; 4394 4395 SMLoc TargetStart = getLoc(); 4396 if (getParser().parseEscapedString(Target)) 4397 return true; 4398 SMRange TargetRange = SMRange(TargetStart, getLoc()); 4399 4400 std::string ExpectedTarget; 4401 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4402 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4403 4404 if (Target != ExpectedTargetOS.str()) 4405 return Error(TargetRange.Start, "target must match options", TargetRange); 4406 4407 
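  // Illustrative usage: the string given to ".amdgcn_target" must repeat,
  // verbatim, the ISA string computed for the current subtarget; any other
  // value was rejected above with "target must match options".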
getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4408 return false; 4409 } 4410 4411 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4412 return Error(Range.Start, "value out of range", Range); 4413 } 4414 4415 bool AMDGPUAsmParser::calculateGPRBlocks( 4416 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4417 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4418 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4419 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4420 // TODO(scott.linder): These calculations are duplicated from 4421 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4422 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4423 4424 unsigned NumVGPRs = NextFreeVGPR; 4425 unsigned NumSGPRs = NextFreeSGPR; 4426 4427 if (Version.Major >= 10) 4428 NumSGPRs = 0; 4429 else { 4430 unsigned MaxAddressableNumSGPRs = 4431 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4432 4433 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4434 NumSGPRs > MaxAddressableNumSGPRs) 4435 return OutOfRangeError(SGPRRange); 4436 4437 NumSGPRs += 4438 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4439 4440 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4441 NumSGPRs > MaxAddressableNumSGPRs) 4442 return OutOfRangeError(SGPRRange); 4443 4444 if (Features.test(FeatureSGPRInitBug)) 4445 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4446 } 4447 4448 VGPRBlocks = 4449 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4450 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4451 4452 return false; 4453 } 4454 4455 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4456 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4457 return TokError("directive only supported for amdgcn architecture"); 4458 4459 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4460 return TokError("directive only supported for amdhsa OS"); 4461 4462 StringRef KernelName; 4463 if (getParser().parseIdentifier(KernelName)) 4464 return true; 4465 4466 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4467 4468 StringSet<> Seen; 4469 4470 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4471 4472 SMRange VGPRRange; 4473 uint64_t NextFreeVGPR = 0; 4474 uint64_t AccumOffset = 0; 4475 SMRange SGPRRange; 4476 uint64_t NextFreeSGPR = 0; 4477 unsigned UserSGPRCount = 0; 4478 bool ReserveVCC = true; 4479 bool ReserveFlatScr = true; 4480 bool ReserveXNACK = hasXNACK(); 4481 Optional<bool> EnableWavefrontSize32; 4482 4483 while (true) { 4484 while (trySkipToken(AsmToken::EndOfStatement)); 4485 4486 StringRef ID; 4487 SMRange IDRange = getTok().getLocRange(); 4488 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4489 return true; 4490 4491 if (ID == ".end_amdhsa_kernel") 4492 break; 4493 4494 if (Seen.find(ID) != Seen.end()) 4495 return TokError(".amdhsa_ directives cannot be repeated"); 4496 Seen.insert(ID); 4497 4498 SMLoc ValStart = getLoc(); 4499 int64_t IVal; 4500 if (getParser().parseAbsoluteExpression(IVal)) 4501 return true; 4502 SMLoc ValEnd = getLoc(); 4503 SMRange ValRange = SMRange(ValStart, ValEnd); 4504 4505 if (IVal < 0) 4506 return OutOfRangeError(ValRange); 4507 4508 uint64_t Val = IVal; 4509 4510 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4511 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4512 return OutOfRangeError(RANGE); \ 4513 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4514 4515 if (ID == 
".amdhsa_group_segment_fixed_size") { 4516 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4517 return OutOfRangeError(ValRange); 4518 KD.group_segment_fixed_size = Val; 4519 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4520 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4521 return OutOfRangeError(ValRange); 4522 KD.private_segment_fixed_size = Val; 4523 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4524 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4525 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4526 Val, ValRange); 4527 if (Val) 4528 UserSGPRCount += 4; 4529 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4530 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4531 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4532 ValRange); 4533 if (Val) 4534 UserSGPRCount += 2; 4535 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4536 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4537 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4538 ValRange); 4539 if (Val) 4540 UserSGPRCount += 2; 4541 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4542 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4543 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4544 Val, ValRange); 4545 if (Val) 4546 UserSGPRCount += 2; 4547 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4548 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4549 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4550 ValRange); 4551 if (Val) 4552 UserSGPRCount += 2; 4553 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4554 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4555 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4556 ValRange); 4557 if (Val) 4558 UserSGPRCount += 2; 4559 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4560 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4561 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4562 Val, ValRange); 4563 if (Val) 4564 UserSGPRCount += 1; 4565 } else if (ID == ".amdhsa_wavefront_size32") { 4566 if (IVersion.Major < 10) 4567 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4568 EnableWavefrontSize32 = Val; 4569 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4570 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4571 Val, ValRange); 4572 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4573 PARSE_BITS_ENTRY( 4574 KD.compute_pgm_rsrc2, 4575 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4576 ValRange); 4577 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4578 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4579 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4580 ValRange); 4581 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4582 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4583 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4584 ValRange); 4585 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4586 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4587 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4588 ValRange); 4589 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4590 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4591 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4592 ValRange); 4593 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4594 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4595 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4596 ValRange); 4597 } else if (ID == ".amdhsa_next_free_vgpr") { 4598 VGPRRange = ValRange; 4599 NextFreeVGPR = Val; 4600 } else if (ID == ".amdhsa_next_free_sgpr") { 4601 SGPRRange = 
ValRange; 4602 NextFreeSGPR = Val; 4603 } else if (ID == ".amdhsa_accum_offset") { 4604 if (!isGFX90A()) 4605 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4606 AccumOffset = Val; 4607 } else if (ID == ".amdhsa_reserve_vcc") { 4608 if (!isUInt<1>(Val)) 4609 return OutOfRangeError(ValRange); 4610 ReserveVCC = Val; 4611 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4612 if (IVersion.Major < 7) 4613 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4614 if (!isUInt<1>(Val)) 4615 return OutOfRangeError(ValRange); 4616 ReserveFlatScr = Val; 4617 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4618 if (IVersion.Major < 8) 4619 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4620 if (!isUInt<1>(Val)) 4621 return OutOfRangeError(ValRange); 4622 ReserveXNACK = Val; 4623 } else if (ID == ".amdhsa_float_round_mode_32") { 4624 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4625 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4626 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4627 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4628 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4629 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4630 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4631 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4632 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4633 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4634 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4635 ValRange); 4636 } else if (ID == ".amdhsa_dx10_clamp") { 4637 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4638 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4639 } else if (ID == ".amdhsa_ieee_mode") { 4640 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4641 Val, ValRange); 4642 } else if (ID == ".amdhsa_fp16_overflow") { 4643 if (IVersion.Major < 9) 4644 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4645 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4646 ValRange); 4647 } else if (ID == ".amdhsa_tg_split") { 4648 if (!isGFX90A()) 4649 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4650 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4651 ValRange); 4652 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4653 if (IVersion.Major < 10) 4654 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4655 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4656 ValRange); 4657 } else if (ID == ".amdhsa_memory_ordered") { 4658 if (IVersion.Major < 10) 4659 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4660 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4661 ValRange); 4662 } else if (ID == ".amdhsa_forward_progress") { 4663 if (IVersion.Major < 10) 4664 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4666 ValRange); 4667 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4668 PARSE_BITS_ENTRY( 4669 KD.compute_pgm_rsrc2, 4670 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4671 ValRange); 4672 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4674 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4675 Val, ValRange); 4676 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4677 PARSE_BITS_ENTRY( 4678 KD.compute_pgm_rsrc2, 4679 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4680 ValRange); 4681 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4682 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4683 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4684 Val, ValRange); 4685 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4686 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4687 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4688 Val, ValRange); 4689 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4691 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4692 Val, ValRange); 4693 } else if (ID == ".amdhsa_exception_int_div_zero") { 4694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4695 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4696 Val, ValRange); 4697 } else { 4698 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4699 } 4700 4701 #undef PARSE_BITS_ENTRY 4702 } 4703 4704 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4705 return TokError(".amdhsa_next_free_vgpr directive is required"); 4706 4707 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4708 return TokError(".amdhsa_next_free_sgpr directive is required"); 4709 4710 unsigned VGPRBlocks; 4711 unsigned SGPRBlocks; 4712 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4713 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4714 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4715 SGPRBlocks)) 4716 return true; 4717 4718 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4719 VGPRBlocks)) 4720 return OutOfRangeError(VGPRRange); 4721 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4722 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4723 4724 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4725 SGPRBlocks)) 4726 return OutOfRangeError(SGPRRange); 4727 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4728 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4729 SGPRBlocks); 4730 4731 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4732 return TokError("too many user SGPRs enabled"); 4733 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4734 UserSGPRCount); 4735 4736 if (isGFX90A()) { 4737 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4738 return TokError(".amdhsa_accum_offset directive is required"); 4739 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4740 return TokError("accum_offset should be in range [4..256] in " 4741 "increments of 4"); 4742 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4743 return TokError("accum_offset exceeds total VGPR allocation"); 4744 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4745 (AccumOffset / 4 - 1)); 4746 } 4747 4748 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4749 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4750 ReserveFlatScr, ReserveXNACK); 4751 return false; 4752 } 4753 4754 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4755 uint32_t Major; 4756 uint32_t Minor; 4757 4758 if (ParseDirectiveMajorMinor(Major, Minor)) 4759 return true; 4760 4761 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4762 return false; 4763 } 4764 4765 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4766 uint32_t Major; 4767 uint32_t Minor; 4768 uint32_t Stepping; 4769 StringRef VendorName; 4770 StringRef ArchName; 4771 4772 // If this directive has no arguments, then use 
the ISA version for the 4773 // targeted GPU. 4774 if (isToken(AsmToken::EndOfStatement)) { 4775 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4776 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4777 ISA.Stepping, 4778 "AMD", "AMDGPU"); 4779 return false; 4780 } 4781 4782 if (ParseDirectiveMajorMinor(Major, Minor)) 4783 return true; 4784 4785 if (!trySkipToken(AsmToken::Comma)) 4786 return TokError("stepping version number required, comma expected"); 4787 4788 if (ParseAsAbsoluteExpression(Stepping)) 4789 return TokError("invalid stepping version"); 4790 4791 if (!trySkipToken(AsmToken::Comma)) 4792 return TokError("vendor name required, comma expected"); 4793 4794 if (!parseString(VendorName, "invalid vendor name")) 4795 return true; 4796 4797 if (!trySkipToken(AsmToken::Comma)) 4798 return TokError("arch name required, comma expected"); 4799 4800 if (!parseString(ArchName, "invalid arch name")) 4801 return true; 4802 4803 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4804 VendorName, ArchName); 4805 return false; 4806 } 4807 4808 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4809 amd_kernel_code_t &Header) { 4810 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4811 // assembly for backwards compatibility. 4812 if (ID == "max_scratch_backing_memory_byte_size") { 4813 Parser.eatToEndOfStatement(); 4814 return false; 4815 } 4816 4817 SmallString<40> ErrStr; 4818 raw_svector_ostream Err(ErrStr); 4819 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4820 return TokError(Err.str()); 4821 } 4822 Lex(); 4823 4824 if (ID == "enable_wavefront_size32") { 4825 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4826 if (!isGFX10Plus()) 4827 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4828 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4829 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4830 } else { 4831 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4832 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4833 } 4834 } 4835 4836 if (ID == "wavefront_size") { 4837 if (Header.wavefront_size == 5) { 4838 if (!isGFX10Plus()) 4839 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4840 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4841 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4842 } else if (Header.wavefront_size == 6) { 4843 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4844 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4845 } 4846 } 4847 4848 if (ID == "enable_wgp_mode") { 4849 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4850 !isGFX10Plus()) 4851 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4852 } 4853 4854 if (ID == "enable_mem_ordered") { 4855 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4856 !isGFX10Plus()) 4857 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4858 } 4859 4860 if (ID == "enable_fwd_progress") { 4861 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4862 !isGFX10Plus()) 4863 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4864 } 4865 4866 return false; 4867 } 4868 4869 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4870 amd_kernel_code_t Header; 4871 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4872 4873 while (true) { 4874 // Lex 
EndOfStatement. This is in a while loop, because lexing a comment 4875 // will set the current token to EndOfStatement. 4876 while(trySkipToken(AsmToken::EndOfStatement)); 4877 4878 StringRef ID; 4879 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4880 return true; 4881 4882 if (ID == ".end_amd_kernel_code_t") 4883 break; 4884 4885 if (ParseAMDKernelCodeTValue(ID, Header)) 4886 return true; 4887 } 4888 4889 getTargetStreamer().EmitAMDKernelCodeT(Header); 4890 4891 return false; 4892 } 4893 4894 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4895 StringRef KernelName; 4896 if (!parseId(KernelName, "expected symbol name")) 4897 return true; 4898 4899 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4900 ELF::STT_AMDGPU_HSA_KERNEL); 4901 4902 KernelScope.initialize(getContext()); 4903 return false; 4904 } 4905 4906 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4907 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4908 return Error(getLoc(), 4909 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4910 "architectures"); 4911 } 4912 4913 auto ISAVersionStringFromASM = getToken().getStringContents(); 4914 4915 std::string ISAVersionStringFromSTI; 4916 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4917 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4918 4919 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4920 return Error(getLoc(), 4921 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4922 "arguments specified through the command line"); 4923 } 4924 4925 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4926 Lex(); 4927 4928 return false; 4929 } 4930 4931 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4932 const char *AssemblerDirectiveBegin; 4933 const char *AssemblerDirectiveEnd; 4934 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4935 isHsaAbiVersion3(&getSTI()) 4936 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4937 HSAMD::V3::AssemblerDirectiveEnd) 4938 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4939 HSAMD::AssemblerDirectiveEnd); 4940 4941 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4942 return Error(getLoc(), 4943 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4944 "not available on non-amdhsa OSes")).str()); 4945 } 4946 4947 std::string HSAMetadataString; 4948 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4949 HSAMetadataString)) 4950 return true; 4951 4952 if (isHsaAbiVersion3(&getSTI())) { 4953 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4954 return Error(getLoc(), "invalid HSA metadata"); 4955 } else { 4956 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4957 return Error(getLoc(), "invalid HSA metadata"); 4958 } 4959 4960 return false; 4961 } 4962 4963 /// Common code to parse out a block of text (typically YAML) between start and 4964 /// end directives. 
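/// For example, the HSA metadata and PAL metadata directive parsers pass
/// their begin/end marker strings here and receive the enclosed text in
/// CollectString, with the target's statement separator between lines.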
4965 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4966 const char *AssemblerDirectiveEnd, 4967 std::string &CollectString) { 4968 4969 raw_string_ostream CollectStream(CollectString); 4970 4971 getLexer().setSkipSpace(false); 4972 4973 bool FoundEnd = false; 4974 while (!isToken(AsmToken::Eof)) { 4975 while (isToken(AsmToken::Space)) { 4976 CollectStream << getTokenStr(); 4977 Lex(); 4978 } 4979 4980 if (trySkipId(AssemblerDirectiveEnd)) { 4981 FoundEnd = true; 4982 break; 4983 } 4984 4985 CollectStream << Parser.parseStringToEndOfStatement() 4986 << getContext().getAsmInfo()->getSeparatorString(); 4987 4988 Parser.eatToEndOfStatement(); 4989 } 4990 4991 getLexer().setSkipSpace(true); 4992 4993 if (isToken(AsmToken::Eof) && !FoundEnd) { 4994 return TokError(Twine("expected directive ") + 4995 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4996 } 4997 4998 CollectStream.flush(); 4999 return false; 5000 } 5001 5002 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5003 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5004 std::string String; 5005 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5006 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5007 return true; 5008 5009 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5010 if (!PALMetadata->setFromString(String)) 5011 return Error(getLoc(), "invalid PAL metadata"); 5012 return false; 5013 } 5014 5015 /// Parse the assembler directive for old linear-format PAL metadata. 5016 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5017 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5018 return Error(getLoc(), 5019 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5020 "not available on non-amdpal OSes")).str()); 5021 } 5022 5023 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5024 PALMetadata->setLegacy(); 5025 for (;;) { 5026 uint32_t Key, Value; 5027 if (ParseAsAbsoluteExpression(Key)) { 5028 return TokError(Twine("invalid value in ") + 5029 Twine(PALMD::AssemblerDirective)); 5030 } 5031 if (!trySkipToken(AsmToken::Comma)) { 5032 return TokError(Twine("expected an even number of values in ") + 5033 Twine(PALMD::AssemblerDirective)); 5034 } 5035 if (ParseAsAbsoluteExpression(Value)) { 5036 return TokError(Twine("invalid value in ") + 5037 Twine(PALMD::AssemblerDirective)); 5038 } 5039 PALMetadata->setRegister(Key, Value); 5040 if (!trySkipToken(AsmToken::Comma)) 5041 break; 5042 } 5043 return false; 5044 } 5045 5046 /// ParseDirectiveAMDGPULDS 5047 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5048 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5049 if (getParser().checkForValidSection()) 5050 return true; 5051 5052 StringRef Name; 5053 SMLoc NameLoc = getLoc(); 5054 if (getParser().parseIdentifier(Name)) 5055 return TokError("expected identifier in directive"); 5056 5057 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5058 if (parseToken(AsmToken::Comma, "expected ','")) 5059 return true; 5060 5061 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5062 5063 int64_t Size; 5064 SMLoc SizeLoc = getLoc(); 5065 if (getParser().parseAbsoluteExpression(Size)) 5066 return true; 5067 if (Size < 0) 5068 return Error(SizeLoc, "size must be non-negative"); 5069 if (Size > LocalMemorySize) 5070 return Error(SizeLoc, "size is too large"); 5071 5072 int64_t Alignment = 4; 5073 if (trySkipToken(AsmToken::Comma)) { 5074 SMLoc AlignLoc = getLoc(); 5075 if 
(getParser().parseAbsoluteExpression(Alignment)) 5076 return true; 5077 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5078 return Error(AlignLoc, "alignment must be a power of two"); 5079 5080 // Alignment larger than the size of LDS is possible in theory, as long 5081 // as the linker manages to place to symbol at address 0, but we do want 5082 // to make sure the alignment fits nicely into a 32-bit integer. 5083 if (Alignment >= 1u << 31) 5084 return Error(AlignLoc, "alignment is too large"); 5085 } 5086 5087 if (parseToken(AsmToken::EndOfStatement, 5088 "unexpected token in '.amdgpu_lds' directive")) 5089 return true; 5090 5091 Symbol->redefineIfPossible(); 5092 if (!Symbol->isUndefined()) 5093 return Error(NameLoc, "invalid symbol redefinition"); 5094 5095 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5096 return false; 5097 } 5098 5099 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5100 StringRef IDVal = DirectiveID.getString(); 5101 5102 if (isHsaAbiVersion3(&getSTI())) { 5103 if (IDVal == ".amdgcn_target") 5104 return ParseDirectiveAMDGCNTarget(); 5105 5106 if (IDVal == ".amdhsa_kernel") 5107 return ParseDirectiveAMDHSAKernel(); 5108 5109 // TODO: Restructure/combine with PAL metadata directive. 5110 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5111 return ParseDirectiveHSAMetadata(); 5112 } else { 5113 if (IDVal == ".hsa_code_object_version") 5114 return ParseDirectiveHSACodeObjectVersion(); 5115 5116 if (IDVal == ".hsa_code_object_isa") 5117 return ParseDirectiveHSACodeObjectISA(); 5118 5119 if (IDVal == ".amd_kernel_code_t") 5120 return ParseDirectiveAMDKernelCodeT(); 5121 5122 if (IDVal == ".amdgpu_hsa_kernel") 5123 return ParseDirectiveAMDGPUHsaKernel(); 5124 5125 if (IDVal == ".amd_amdgpu_isa") 5126 return ParseDirectiveISAVersion(); 5127 5128 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5129 return ParseDirectiveHSAMetadata(); 5130 } 5131 5132 if (IDVal == ".amdgpu_lds") 5133 return ParseDirectiveAMDGPULDS(); 5134 5135 if (IDVal == PALMD::AssemblerDirectiveBegin) 5136 return ParseDirectivePALMetadataBegin(); 5137 5138 if (IDVal == PALMD::AssemblerDirective) 5139 return ParseDirectivePALMetadata(); 5140 5141 return true; 5142 } 5143 5144 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5145 unsigned RegNo) const { 5146 5147 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5148 R.isValid(); ++R) { 5149 if (*R == RegNo) 5150 return isGFX9Plus(); 5151 } 5152 5153 // GFX10 has 2 more SGPRs 104 and 105. 5154 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5155 R.isValid(); ++R) { 5156 if (*R == RegNo) 5157 return hasSGPR104_SGPR105(); 5158 } 5159 5160 switch (RegNo) { 5161 case AMDGPU::SRC_SHARED_BASE: 5162 case AMDGPU::SRC_SHARED_LIMIT: 5163 case AMDGPU::SRC_PRIVATE_BASE: 5164 case AMDGPU::SRC_PRIVATE_LIMIT: 5165 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5166 return isGFX9Plus(); 5167 case AMDGPU::TBA: 5168 case AMDGPU::TBA_LO: 5169 case AMDGPU::TBA_HI: 5170 case AMDGPU::TMA: 5171 case AMDGPU::TMA_LO: 5172 case AMDGPU::TMA_HI: 5173 return !isGFX9Plus(); 5174 case AMDGPU::XNACK_MASK: 5175 case AMDGPU::XNACK_MASK_LO: 5176 case AMDGPU::XNACK_MASK_HI: 5177 return (isVI() || isGFX9()) && hasXNACK(); 5178 case AMDGPU::SGPR_NULL: 5179 return isGFX10Plus(); 5180 default: 5181 break; 5182 } 5183 5184 if (isCI()) 5185 return true; 5186 5187 if (isSI() || isGFX10Plus()) { 5188 // No flat_scr on SI. 
5189 // On GFX10 flat scratch is not a valid register operand and can only be 5190 // accessed with s_setreg/s_getreg. 5191 switch (RegNo) { 5192 case AMDGPU::FLAT_SCR: 5193 case AMDGPU::FLAT_SCR_LO: 5194 case AMDGPU::FLAT_SCR_HI: 5195 return false; 5196 default: 5197 return true; 5198 } 5199 } 5200 5201 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5202 // SI/CI have. 5203 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5204 R.isValid(); ++R) { 5205 if (*R == RegNo) 5206 return hasSGPR102_SGPR103(); 5207 } 5208 5209 return true; 5210 } 5211 5212 OperandMatchResultTy 5213 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5214 OperandMode Mode) { 5215 // Try to parse with a custom parser 5216 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5217 5218 // If we successfully parsed the operand or if there as an error parsing, 5219 // we are done. 5220 // 5221 // If we are parsing after we reach EndOfStatement then this means we 5222 // are appending default values to the Operands list. This is only done 5223 // by custom parser, so we shouldn't continue on to the generic parsing. 5224 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5225 isToken(AsmToken::EndOfStatement)) 5226 return ResTy; 5227 5228 SMLoc RBraceLoc; 5229 SMLoc LBraceLoc = getLoc(); 5230 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5231 unsigned Prefix = Operands.size(); 5232 5233 for (;;) { 5234 auto Loc = getLoc(); 5235 ResTy = parseReg(Operands); 5236 if (ResTy == MatchOperand_NoMatch) 5237 Error(Loc, "expected a register"); 5238 if (ResTy != MatchOperand_Success) 5239 return MatchOperand_ParseFail; 5240 5241 RBraceLoc = getLoc(); 5242 if (trySkipToken(AsmToken::RBrac)) 5243 break; 5244 5245 if (!skipToken(AsmToken::Comma, 5246 "expected a comma or a closing square bracket")) { 5247 return MatchOperand_ParseFail; 5248 } 5249 } 5250 5251 if (Operands.size() - Prefix > 1) { 5252 Operands.insert(Operands.begin() + Prefix, 5253 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5254 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5255 } 5256 5257 return MatchOperand_Success; 5258 } 5259 5260 return parseRegOrImm(Operands); 5261 } 5262 5263 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5264 // Clear any forced encodings from the previous instruction. 
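  // For example, a mnemonic written as "v_add_f32_e64" forces the 64-bit
  // (VOP3) encoding via the suffix checks below and is matched as
  // "v_add_f32"; the "_e32", "_dpp" and "_sdwa" suffixes are handled likewise.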
5265 setForcedEncodingSize(0); 5266 setForcedDPP(false); 5267 setForcedSDWA(false); 5268 5269 if (Name.endswith("_e64")) { 5270 setForcedEncodingSize(64); 5271 return Name.substr(0, Name.size() - 4); 5272 } else if (Name.endswith("_e32")) { 5273 setForcedEncodingSize(32); 5274 return Name.substr(0, Name.size() - 4); 5275 } else if (Name.endswith("_dpp")) { 5276 setForcedDPP(true); 5277 return Name.substr(0, Name.size() - 4); 5278 } else if (Name.endswith("_sdwa")) { 5279 setForcedSDWA(true); 5280 return Name.substr(0, Name.size() - 5); 5281 } 5282 return Name; 5283 } 5284 5285 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5286 StringRef Name, 5287 SMLoc NameLoc, OperandVector &Operands) { 5288 // Add the instruction mnemonic 5289 Name = parseMnemonicSuffix(Name); 5290 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5291 5292 bool IsMIMG = Name.startswith("image_"); 5293 5294 while (!trySkipToken(AsmToken::EndOfStatement)) { 5295 OperandMode Mode = OperandMode_Default; 5296 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5297 Mode = OperandMode_NSA; 5298 CPolSeen = 0; 5299 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5300 5301 if (Res != MatchOperand_Success) { 5302 checkUnsupportedInstruction(Name, NameLoc); 5303 if (!Parser.hasPendingError()) { 5304 // FIXME: use real operand location rather than the current location. 5305 StringRef Msg = 5306 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5307 "not a valid operand."; 5308 Error(getLoc(), Msg); 5309 } 5310 while (!trySkipToken(AsmToken::EndOfStatement)) { 5311 lex(); 5312 } 5313 return true; 5314 } 5315 5316 // Eat the comma or space if there is one. 5317 trySkipToken(AsmToken::Comma); 5318 } 5319 5320 return false; 5321 } 5322 5323 //===----------------------------------------------------------------------===// 5324 // Utility functions 5325 //===----------------------------------------------------------------------===// 5326 5327 OperandMatchResultTy 5328 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5329 5330 if (!trySkipId(Prefix, AsmToken::Colon)) 5331 return MatchOperand_NoMatch; 5332 5333 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5334 } 5335 5336 OperandMatchResultTy 5337 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5338 AMDGPUOperand::ImmTy ImmTy, 5339 bool (*ConvertResult)(int64_t&)) { 5340 SMLoc S = getLoc(); 5341 int64_t Value = 0; 5342 5343 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5344 if (Res != MatchOperand_Success) 5345 return Res; 5346 5347 if (ConvertResult && !ConvertResult(Value)) { 5348 Error(S, "invalid " + StringRef(Prefix) + " value."); 5349 } 5350 5351 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5352 return MatchOperand_Success; 5353 } 5354 5355 OperandMatchResultTy 5356 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5357 OperandVector &Operands, 5358 AMDGPUOperand::ImmTy ImmTy, 5359 bool (*ConvertResult)(int64_t&)) { 5360 SMLoc S = getLoc(); 5361 if (!trySkipId(Prefix, AsmToken::Colon)) 5362 return MatchOperand_NoMatch; 5363 5364 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5365 return MatchOperand_ParseFail; 5366 5367 unsigned Val = 0; 5368 const unsigned MaxSize = 4; 5369 5370 // FIXME: How to verify the number of elements matches the number of src 5371 // operands? 
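  // Illustrative input: for something like "op_sel:[0,1,0]" the prefix and
  // '[' have already been consumed above; each element parsed below must be
  // 0 or 1, is packed into bit I of Val, and at most MaxSize (4) elements
  // are accepted.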
5372 for (int I = 0; ; ++I) { 5373 int64_t Op; 5374 SMLoc Loc = getLoc(); 5375 if (!parseExpr(Op)) 5376 return MatchOperand_ParseFail; 5377 5378 if (Op != 0 && Op != 1) { 5379 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5380 return MatchOperand_ParseFail; 5381 } 5382 5383 Val |= (Op << I); 5384 5385 if (trySkipToken(AsmToken::RBrac)) 5386 break; 5387 5388 if (I + 1 == MaxSize) { 5389 Error(getLoc(), "expected a closing square bracket"); 5390 return MatchOperand_ParseFail; 5391 } 5392 5393 if (!skipToken(AsmToken::Comma, "expected a comma")) 5394 return MatchOperand_ParseFail; 5395 } 5396 5397 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5398 return MatchOperand_Success; 5399 } 5400 5401 OperandMatchResultTy 5402 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5403 AMDGPUOperand::ImmTy ImmTy) { 5404 int64_t Bit; 5405 SMLoc S = getLoc(); 5406 5407 if (trySkipId(Name)) { 5408 Bit = 1; 5409 } else if (trySkipId("no", Name)) { 5410 Bit = 0; 5411 } else { 5412 return MatchOperand_NoMatch; 5413 } 5414 5415 if (Name == "r128" && !hasMIMG_R128()) { 5416 Error(S, "r128 modifier is not supported on this GPU"); 5417 return MatchOperand_ParseFail; 5418 } 5419 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5420 Error(S, "a16 modifier is not supported on this GPU"); 5421 return MatchOperand_ParseFail; 5422 } 5423 5424 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5425 ImmTy = AMDGPUOperand::ImmTyR128A16; 5426 5427 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5428 return MatchOperand_Success; 5429 } 5430 5431 OperandMatchResultTy 5432 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5433 unsigned CPolOn = 0; 5434 unsigned CPolOff = 0; 5435 SMLoc S = getLoc(); 5436 5437 if (trySkipId("glc")) 5438 CPolOn = AMDGPU::CPol::GLC; 5439 else if (trySkipId("noglc")) 5440 CPolOff = AMDGPU::CPol::GLC; 5441 else if (trySkipId("slc")) 5442 CPolOn = AMDGPU::CPol::SLC; 5443 else if (trySkipId("noslc")) 5444 CPolOff = AMDGPU::CPol::SLC; 5445 else if (trySkipId("dlc")) 5446 CPolOn = AMDGPU::CPol::DLC; 5447 else if (trySkipId("nodlc")) 5448 CPolOff = AMDGPU::CPol::DLC; 5449 else if (trySkipId("scc")) 5450 CPolOn = AMDGPU::CPol::SCC; 5451 else if (trySkipId("noscc")) 5452 CPolOff = AMDGPU::CPol::SCC; 5453 else 5454 return MatchOperand_NoMatch; 5455 5456 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5457 Error(S, "dlc modifier is not supported on this GPU"); 5458 return MatchOperand_ParseFail; 5459 } 5460 5461 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5462 Error(S, "scc modifier is not supported on this GPU"); 5463 return MatchOperand_ParseFail; 5464 } 5465 5466 if (CPolSeen & (CPolOn | CPolOff)) { 5467 Error(S, "duplicate cache policy modifier"); 5468 return MatchOperand_ParseFail; 5469 } 5470 5471 CPolSeen |= (CPolOn | CPolOff); 5472 5473 for (unsigned I = 1; I != Operands.size(); ++I) { 5474 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5475 if (Op.isCPol()) { 5476 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5477 return MatchOperand_Success; 5478 } 5479 } 5480 5481 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5482 AMDGPUOperand::ImmTyCPol)); 5483 5484 return MatchOperand_Success; 5485 } 5486 5487 static void addOptionalImmOperand( 5488 MCInst& Inst, const OperandVector& Operands, 5489 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5490 AMDGPUOperand::ImmTy ImmT, 5491 int64_t Default = 0) { 5492 auto i = OptionalIdx.find(ImmT); 5493 if (i != OptionalIdx.end()) { 
5494 unsigned Idx = i->second; 5495 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5496 } else { 5497 Inst.addOperand(MCOperand::createImm(Default)); 5498 } 5499 } 5500 5501 OperandMatchResultTy 5502 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5503 StringRef &Value, 5504 SMLoc &StringLoc) { 5505 if (!trySkipId(Prefix, AsmToken::Colon)) 5506 return MatchOperand_NoMatch; 5507 5508 StringLoc = getLoc(); 5509 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5510 : MatchOperand_ParseFail; 5511 } 5512 5513 //===----------------------------------------------------------------------===// 5514 // MTBUF format 5515 //===----------------------------------------------------------------------===// 5516 5517 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5518 int64_t MaxVal, 5519 int64_t &Fmt) { 5520 int64_t Val; 5521 SMLoc Loc = getLoc(); 5522 5523 auto Res = parseIntWithPrefix(Pref, Val); 5524 if (Res == MatchOperand_ParseFail) 5525 return false; 5526 if (Res == MatchOperand_NoMatch) 5527 return true; 5528 5529 if (Val < 0 || Val > MaxVal) { 5530 Error(Loc, Twine("out of range ", StringRef(Pref))); 5531 return false; 5532 } 5533 5534 Fmt = Val; 5535 return true; 5536 } 5537 5538 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5539 // values to live in a joint format operand in the MCInst encoding. 5540 OperandMatchResultTy 5541 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5542 using namespace llvm::AMDGPU::MTBUFFormat; 5543 5544 int64_t Dfmt = DFMT_UNDEF; 5545 int64_t Nfmt = NFMT_UNDEF; 5546 5547 // dfmt and nfmt can appear in either order, and each is optional. 5548 for (int I = 0; I < 2; ++I) { 5549 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5550 return MatchOperand_ParseFail; 5551 5552 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5553 return MatchOperand_ParseFail; 5554 } 5555 // Skip optional comma between dfmt/nfmt 5556 // but guard against 2 commas following each other. 5557 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5558 !peekToken().is(AsmToken::Comma)) { 5559 trySkipToken(AsmToken::Comma); 5560 } 5561 } 5562 5563 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5564 return MatchOperand_NoMatch; 5565 5566 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5567 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5568 5569 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5570 return MatchOperand_Success; 5571 } 5572 5573 OperandMatchResultTy 5574 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5575 using namespace llvm::AMDGPU::MTBUFFormat; 5576 5577 int64_t Fmt = UFMT_UNDEF; 5578 5579 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5580 return MatchOperand_ParseFail; 5581 5582 if (Fmt == UFMT_UNDEF) 5583 return MatchOperand_NoMatch; 5584 5585 Format = Fmt; 5586 return MatchOperand_Success; 5587 } 5588 5589 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5590 int64_t &Nfmt, 5591 StringRef FormatStr, 5592 SMLoc Loc) { 5593 using namespace llvm::AMDGPU::MTBUFFormat; 5594 int64_t Format; 5595 5596 Format = getDfmt(FormatStr); 5597 if (Format != DFMT_UNDEF) { 5598 Dfmt = Format; 5599 return true; 5600 } 5601 5602 Format = getNfmt(FormatStr, getSTI()); 5603 if (Format != NFMT_UNDEF) { 5604 Nfmt = Format; 5605 return true; 5606 } 5607 5608 Error(Loc, "unsupported format"); 5609 return false; 5610 } 5611 5612 OperandMatchResultTy 5613 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5614 SMLoc FormatLoc, 5615 int64_t &Format) { 5616 using namespace llvm::AMDGPU::MTBUFFormat; 5617 5618 int64_t Dfmt = DFMT_UNDEF; 5619 int64_t Nfmt = NFMT_UNDEF; 5620 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5621 return MatchOperand_ParseFail; 5622 5623 if (trySkipToken(AsmToken::Comma)) { 5624 StringRef Str; 5625 SMLoc Loc = getLoc(); 5626 if (!parseId(Str, "expected a format string") || 5627 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5628 return MatchOperand_ParseFail; 5629 } 5630 if (Dfmt == DFMT_UNDEF) { 5631 Error(Loc, "duplicate numeric format"); 5632 return MatchOperand_ParseFail; 5633 } else if (Nfmt == NFMT_UNDEF) { 5634 Error(Loc, "duplicate data format"); 5635 return MatchOperand_ParseFail; 5636 } 5637 } 5638 5639 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5640 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5641 5642 if (isGFX10Plus()) { 5643 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5644 if (Ufmt == UFMT_UNDEF) { 5645 Error(FormatLoc, "unsupported format"); 5646 return MatchOperand_ParseFail; 5647 } 5648 Format = Ufmt; 5649 } else { 5650 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5651 } 5652 5653 return MatchOperand_Success; 5654 } 5655 5656 OperandMatchResultTy 5657 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5658 SMLoc Loc, 5659 int64_t &Format) { 5660 using namespace llvm::AMDGPU::MTBUFFormat; 5661 5662 auto Id = getUnifiedFormat(FormatStr); 5663 if (Id == UFMT_UNDEF) 5664 return MatchOperand_NoMatch; 5665 5666 if (!isGFX10Plus()) { 5667 Error(Loc, "unified format is not supported on this GPU"); 5668 return MatchOperand_ParseFail; 5669 } 5670 5671 Format = Id; 5672 return MatchOperand_Success; 5673 } 5674 5675 OperandMatchResultTy 5676 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5677 using namespace llvm::AMDGPU::MTBUFFormat; 5678 SMLoc Loc = getLoc(); 5679 5680 if (!parseExpr(Format)) 5681 return MatchOperand_ParseFail; 5682 if (!isValidFormatEncoding(Format, getSTI())) { 5683 Error(Loc, "out of range format"); 5684 return MatchOperand_ParseFail; 5685 } 5686 5687 return MatchOperand_Success; 5688 } 5689 5690 OperandMatchResultTy 5691 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5692 using namespace llvm::AMDGPU::MTBUFFormat; 5693 5694 if (!trySkipId("format", AsmToken::Colon)) 5695 return MatchOperand_NoMatch; 5696 5697 if (trySkipToken(AsmToken::LBrac)) { 5698 StringRef FormatStr; 5699 SMLoc Loc = getLoc(); 5700 if (!parseId(FormatStr, "expected a format string")) 5701 return MatchOperand_ParseFail; 5702 5703 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5704 if (Res == MatchOperand_NoMatch) 5705 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5706 if (Res != MatchOperand_Success) 5707 return Res; 5708 5709 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5710 return MatchOperand_ParseFail; 5711 5712 return MatchOperand_Success; 5713 } 5714 5715 return parseNumericFormat(Format); 5716 } 5717 5718 OperandMatchResultTy 5719 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5720 using namespace llvm::AMDGPU::MTBUFFormat; 5721 5722 int64_t Format = getDefaultFormatEncoding(getSTI()); 5723 OperandMatchResultTy Res; 5724 SMLoc Loc = getLoc(); 5725 5726 // Parse legacy format syntax. 5727 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5728 if (Res == MatchOperand_ParseFail) 5729 return Res; 5730 5731 bool FormatFound = (Res == MatchOperand_Success); 5732 5733 Operands.push_back( 5734 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5735 5736 if (FormatFound) 5737 trySkipToken(AsmToken::Comma); 5738 5739 if (isToken(AsmToken::EndOfStatement)) { 5740 // We are expecting an soffset operand, 5741 // but let matcher handle the error. 5742 return MatchOperand_Success; 5743 } 5744 5745 // Parse soffset. 
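// Note: if no format was parsed above, a symbolic or numeric format (for
// example "format:[<symbol>]", illustrative) may still follow the soffset
// operand; that case is handled after the soffset is parsed below.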
5746 Res = parseRegOrImm(Operands); 5747 if (Res != MatchOperand_Success) 5748 return Res; 5749 5750 trySkipToken(AsmToken::Comma); 5751 5752 if (!FormatFound) { 5753 Res = parseSymbolicOrNumericFormat(Format); 5754 if (Res == MatchOperand_ParseFail) 5755 return Res; 5756 if (Res == MatchOperand_Success) { 5757 auto Size = Operands.size(); 5758 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5759 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5760 Op.setImm(Format); 5761 } 5762 return MatchOperand_Success; 5763 } 5764 5765 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5766 Error(getLoc(), "duplicate format"); 5767 return MatchOperand_ParseFail; 5768 } 5769 return MatchOperand_Success; 5770 } 5771 5772 //===----------------------------------------------------------------------===// 5773 // ds 5774 //===----------------------------------------------------------------------===// 5775 5776 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5777 const OperandVector &Operands) { 5778 OptionalImmIndexMap OptionalIdx; 5779 5780 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5781 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5782 5783 // Add the register arguments 5784 if (Op.isReg()) { 5785 Op.addRegOperands(Inst, 1); 5786 continue; 5787 } 5788 5789 // Handle optional arguments 5790 OptionalIdx[Op.getImmTy()] = i; 5791 } 5792 5793 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5794 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5796 5797 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5798 } 5799 5800 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5801 bool IsGdsHardcoded) { 5802 OptionalImmIndexMap OptionalIdx; 5803 5804 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5805 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5806 5807 // Add the register arguments 5808 if (Op.isReg()) { 5809 Op.addRegOperands(Inst, 1); 5810 continue; 5811 } 5812 5813 if (Op.isToken() && Op.getToken() == "gds") { 5814 IsGdsHardcoded = true; 5815 continue; 5816 } 5817 5818 // Handle optional arguments 5819 OptionalIdx[Op.getImmTy()] = i; 5820 } 5821 5822 AMDGPUOperand::ImmTy OffsetType = 5823 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5824 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5825 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5826 AMDGPUOperand::ImmTyOffset; 5827 5828 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5829 5830 if (!IsGdsHardcoded) { 5831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5832 } 5833 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5834 } 5835 5836 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5837 OptionalImmIndexMap OptionalIdx; 5838 5839 unsigned OperandIdx[4]; 5840 unsigned EnMask = 0; 5841 int SrcIdx = 0; 5842 5843 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5844 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5845 5846 // Add the register arguments 5847 if (Op.isReg()) { 5848 assert(SrcIdx < 4); 5849 OperandIdx[SrcIdx] = Inst.size(); 5850 Op.addRegOperands(Inst, 1); 5851 ++SrcIdx; 5852 continue; 5853 } 5854 5855 if (Op.isOff()) { 5856 assert(SrcIdx < 4); 5857 OperandIdx[SrcIdx] = Inst.size(); 5858 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5859 ++SrcIdx; 5860 continue; 5861 } 5862 5863 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5864 Op.addImmOperands(Inst, 1); 5865 continue; 5866 } 5867 5868 if (Op.isToken() && Op.getToken() == "done") 5869 continue; 5870 5871 // Handle optional arguments 5872 OptionalIdx[Op.getImmTy()] = i; 5873 } 5874 5875 assert(SrcIdx == 4); 5876 5877 bool Compr = false; 5878 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5879 Compr = true; 5880 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5881 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5882 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5883 } 5884 5885 for (auto i = 0; i < SrcIdx; ++i) { 5886 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5887 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5888 } 5889 } 5890 5891 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5892 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5893 5894 Inst.addOperand(MCOperand::createImm(EnMask)); 5895 } 5896 5897 //===----------------------------------------------------------------------===// 5898 // s_waitcnt 5899 //===----------------------------------------------------------------------===// 5900 5901 static bool 5902 encodeCnt( 5903 const AMDGPU::IsaVersion ISA, 5904 int64_t &IntVal, 5905 int64_t CntVal, 5906 bool Saturate, 5907 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5908 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5909 { 5910 bool Failed = false; 5911 5912 IntVal = encode(ISA, IntVal, CntVal); 5913 if (CntVal != decode(ISA, IntVal)) { 5914 if (Saturate) { 5915 IntVal = encode(ISA, IntVal, -1); 5916 } else { 5917 Failed = true; 5918 } 5919 } 5920 return Failed; 5921 } 5922 5923 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5924 5925 SMLoc CntLoc = getLoc(); 5926 StringRef CntName = getTokenStr(); 5927 5928 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5929 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5930 return false; 5931 5932 int64_t CntVal; 5933 SMLoc ValLoc = getLoc(); 5934 if (!parseExpr(CntVal)) 5935 return false; 5936 5937 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5938 5939 bool Failed = true; 5940 bool Sat = CntName.endswith("_sat"); 5941 5942 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5943 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5944 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5945 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5946 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5947 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5948 } else { 5949 Error(CntLoc, "invalid counter name " + CntName); 5950 return false; 5951 } 5952 5953 if (Failed) { 5954 Error(ValLoc, "too large value for " + CntName); 5955 return false; 5956 } 5957 5958 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5959 return false; 5960 5961 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5962 if (isToken(AsmToken::EndOfStatement)) { 5963 Error(getLoc(), "expected a counter name"); 5964 return false; 5965 } 5966 } 5967 5968 return true; 5969 } 5970 5971 OperandMatchResultTy 5972 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5973 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5974 int64_t Waitcnt = getWaitcntBitMask(ISA); 5975 SMLoc S = getLoc(); 5976 5977 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5978 while (!isToken(AsmToken::EndOfStatement)) { 5979 if (!parseCnt(Waitcnt)) 5980 return MatchOperand_ParseFail; 5981 } 5982 } else { 5983 if (!parseExpr(Waitcnt)) 5984 return MatchOperand_ParseFail; 5985 } 5986 5987 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5988 return MatchOperand_Success; 5989 } 5990 5991 bool 5992 AMDGPUOperand::isSWaitCnt() const { 5993 return isImm(); 5994 } 5995 5996 //===----------------------------------------------------------------------===// 5997 // hwreg 5998 //===----------------------------------------------------------------------===// 5999 6000 bool 6001 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6002 OperandInfoTy &Offset, 6003 
OperandInfoTy &Width) { 6004 using namespace llvm::AMDGPU::Hwreg; 6005 6006 // The register may be specified by name or using a numeric code 6007 HwReg.Loc = getLoc(); 6008 if (isToken(AsmToken::Identifier) && 6009 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6010 HwReg.IsSymbolic = true; 6011 lex(); // skip register name 6012 } else if (!parseExpr(HwReg.Id, "a register name")) { 6013 return false; 6014 } 6015 6016 if (trySkipToken(AsmToken::RParen)) 6017 return true; 6018 6019 // parse optional params 6020 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6021 return false; 6022 6023 Offset.Loc = getLoc(); 6024 if (!parseExpr(Offset.Id)) 6025 return false; 6026 6027 if (!skipToken(AsmToken::Comma, "expected a comma")) 6028 return false; 6029 6030 Width.Loc = getLoc(); 6031 return parseExpr(Width.Id) && 6032 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6033 } 6034 6035 bool 6036 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6037 const OperandInfoTy &Offset, 6038 const OperandInfoTy &Width) { 6039 6040 using namespace llvm::AMDGPU::Hwreg; 6041 6042 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6043 Error(HwReg.Loc, 6044 "specified hardware register is not supported on this GPU"); 6045 return false; 6046 } 6047 if (!isValidHwreg(HwReg.Id)) { 6048 Error(HwReg.Loc, 6049 "invalid code of hardware register: only 6-bit values are legal"); 6050 return false; 6051 } 6052 if (!isValidHwregOffset(Offset.Id)) { 6053 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6054 return false; 6055 } 6056 if (!isValidHwregWidth(Width.Id)) { 6057 Error(Width.Loc, 6058 "invalid bitfield width: only values from 1 to 32 are legal"); 6059 return false; 6060 } 6061 return true; 6062 } 6063 6064 OperandMatchResultTy 6065 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6066 using namespace llvm::AMDGPU::Hwreg; 6067 6068 int64_t ImmVal = 0; 6069 SMLoc Loc = getLoc(); 6070 6071 if (trySkipId("hwreg", AsmToken::LParen)) { 6072 OperandInfoTy HwReg(ID_UNKNOWN_); 6073 OperandInfoTy Offset(OFFSET_DEFAULT_); 6074 OperandInfoTy Width(WIDTH_DEFAULT_); 6075 if (parseHwregBody(HwReg, Offset, Width) && 6076 validateHwreg(HwReg, Offset, Width)) { 6077 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6078 } else { 6079 return MatchOperand_ParseFail; 6080 } 6081 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6082 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6083 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6084 return MatchOperand_ParseFail; 6085 } 6086 } else { 6087 return MatchOperand_ParseFail; 6088 } 6089 6090 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6091 return MatchOperand_Success; 6092 } 6093 6094 bool AMDGPUOperand::isHwreg() const { 6095 return isImmTy(ImmTyHwreg); 6096 } 6097 6098 //===----------------------------------------------------------------------===// 6099 // sendmsg 6100 //===----------------------------------------------------------------------===// 6101 6102 bool 6103 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6104 OperandInfoTy &Op, 6105 OperandInfoTy &Stream) { 6106 using namespace llvm::AMDGPU::SendMsg; 6107 6108 Msg.Loc = getLoc(); 6109 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6110 Msg.IsSymbolic = true; 6111 lex(); // skip message name 6112 } else if (!parseExpr(Msg.Id, "a message name")) { 6113 return false; 6114 } 6115 6116 if (trySkipToken(AsmToken::Comma)) { 6117 Op.IsDefined = true; 
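// The optional operation may be given by a symbolic name (for example
// GS_OP_EMIT with MSG_GS; illustrative) or as an integer expression;
// both forms are handled below.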
6118 Op.Loc = getLoc(); 6119 if (isToken(AsmToken::Identifier) && 6120 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6121 lex(); // skip operation name 6122 } else if (!parseExpr(Op.Id, "an operation name")) { 6123 return false; 6124 } 6125 6126 if (trySkipToken(AsmToken::Comma)) { 6127 Stream.IsDefined = true; 6128 Stream.Loc = getLoc(); 6129 if (!parseExpr(Stream.Id)) 6130 return false; 6131 } 6132 } 6133 6134 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6135 } 6136 6137 bool 6138 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6139 const OperandInfoTy &Op, 6140 const OperandInfoTy &Stream) { 6141 using namespace llvm::AMDGPU::SendMsg; 6142 6143 // Validation strictness depends on whether message is specified 6144 // in a symbolc or in a numeric form. In the latter case 6145 // only encoding possibility is checked. 6146 bool Strict = Msg.IsSymbolic; 6147 6148 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6149 Error(Msg.Loc, "invalid message id"); 6150 return false; 6151 } 6152 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6153 if (Op.IsDefined) { 6154 Error(Op.Loc, "message does not support operations"); 6155 } else { 6156 Error(Msg.Loc, "missing message operation"); 6157 } 6158 return false; 6159 } 6160 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6161 Error(Op.Loc, "invalid operation id"); 6162 return false; 6163 } 6164 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6165 Error(Stream.Loc, "message operation does not support streams"); 6166 return false; 6167 } 6168 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6169 Error(Stream.Loc, "invalid message stream id"); 6170 return false; 6171 } 6172 return true; 6173 } 6174 6175 OperandMatchResultTy 6176 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6177 using namespace llvm::AMDGPU::SendMsg; 6178 6179 int64_t ImmVal = 0; 6180 SMLoc Loc = getLoc(); 6181 6182 if (trySkipId("sendmsg", AsmToken::LParen)) { 6183 OperandInfoTy Msg(ID_UNKNOWN_); 6184 OperandInfoTy Op(OP_NONE_); 6185 OperandInfoTy Stream(STREAM_ID_NONE_); 6186 if (parseSendMsgBody(Msg, Op, Stream) && 6187 validateSendMsg(Msg, Op, Stream)) { 6188 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6189 } else { 6190 return MatchOperand_ParseFail; 6191 } 6192 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6193 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6194 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6195 return MatchOperand_ParseFail; 6196 } 6197 } else { 6198 return MatchOperand_ParseFail; 6199 } 6200 6201 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6202 return MatchOperand_Success; 6203 } 6204 6205 bool AMDGPUOperand::isSendMsg() const { 6206 return isImmTy(ImmTySendMsg); 6207 } 6208 6209 //===----------------------------------------------------------------------===// 6210 // v_interp 6211 //===----------------------------------------------------------------------===// 6212 6213 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6214 StringRef Str; 6215 SMLoc S = getLoc(); 6216 6217 if (!parseId(Str)) 6218 return MatchOperand_NoMatch; 6219 6220 int Slot = StringSwitch<int>(Str) 6221 .Case("p10", 0) 6222 .Case("p20", 1) 6223 .Case("p0", 2) 6224 .Default(-1); 6225 6226 if (Slot == -1) { 6227 Error(S, "invalid interpolation slot"); 6228 return MatchOperand_ParseFail; 6229 } 6230 6231 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6232 
AMDGPUOperand::ImmTyInterpSlot)); 6233 return MatchOperand_Success; 6234 } 6235 6236 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6237 StringRef Str; 6238 SMLoc S = getLoc(); 6239 6240 if (!parseId(Str)) 6241 return MatchOperand_NoMatch; 6242 6243 if (!Str.startswith("attr")) { 6244 Error(S, "invalid interpolation attribute"); 6245 return MatchOperand_ParseFail; 6246 } 6247 6248 StringRef Chan = Str.take_back(2); 6249 int AttrChan = StringSwitch<int>(Chan) 6250 .Case(".x", 0) 6251 .Case(".y", 1) 6252 .Case(".z", 2) 6253 .Case(".w", 3) 6254 .Default(-1); 6255 if (AttrChan == -1) { 6256 Error(S, "invalid or missing interpolation attribute channel"); 6257 return MatchOperand_ParseFail; 6258 } 6259 6260 Str = Str.drop_back(2).drop_front(4); 6261 6262 uint8_t Attr; 6263 if (Str.getAsInteger(10, Attr)) { 6264 Error(S, "invalid or missing interpolation attribute number"); 6265 return MatchOperand_ParseFail; 6266 } 6267 6268 if (Attr > 63) { 6269 Error(S, "out of bounds interpolation attribute number"); 6270 return MatchOperand_ParseFail; 6271 } 6272 6273 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6274 6275 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6276 AMDGPUOperand::ImmTyInterpAttr)); 6277 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6278 AMDGPUOperand::ImmTyAttrChan)); 6279 return MatchOperand_Success; 6280 } 6281 6282 //===----------------------------------------------------------------------===// 6283 // exp 6284 //===----------------------------------------------------------------------===// 6285 6286 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6287 using namespace llvm::AMDGPU::Exp; 6288 6289 StringRef Str; 6290 SMLoc S = getLoc(); 6291 6292 if (!parseId(Str)) 6293 return MatchOperand_NoMatch; 6294 6295 unsigned Id = getTgtId(Str); 6296 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6297 Error(S, (Id == ET_INVALID) ? 
6298 "invalid exp target" : 6299 "exp target is not supported on this GPU"); 6300 return MatchOperand_ParseFail; 6301 } 6302 6303 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6304 AMDGPUOperand::ImmTyExpTgt)); 6305 return MatchOperand_Success; 6306 } 6307 6308 //===----------------------------------------------------------------------===// 6309 // parser helpers 6310 //===----------------------------------------------------------------------===// 6311 6312 bool 6313 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6314 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6315 } 6316 6317 bool 6318 AMDGPUAsmParser::isId(const StringRef Id) const { 6319 return isId(getToken(), Id); 6320 } 6321 6322 bool 6323 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6324 return getTokenKind() == Kind; 6325 } 6326 6327 bool 6328 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6329 if (isId(Id)) { 6330 lex(); 6331 return true; 6332 } 6333 return false; 6334 } 6335 6336 bool 6337 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6338 if (isToken(AsmToken::Identifier)) { 6339 StringRef Tok = getTokenStr(); 6340 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6341 lex(); 6342 return true; 6343 } 6344 } 6345 return false; 6346 } 6347 6348 bool 6349 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6350 if (isId(Id) && peekToken().is(Kind)) { 6351 lex(); 6352 lex(); 6353 return true; 6354 } 6355 return false; 6356 } 6357 6358 bool 6359 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6360 if (isToken(Kind)) { 6361 lex(); 6362 return true; 6363 } 6364 return false; 6365 } 6366 6367 bool 6368 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6369 const StringRef ErrMsg) { 6370 if (!trySkipToken(Kind)) { 6371 Error(getLoc(), ErrMsg); 6372 return false; 6373 } 6374 return true; 6375 } 6376 6377 bool 6378 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6379 SMLoc S = getLoc(); 6380 6381 const MCExpr *Expr; 6382 if (Parser.parseExpression(Expr)) 6383 return false; 6384 6385 if (Expr->evaluateAsAbsolute(Imm)) 6386 return true; 6387 6388 if (Expected.empty()) { 6389 Error(S, "expected absolute expression"); 6390 } else { 6391 Error(S, Twine("expected ", Expected) + 6392 Twine(" or an absolute expression")); 6393 } 6394 return false; 6395 } 6396 6397 bool 6398 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6399 SMLoc S = getLoc(); 6400 6401 const MCExpr *Expr; 6402 if (Parser.parseExpression(Expr)) 6403 return false; 6404 6405 int64_t IntVal; 6406 if (Expr->evaluateAsAbsolute(IntVal)) { 6407 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6408 } else { 6409 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6410 } 6411 return true; 6412 } 6413 6414 bool 6415 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6416 if (isToken(AsmToken::String)) { 6417 Val = getToken().getStringContents(); 6418 lex(); 6419 return true; 6420 } else { 6421 Error(getLoc(), ErrMsg); 6422 return false; 6423 } 6424 } 6425 6426 bool 6427 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6428 if (isToken(AsmToken::Identifier)) { 6429 Val = getTokenStr(); 6430 lex(); 6431 return true; 6432 } else { 6433 if (!ErrMsg.empty()) 6434 Error(getLoc(), ErrMsg); 6435 return false; 6436 } 6437 } 6438 6439 AsmToken 6440 AMDGPUAsmParser::getToken() const { 6441 return Parser.getTok(); 6442 } 6443 6444 AsmToken 6445 
AMDGPUAsmParser::peekToken() { 6446 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6447 } 6448 6449 void 6450 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6451 auto TokCount = getLexer().peekTokens(Tokens); 6452 6453 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6454 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6455 } 6456 6457 AsmToken::TokenKind 6458 AMDGPUAsmParser::getTokenKind() const { 6459 return getLexer().getKind(); 6460 } 6461 6462 SMLoc 6463 AMDGPUAsmParser::getLoc() const { 6464 return getToken().getLoc(); 6465 } 6466 6467 StringRef 6468 AMDGPUAsmParser::getTokenStr() const { 6469 return getToken().getString(); 6470 } 6471 6472 void 6473 AMDGPUAsmParser::lex() { 6474 Parser.Lex(); 6475 } 6476 6477 SMLoc 6478 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6479 const OperandVector &Operands) const { 6480 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6481 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6482 if (Test(Op)) 6483 return Op.getStartLoc(); 6484 } 6485 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6486 } 6487 6488 SMLoc 6489 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6490 const OperandVector &Operands) const { 6491 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6492 return getOperandLoc(Test, Operands); 6493 } 6494 6495 SMLoc 6496 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6497 const OperandVector &Operands) const { 6498 auto Test = [=](const AMDGPUOperand& Op) { 6499 return Op.isRegKind() && Op.getReg() == Reg; 6500 }; 6501 return getOperandLoc(Test, Operands); 6502 } 6503 6504 SMLoc 6505 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6506 auto Test = [](const AMDGPUOperand& Op) { 6507 return Op.IsImmKindLiteral() || Op.isExpr(); 6508 }; 6509 return getOperandLoc(Test, Operands); 6510 } 6511 6512 SMLoc 6513 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6514 auto Test = [](const AMDGPUOperand& Op) { 6515 return Op.isImmKindConst(); 6516 }; 6517 return getOperandLoc(Test, Operands); 6518 } 6519 6520 //===----------------------------------------------------------------------===// 6521 // swizzle 6522 //===----------------------------------------------------------------------===// 6523 6524 LLVM_READNONE 6525 static unsigned 6526 encodeBitmaskPerm(const unsigned AndMask, 6527 const unsigned OrMask, 6528 const unsigned XorMask) { 6529 using namespace llvm::AMDGPU::Swizzle; 6530 6531 return BITMASK_PERM_ENC | 6532 (AndMask << BITMASK_AND_SHIFT) | 6533 (OrMask << BITMASK_OR_SHIFT) | 6534 (XorMask << BITMASK_XOR_SHIFT); 6535 } 6536 6537 bool 6538 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6539 const unsigned MinVal, 6540 const unsigned MaxVal, 6541 const StringRef ErrMsg, 6542 SMLoc &Loc) { 6543 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6544 return false; 6545 } 6546 Loc = getLoc(); 6547 if (!parseExpr(Op)) { 6548 return false; 6549 } 6550 if (Op < MinVal || Op > MaxVal) { 6551 Error(Loc, ErrMsg); 6552 return false; 6553 } 6554 6555 return true; 6556 } 6557 6558 bool 6559 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6560 const unsigned MinVal, 6561 const unsigned MaxVal, 6562 const StringRef ErrMsg) { 6563 SMLoc Loc; 6564 for (unsigned i = 0; i < OpNum; ++i) { 6565 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6566 return false; 6567 } 6568 6569 return true; 6570 } 6571 6572 bool 6573 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6574 using namespace llvm::AMDGPU::Swizzle; 6575 6576 int64_t Lane[LANE_NUM]; 6577 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6578 "expected a 2-bit lane id")) { 6579 Imm = QUAD_PERM_ENC; 6580 for (unsigned I = 0; I < LANE_NUM; ++I) { 6581 Imm |= Lane[I] << (LANE_SHIFT * I); 6582 } 6583 return true; 6584 } 6585 return false; 6586 } 6587 6588 bool 6589 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6590 using namespace llvm::AMDGPU::Swizzle; 6591 6592 SMLoc Loc; 6593 int64_t GroupSize; 6594 int64_t LaneIdx; 6595 6596 if (!parseSwizzleOperand(GroupSize, 6597 2, 32, 6598 "group size must be in the interval [2,32]", 6599 Loc)) { 6600 return false; 6601 } 6602 if (!isPowerOf2_64(GroupSize)) { 6603 Error(Loc, "group size must be a power of two"); 6604 return false; 6605 } 6606 if (parseSwizzleOperand(LaneIdx, 6607 0, GroupSize - 1, 6608 "lane id must be in the interval [0,group size - 1]", 6609 Loc)) { 6610 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6611 return true; 6612 } 6613 return false; 6614 } 6615 6616 bool 6617 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6618 using namespace llvm::AMDGPU::Swizzle; 6619 6620 SMLoc Loc; 6621 int64_t GroupSize; 6622 6623 if (!parseSwizzleOperand(GroupSize, 6624 2, 32, 6625 "group size must be in the interval [2,32]", 6626 Loc)) { 6627 return false; 6628 } 6629 if (!isPowerOf2_64(GroupSize)) { 6630 Error(Loc, "group size must be a power of two"); 6631 return false; 6632 } 6633 6634 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6635 return true; 6636 } 6637 6638 bool 6639 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6640 using namespace llvm::AMDGPU::Swizzle; 6641 6642 SMLoc Loc; 6643 int64_t GroupSize; 6644 6645 if (!parseSwizzleOperand(GroupSize, 6646 1, 16, 6647 "group size must be in the interval [1,16]", 6648 Loc)) { 6649 return false; 6650 } 6651 if (!isPowerOf2_64(GroupSize)) { 6652 Error(Loc, "group size must be a power of two"); 6653 return false; 6654 } 6655 6656 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6657 return true; 6658 } 6659 6660 bool 6661 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6662 using namespace llvm::AMDGPU::Swizzle; 6663 6664 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6665 return false; 6666 } 6667 6668 StringRef Ctl; 6669 SMLoc StrLoc = getLoc(); 6670 if (!parseString(Ctl)) { 6671 return false; 6672 } 6673 if (Ctl.size() != BITMASK_WIDTH) { 6674 Error(StrLoc, "expected a 5-character mask"); 6675 return false; 6676 } 6677 6678 unsigned AndMask = 0; 6679 unsigned OrMask = 0; 6680 unsigned XorMask = 0; 6681 6682 for (size_t i = 0; i < Ctl.size(); ++i) { 6683 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6684 switch(Ctl[i]) { 6685 default: 6686 Error(StrLoc, "invalid mask"); 6687 return false; 6688 case '0': 6689 break; 6690 case '1': 6691 OrMask |= Mask; 6692 break; 6693 case 'p': 6694 AndMask |= Mask; 6695 break; 6696 case 'i': 6697 AndMask |= Mask; 6698 XorMask |= Mask; 6699 break; 6700 } 6701 } 6702 6703 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6704 return true; 6705 } 6706 6707 bool 6708 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6709 6710 SMLoc OffsetLoc = getLoc(); 6711 6712 if (!parseExpr(Imm, "a swizzle macro")) { 6713 return false; 6714 } 6715 if (!isUInt<16>(Imm)) { 6716 Error(OffsetLoc, "expected a 16-bit offset"); 6717 return false; 6718 } 6719 return true; 6720 } 6721 6722 bool 6723 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6724 using namespace llvm::AMDGPU::Swizzle; 6725 6726 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6727 6728 SMLoc ModeLoc = getLoc(); 6729 bool Ok = false; 6730 6731 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6732 Ok = parseSwizzleQuadPerm(Imm); 6733 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6734 Ok = parseSwizzleBitmaskPerm(Imm); 6735 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6736 Ok = parseSwizzleBroadcast(Imm); 6737 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6738 Ok = parseSwizzleSwap(Imm); 6739 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6740 Ok = parseSwizzleReverse(Imm); 6741 } else { 6742 Error(ModeLoc, "expected a swizzle mode"); 6743 } 6744 6745 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6746 } 6747 6748 return false; 6749 } 6750 6751 OperandMatchResultTy 6752 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6753 SMLoc S = getLoc(); 6754 int64_t Imm = 0; 6755 6756 if (trySkipId("offset")) { 6757 6758 bool Ok = false; 6759 if (skipToken(AsmToken::Colon, "expected a colon")) { 6760 if (trySkipId("swizzle")) { 6761 Ok = parseSwizzleMacro(Imm); 6762 } else { 6763 Ok = parseSwizzleOffset(Imm); 6764 } 6765 } 6766 6767 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6768 6769 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6770 } else { 6771 // Swizzle "offset" operand is optional. 6772 // If it is omitted, try parsing other optional operands. 6773 return parseOptionalOpr(Operands); 6774 } 6775 } 6776 6777 bool 6778 AMDGPUOperand::isSwizzle() const { 6779 return isImmTy(ImmTySwizzle); 6780 } 6781 6782 //===----------------------------------------------------------------------===// 6783 // VGPR Index Mode 6784 //===----------------------------------------------------------------------===// 6785 6786 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6787 6788 using namespace llvm::AMDGPU::VGPRIndexMode; 6789 6790 if (trySkipToken(AsmToken::RParen)) { 6791 return OFF; 6792 } 6793 6794 int64_t Imm = 0; 6795 6796 while (true) { 6797 unsigned Mode = 0; 6798 SMLoc S = getLoc(); 6799 6800 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6801 if (trySkipId(IdSymbolic[ModeId])) { 6802 Mode = 1 << ModeId; 6803 break; 6804 } 6805 } 6806 6807 if (Mode == 0) { 6808 Error(S, (Imm == 0)? 
6809 "expected a VGPR index mode or a closing parenthesis" : 6810 "expected a VGPR index mode"); 6811 return UNDEF; 6812 } 6813 6814 if (Imm & Mode) { 6815 Error(S, "duplicate VGPR index mode"); 6816 return UNDEF; 6817 } 6818 Imm |= Mode; 6819 6820 if (trySkipToken(AsmToken::RParen)) 6821 break; 6822 if (!skipToken(AsmToken::Comma, 6823 "expected a comma or a closing parenthesis")) 6824 return UNDEF; 6825 } 6826 6827 return Imm; 6828 } 6829 6830 OperandMatchResultTy 6831 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6832 6833 using namespace llvm::AMDGPU::VGPRIndexMode; 6834 6835 int64_t Imm = 0; 6836 SMLoc S = getLoc(); 6837 6838 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6839 Imm = parseGPRIdxMacro(); 6840 if (Imm == UNDEF) 6841 return MatchOperand_ParseFail; 6842 } else { 6843 if (getParser().parseAbsoluteExpression(Imm)) 6844 return MatchOperand_ParseFail; 6845 if (Imm < 0 || !isUInt<4>(Imm)) { 6846 Error(S, "invalid immediate: only 4-bit values are legal"); 6847 return MatchOperand_ParseFail; 6848 } 6849 } 6850 6851 Operands.push_back( 6852 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6853 return MatchOperand_Success; 6854 } 6855 6856 bool AMDGPUOperand::isGPRIdxMode() const { 6857 return isImmTy(ImmTyGprIdxMode); 6858 } 6859 6860 //===----------------------------------------------------------------------===// 6861 // sopp branch targets 6862 //===----------------------------------------------------------------------===// 6863 6864 OperandMatchResultTy 6865 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6866 6867 // Make sure we are not parsing something 6868 // that looks like a label or an expression but is not. 6869 // This will improve error messages. 6870 if (isRegister() || isModifier()) 6871 return MatchOperand_NoMatch; 6872 6873 if (!parseExpr(Operands)) 6874 return MatchOperand_ParseFail; 6875 6876 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6877 assert(Opr.isImm() || Opr.isExpr()); 6878 SMLoc Loc = Opr.getStartLoc(); 6879 6880 // Currently we do not support arbitrary expressions as branch targets. 6881 // Only labels and absolute expressions are accepted. 
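// For illustration (example operands): a label such as "BB0_1" or an
// absolute expression such as "12" is accepted, while a relocatable
// expression like "BB0_1+4" is rejected by the check below.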
6882 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6883 Error(Loc, "expected an absolute expression or a label"); 6884 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6885 Error(Loc, "expected a 16-bit signed jump offset"); 6886 } 6887 6888 return MatchOperand_Success; 6889 } 6890 6891 //===----------------------------------------------------------------------===// 6892 // Boolean holding registers 6893 //===----------------------------------------------------------------------===// 6894 6895 OperandMatchResultTy 6896 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6897 return parseReg(Operands); 6898 } 6899 6900 //===----------------------------------------------------------------------===// 6901 // mubuf 6902 //===----------------------------------------------------------------------===// 6903 6904 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6905 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6906 } 6907 6908 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const { 6909 return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(), 6910 AMDGPUOperand::ImmTyCPol); 6911 } 6912 6913 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6914 const OperandVector &Operands, 6915 bool IsAtomic, 6916 bool IsLds) { 6917 bool IsLdsOpcode = IsLds; 6918 bool HasLdsModifier = false; 6919 OptionalImmIndexMap OptionalIdx; 6920 unsigned FirstOperandIdx = 1; 6921 bool IsAtomicReturn = false; 6922 6923 if (IsAtomic) { 6924 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6925 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6926 if (!Op.isCPol()) 6927 continue; 6928 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 6929 break; 6930 } 6931 6932 if (!IsAtomicReturn) { 6933 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 6934 if (NewOpc != -1) 6935 Inst.setOpcode(NewOpc); 6936 } 6937 6938 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 6939 SIInstrFlags::IsAtomicRet; 6940 } 6941 6942 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6943 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6944 6945 // Add the register arguments 6946 if (Op.isReg()) { 6947 Op.addRegOperands(Inst, 1); 6948 // Insert a tied src for atomic return dst. 6949 // This cannot be postponed as subsequent calls to 6950 // addImmOperands rely on correct number of MC operands. 6951 if (IsAtomicReturn && i == FirstOperandIdx) 6952 Op.addRegOperands(Inst, 1); 6953 continue; 6954 } 6955 6956 // Handle the case where soffset is an immediate 6957 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6958 Op.addImmOperands(Inst, 1); 6959 continue; 6960 } 6961 6962 HasLdsModifier |= Op.isLDS(); 6963 6964 // Handle tokens like 'offen' which are sometimes hard-coded into the 6965 // asm string. There are no MCInst operands for these. 6966 if (Op.isToken()) { 6967 continue; 6968 } 6969 assert(Op.isImm()); 6970 6971 // Handle optional arguments 6972 OptionalIdx[Op.getImmTy()] = i; 6973 } 6974 6975 // This is a workaround for an llvm quirk which may result in an 6976 // incorrect instruction selection. Lds and non-lds versions of 6977 // MUBUF instructions are identical except that lds versions 6978 // have mandatory 'lds' modifier. However this modifier follows 6979 // optional modifiers and llvm asm matcher regards this 'lds' 6980 // modifier as an optional one. As a result, an lds version 6981 // of opcode may be selected even if it has no 'lds' modifier. 
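// Concretely: if an lds variant was chosen even though no 'lds' modifier
// was parsed, the code below uses getMUBUFNoLdsInst to switch back to the
// corresponding non-lds opcode.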
6982 if (IsLdsOpcode && !HasLdsModifier) { 6983 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6984 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6985 Inst.setOpcode(NoLdsOpcode); 6986 IsLdsOpcode = false; 6987 } 6988 } 6989 6990 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 6992 6993 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6995 } 6996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6997 } 6998 6999 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7000 OptionalImmIndexMap OptionalIdx; 7001 7002 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7003 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7004 7005 // Add the register arguments 7006 if (Op.isReg()) { 7007 Op.addRegOperands(Inst, 1); 7008 continue; 7009 } 7010 7011 // Handle the case where soffset is an immediate 7012 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7013 Op.addImmOperands(Inst, 1); 7014 continue; 7015 } 7016 7017 // Handle tokens like 'offen' which are sometimes hard-coded into the 7018 // asm string. There are no MCInst operands for these. 7019 if (Op.isToken()) { 7020 continue; 7021 } 7022 assert(Op.isImm()); 7023 7024 // Handle optional arguments 7025 OptionalIdx[Op.getImmTy()] = i; 7026 } 7027 7028 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7029 AMDGPUOperand::ImmTyOffset); 7030 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7031 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7032 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7033 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7034 } 7035 7036 //===----------------------------------------------------------------------===// 7037 // mimg 7038 //===----------------------------------------------------------------------===// 7039 7040 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7041 bool IsAtomic) { 7042 unsigned I = 1; 7043 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7044 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7045 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7046 } 7047 7048 if (IsAtomic) { 7049 // Add src, same as dst 7050 assert(Desc.getNumDefs() == 1); 7051 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7052 } 7053 7054 OptionalImmIndexMap OptionalIdx; 7055 7056 for (unsigned E = Operands.size(); I != E; ++I) { 7057 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7058 7059 // Add the register arguments 7060 if (Op.isReg()) { 7061 Op.addRegOperands(Inst, 1); 7062 } else if (Op.isImmModifier()) { 7063 OptionalIdx[Op.getImmTy()] = I; 7064 } else if (!Op.isToken()) { 7065 llvm_unreachable("unexpected operand type"); 7066 } 7067 } 7068 7069 bool IsGFX10Plus = isGFX10Plus(); 7070 7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7072 if (IsGFX10Plus) 7073 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7077 if 
(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7079 if (IsGFX10Plus) 7080 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7081 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7082 if (!IsGFX10Plus) 7083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7084 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7085 } 7086 7087 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7088 cvtMIMG(Inst, Operands, true); 7089 } 7090 7091 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7092 OptionalImmIndexMap OptionalIdx; 7093 bool IsAtomicReturn = false; 7094 7095 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7096 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7097 if (!Op.isCPol()) 7098 continue; 7099 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7100 break; 7101 } 7102 7103 if (!IsAtomicReturn) { 7104 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7105 if (NewOpc != -1) 7106 Inst.setOpcode(NewOpc); 7107 } 7108 7109 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7110 SIInstrFlags::IsAtomicRet; 7111 7112 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7113 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7114 7115 // Add the register arguments 7116 if (Op.isReg()) { 7117 Op.addRegOperands(Inst, 1); 7118 if (IsAtomicReturn && i == 1) 7119 Op.addRegOperands(Inst, 1); 7120 continue; 7121 } 7122 7123 // Handle the case where soffset is an immediate 7124 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7125 Op.addImmOperands(Inst, 1); 7126 continue; 7127 } 7128 7129 // Handle tokens like 'offen' which are sometimes hard-coded into the 7130 // asm string. There are no MCInst operands for these. 7131 if (Op.isToken()) { 7132 continue; 7133 } 7134 assert(Op.isImm()); 7135 7136 // Handle optional arguments 7137 OptionalIdx[Op.getImmTy()] = i; 7138 } 7139 7140 if ((int)Inst.getNumOperands() <= 7141 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7142 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7144 } 7145 7146 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7147 const OperandVector &Operands) { 7148 for (unsigned I = 1; I < Operands.size(); ++I) { 7149 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7150 if (Operand.isReg()) 7151 Operand.addRegOperands(Inst, 1); 7152 } 7153 7154 Inst.addOperand(MCOperand::createImm(1)); // a16 7155 } 7156 7157 //===----------------------------------------------------------------------===// 7158 // smrd 7159 //===----------------------------------------------------------------------===// 7160 7161 bool AMDGPUOperand::isSMRDOffset8() const { 7162 return isImm() && isUInt<8>(getImm()); 7163 } 7164 7165 bool AMDGPUOperand::isSMEMOffset() const { 7166 return isImm(); // Offset range is checked later by validator. 7167 } 7168 7169 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7170 // 32-bit literals are only supported on CI and we only want to use them 7171 // when the offset is > 8-bits. 
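// For example (illustrative values): an offset of 0x1234 requires the
// 32-bit literal form, while 0x12 fits in 8 bits and is rejected here so
// the shorter SMRD offset encoding can be used instead.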
7172 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7173 } 7174 7175 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7176 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7177 } 7178 7179 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7180 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7181 } 7182 7183 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7184 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7185 } 7186 7187 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7188 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7189 } 7190 7191 //===----------------------------------------------------------------------===// 7192 // vop3 7193 //===----------------------------------------------------------------------===// 7194 7195 static bool ConvertOmodMul(int64_t &Mul) { 7196 if (Mul != 1 && Mul != 2 && Mul != 4) 7197 return false; 7198 7199 Mul >>= 1; 7200 return true; 7201 } 7202 7203 static bool ConvertOmodDiv(int64_t &Div) { 7204 if (Div == 1) { 7205 Div = 0; 7206 return true; 7207 } 7208 7209 if (Div == 2) { 7210 Div = 3; 7211 return true; 7212 } 7213 7214 return false; 7215 } 7216 7217 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7218 // This is intentional and ensures compatibility with sp3. 7219 // See bug 35397 for details. 7220 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7221 if (BoundCtrl == 0 || BoundCtrl == 1) { 7222 BoundCtrl = 1; 7223 return true; 7224 } 7225 return false; 7226 } 7227 7228 // Note: the order in this table matches the order of operands in AsmString. 7229 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7230 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7231 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7232 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7233 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7234 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7235 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7236 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7237 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7238 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7239 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7240 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7241 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7242 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7243 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7244 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7245 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7246 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7247 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7248 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7249 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7250 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7251 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7252 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7253 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7254 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7255 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7256 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7257 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7258 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7259 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7260 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7261 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7262 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7263 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7264 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7265 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7266 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7267 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7268 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7269 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7270 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7271 }; 7272 7273 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7274 7275 OperandMatchResultTy res = parseOptionalOpr(Operands); 7276 7277 // This is a hack to enable hardcoded mandatory operands which follow 7278 // optional operands. 7279 // 7280 // Current design assumes that all operands after the first optional operand 7281 // are also optional. However implementation of some instructions violates 7282 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7283 // 7284 // To alleviate this problem, we have to (implicitly) parse extra operands 7285 // to make sure autogenerated parser of custom operands never hit hardcoded 7286 // mandatory operands. 7287 7288 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7289 if (res != MatchOperand_Success || 7290 isToken(AsmToken::EndOfStatement)) 7291 break; 7292 7293 trySkipToken(AsmToken::Comma); 7294 res = parseOptionalOpr(Operands); 7295 } 7296 7297 return res; 7298 } 7299 7300 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7301 OperandMatchResultTy res; 7302 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7303 // try to parse any optional operand here 7304 if (Op.IsBit) { 7305 res = parseNamedBit(Op.Name, Operands, Op.Type); 7306 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7307 res = parseOModOperand(Operands); 7308 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7309 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7310 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7311 res = parseSDWASel(Operands, Op.Name, Op.Type); 7312 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7313 res = parseSDWADstUnused(Operands); 7314 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7315 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7316 Op.Type == AMDGPUOperand::ImmTyNegLo || 7317 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7318 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7319 Op.ConvertResult); 7320 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7321 res = parseDim(Operands); 7322 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7323 res = parseCPol(Operands); 7324 } else { 7325 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7326 } 7327 if (res != MatchOperand_NoMatch) { 7328 return res; 7329 } 7330 } 7331 return MatchOperand_NoMatch; 7332 } 7333 7334 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7335 StringRef Name = getTokenStr(); 7336 if (Name == "mul") { 7337 return parseIntWithPrefix("mul", Operands, 7338 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7339 } 7340 7341 if (Name == "div") { 7342 return parseIntWithPrefix("div", Operands, 7343 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7344 } 7345 7346 return MatchOperand_NoMatch; 7347 
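  // Note: the values accepted above are remapped into the omod field encoding
  // by ConvertOmodMul and ConvertOmodDiv, i.e. mul:1 -> 0, mul:2 -> 1,
  // mul:4 -> 2, div:1 -> 0 and div:2 -> 3 (derived from those helpers).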
} 7348 7349 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7350 cvtVOP3P(Inst, Operands); 7351 7352 int Opc = Inst.getOpcode(); 7353 7354 int SrcNum; 7355 const int Ops[] = { AMDGPU::OpName::src0, 7356 AMDGPU::OpName::src1, 7357 AMDGPU::OpName::src2 }; 7358 for (SrcNum = 0; 7359 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7360 ++SrcNum); 7361 assert(SrcNum > 0); 7362 7363 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7364 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7365 7366 if ((OpSel & (1 << SrcNum)) != 0) { 7367 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7368 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7369 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7370 } 7371 } 7372 7373 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7374 // 1. This operand is input modifiers 7375 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7376 // 2. This is not last operand 7377 && Desc.NumOperands > (OpNum + 1) 7378 // 3. Next operand is register class 7379 && Desc.OpInfo[OpNum + 1].RegClass != -1 7380 // 4. Next register is not tied to any other operand 7381 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7382 } 7383 7384 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7385 { 7386 OptionalImmIndexMap OptionalIdx; 7387 unsigned Opc = Inst.getOpcode(); 7388 7389 unsigned I = 1; 7390 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7391 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7392 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7393 } 7394 7395 for (unsigned E = Operands.size(); I != E; ++I) { 7396 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7397 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7398 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7399 } else if (Op.isInterpSlot() || 7400 Op.isInterpAttr() || 7401 Op.isAttrChan()) { 7402 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7403 } else if (Op.isImmModifier()) { 7404 OptionalIdx[Op.getImmTy()] = I; 7405 } else { 7406 llvm_unreachable("unhandled operand type"); 7407 } 7408 } 7409 7410 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7411 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7412 } 7413 7414 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7415 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7416 } 7417 7418 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7419 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7420 } 7421 } 7422 7423 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7424 OptionalImmIndexMap &OptionalIdx) { 7425 unsigned Opc = Inst.getOpcode(); 7426 7427 unsigned I = 1; 7428 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7429 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7430 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7431 } 7432 7433 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7434 // This instruction has src modifiers 7435 for (unsigned E = Operands.size(); I != E; ++I) { 7436 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7437 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7438 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7439 } else if (Op.isImmModifier()) { 
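        // Record where this optional modifier immediate (clamp, omod, ...)
        // was parsed; it is appended below in the order the opcode expects.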
7440 OptionalIdx[Op.getImmTy()] = I; 7441 } else if (Op.isRegOrImm()) { 7442 Op.addRegOrImmOperands(Inst, 1); 7443 } else { 7444 llvm_unreachable("unhandled operand type"); 7445 } 7446 } 7447 } else { 7448 // No src modifiers 7449 for (unsigned E = Operands.size(); I != E; ++I) { 7450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7451 if (Op.isMod()) { 7452 OptionalIdx[Op.getImmTy()] = I; 7453 } else { 7454 Op.addRegOrImmOperands(Inst, 1); 7455 } 7456 } 7457 } 7458 7459 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7460 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7461 } 7462 7463 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7464 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7465 } 7466 7467 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7468 // it has src2 register operand that is tied to dst operand 7469 // we don't allow modifiers for this operand in assembler so src2_modifiers 7470 // should be 0. 7471 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7472 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7473 Opc == AMDGPU::V_MAC_F32_e64_vi || 7474 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7475 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7476 Opc == AMDGPU::V_MAC_F16_e64_vi || 7477 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7478 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7479 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7480 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7481 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7482 auto it = Inst.begin(); 7483 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7484 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7485 ++it; 7486 // Copy the operand to ensure it's not invalidated when Inst grows. 7487 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7488 } 7489 } 7490 7491 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7492 OptionalImmIndexMap OptionalIdx; 7493 cvtVOP3(Inst, Operands, OptionalIdx); 7494 } 7495 7496 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7497 const OperandVector &Operands) { 7498 OptionalImmIndexMap OptIdx; 7499 const int Opc = Inst.getOpcode(); 7500 const MCInstrDesc &Desc = MII.get(Opc); 7501 7502 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7503 7504 cvtVOP3(Inst, Operands, OptIdx); 7505 7506 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7507 assert(!IsPacked); 7508 Inst.addOperand(Inst.getOperand(0)); 7509 } 7510 7511 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7512 // instruction, and then figure out where to actually put the modifiers 7513 7514 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7515 7516 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7517 if (OpSelHiIdx != -1) { 7518 int DefaultVal = IsPacked ? 
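    // Packed (VOP3P) instructions default op_sel_hi to all ones; non-packed
    // forms default it to 0.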
-1 : 0; 7519 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7520 DefaultVal); 7521 } 7522 7523 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7524 if (NegLoIdx != -1) { 7525 assert(IsPacked); 7526 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7527 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7528 } 7529 7530 const int Ops[] = { AMDGPU::OpName::src0, 7531 AMDGPU::OpName::src1, 7532 AMDGPU::OpName::src2 }; 7533 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7534 AMDGPU::OpName::src1_modifiers, 7535 AMDGPU::OpName::src2_modifiers }; 7536 7537 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7538 7539 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7540 unsigned OpSelHi = 0; 7541 unsigned NegLo = 0; 7542 unsigned NegHi = 0; 7543 7544 if (OpSelHiIdx != -1) { 7545 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7546 } 7547 7548 if (NegLoIdx != -1) { 7549 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7550 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7551 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7552 } 7553 7554 for (int J = 0; J < 3; ++J) { 7555 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7556 if (OpIdx == -1) 7557 break; 7558 7559 uint32_t ModVal = 0; 7560 7561 if ((OpSel & (1 << J)) != 0) 7562 ModVal |= SISrcMods::OP_SEL_0; 7563 7564 if ((OpSelHi & (1 << J)) != 0) 7565 ModVal |= SISrcMods::OP_SEL_1; 7566 7567 if ((NegLo & (1 << J)) != 0) 7568 ModVal |= SISrcMods::NEG; 7569 7570 if ((NegHi & (1 << J)) != 0) 7571 ModVal |= SISrcMods::NEG_HI; 7572 7573 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7574 7575 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7576 } 7577 } 7578 7579 //===----------------------------------------------------------------------===// 7580 // dpp 7581 //===----------------------------------------------------------------------===// 7582 7583 bool AMDGPUOperand::isDPP8() const { 7584 return isImmTy(ImmTyDPP8); 7585 } 7586 7587 bool AMDGPUOperand::isDPPCtrl() const { 7588 using namespace AMDGPU::DPP; 7589 7590 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7591 if (result) { 7592 int64_t Imm = getImm(); 7593 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7594 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7595 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7596 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7597 (Imm == DppCtrl::WAVE_SHL1) || 7598 (Imm == DppCtrl::WAVE_ROL1) || 7599 (Imm == DppCtrl::WAVE_SHR1) || 7600 (Imm == DppCtrl::WAVE_ROR1) || 7601 (Imm == DppCtrl::ROW_MIRROR) || 7602 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7603 (Imm == DppCtrl::BCAST15) || 7604 (Imm == DppCtrl::BCAST31) || 7605 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7606 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7607 } 7608 return false; 7609 } 7610 7611 //===----------------------------------------------------------------------===// 7612 // mAI 7613 //===----------------------------------------------------------------------===// 7614 7615 bool AMDGPUOperand::isBLGP() const { 7616 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7617 } 7618 7619 bool AMDGPUOperand::isCBSZ() const { 7620 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7621 } 7622 7623 bool AMDGPUOperand::isABID() 
const { 7624 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7625 } 7626 7627 bool AMDGPUOperand::isS16Imm() const { 7628 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7629 } 7630 7631 bool AMDGPUOperand::isU16Imm() const { 7632 return isImm() && isUInt<16>(getImm()); 7633 } 7634 7635 //===----------------------------------------------------------------------===// 7636 // dim 7637 //===----------------------------------------------------------------------===// 7638 7639 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7640 // We want to allow "dim:1D" etc., 7641 // but the initial 1 is tokenized as an integer. 7642 std::string Token; 7643 if (isToken(AsmToken::Integer)) { 7644 SMLoc Loc = getToken().getEndLoc(); 7645 Token = std::string(getTokenStr()); 7646 lex(); 7647 if (getLoc() != Loc) 7648 return false; 7649 } 7650 7651 StringRef Suffix; 7652 if (!parseId(Suffix)) 7653 return false; 7654 Token += Suffix; 7655 7656 StringRef DimId = Token; 7657 if (DimId.startswith("SQ_RSRC_IMG_")) 7658 DimId = DimId.drop_front(12); 7659 7660 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7661 if (!DimInfo) 7662 return false; 7663 7664 Encoding = DimInfo->Encoding; 7665 return true; 7666 } 7667 7668 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7669 if (!isGFX10Plus()) 7670 return MatchOperand_NoMatch; 7671 7672 SMLoc S = getLoc(); 7673 7674 if (!trySkipId("dim", AsmToken::Colon)) 7675 return MatchOperand_NoMatch; 7676 7677 unsigned Encoding; 7678 SMLoc Loc = getLoc(); 7679 if (!parseDimId(Encoding)) { 7680 Error(Loc, "invalid dim value"); 7681 return MatchOperand_ParseFail; 7682 } 7683 7684 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7685 AMDGPUOperand::ImmTyDim)); 7686 return MatchOperand_Success; 7687 } 7688 7689 //===----------------------------------------------------------------------===// 7690 // dpp 7691 //===----------------------------------------------------------------------===// 7692 7693 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7694 SMLoc S = getLoc(); 7695 7696 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7697 return MatchOperand_NoMatch; 7698 7699 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7700 7701 int64_t Sels[8]; 7702 7703 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7704 return MatchOperand_ParseFail; 7705 7706 for (size_t i = 0; i < 8; ++i) { 7707 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7708 return MatchOperand_ParseFail; 7709 7710 SMLoc Loc = getLoc(); 7711 if (getParser().parseAbsoluteExpression(Sels[i])) 7712 return MatchOperand_ParseFail; 7713 if (0 > Sels[i] || 7 < Sels[i]) { 7714 Error(Loc, "expected a 3-bit value"); 7715 return MatchOperand_ParseFail; 7716 } 7717 } 7718 7719 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7720 return MatchOperand_ParseFail; 7721 7722 unsigned DPP8 = 0; 7723 for (size_t i = 0; i < 8; ++i) 7724 DPP8 |= (Sels[i] << (i * 3)); 7725 7726 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7727 return MatchOperand_Success; 7728 } 7729 7730 bool 7731 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7732 const OperandVector &Operands) { 7733 if (Ctrl == "row_newbcast") 7734 return isGFX90A(); 7735 7736 // DPP64 is supported for row_newbcast only. 
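  // Any other control is rejected when the first explicit operand is a
  // 64-bit register (detected below by the presence of a sub1 subregister).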
7737 const MCRegisterInfo *MRI = getMRI(); 7738 if (Operands.size() > 2 && Operands[1]->isReg() && 7739 MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1)) 7740 return false; 7741 7742 if (Ctrl == "row_share" || 7743 Ctrl == "row_xmask") 7744 return isGFX10Plus(); 7745 7746 if (Ctrl == "wave_shl" || 7747 Ctrl == "wave_shr" || 7748 Ctrl == "wave_rol" || 7749 Ctrl == "wave_ror" || 7750 Ctrl == "row_bcast") 7751 return isVI() || isGFX9(); 7752 7753 return Ctrl == "row_mirror" || 7754 Ctrl == "row_half_mirror" || 7755 Ctrl == "quad_perm" || 7756 Ctrl == "row_shl" || 7757 Ctrl == "row_shr" || 7758 Ctrl == "row_ror"; 7759 } 7760 7761 int64_t 7762 AMDGPUAsmParser::parseDPPCtrlPerm() { 7763 // quad_perm:[%d,%d,%d,%d] 7764 7765 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7766 return -1; 7767 7768 int64_t Val = 0; 7769 for (int i = 0; i < 4; ++i) { 7770 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7771 return -1; 7772 7773 int64_t Temp; 7774 SMLoc Loc = getLoc(); 7775 if (getParser().parseAbsoluteExpression(Temp)) 7776 return -1; 7777 if (Temp < 0 || Temp > 3) { 7778 Error(Loc, "expected a 2-bit value"); 7779 return -1; 7780 } 7781 7782 Val += (Temp << i * 2); 7783 } 7784 7785 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7786 return -1; 7787 7788 return Val; 7789 } 7790 7791 int64_t 7792 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7793 using namespace AMDGPU::DPP; 7794 7795 // sel:%d 7796 7797 int64_t Val; 7798 SMLoc Loc = getLoc(); 7799 7800 if (getParser().parseAbsoluteExpression(Val)) 7801 return -1; 7802 7803 struct DppCtrlCheck { 7804 int64_t Ctrl; 7805 int Lo; 7806 int Hi; 7807 }; 7808 7809 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7810 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7811 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7812 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7813 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7814 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7815 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7816 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7817 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7818 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7819 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7820 .Default({-1, 0, 0}); 7821 7822 bool Valid; 7823 if (Check.Ctrl == -1) { 7824 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7825 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7826 } else { 7827 Valid = Check.Lo <= Val && Val <= Check.Hi; 7828 Val = (Check.Lo == Check.Hi) ? 
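  // Controls with a single legal value (Lo == Hi, e.g. wave_shl:1) encode as
  // the base control; ranged controls OR the parsed value into the base, so
  // e.g. row_shl:3 would become ROW_SHL0 | 3 (illustrative value).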
Check.Ctrl : (Check.Ctrl | Val); 7829 } 7830 7831 if (!Valid) { 7832 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7833 return -1; 7834 } 7835 7836 return Val; 7837 } 7838 7839 OperandMatchResultTy 7840 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7841 using namespace AMDGPU::DPP; 7842 7843 if (!isToken(AsmToken::Identifier) || 7844 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7845 return MatchOperand_NoMatch; 7846 7847 SMLoc S = getLoc(); 7848 int64_t Val = -1; 7849 StringRef Ctrl; 7850 7851 parseId(Ctrl); 7852 7853 if (Ctrl == "row_mirror") { 7854 Val = DppCtrl::ROW_MIRROR; 7855 } else if (Ctrl == "row_half_mirror") { 7856 Val = DppCtrl::ROW_HALF_MIRROR; 7857 } else { 7858 if (skipToken(AsmToken::Colon, "expected a colon")) { 7859 if (Ctrl == "quad_perm") { 7860 Val = parseDPPCtrlPerm(); 7861 } else { 7862 Val = parseDPPCtrlSel(Ctrl); 7863 } 7864 } 7865 } 7866 7867 if (Val == -1) 7868 return MatchOperand_ParseFail; 7869 7870 Operands.push_back( 7871 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7872 return MatchOperand_Success; 7873 } 7874 7875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7876 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7877 } 7878 7879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7881 } 7882 7883 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7884 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7885 } 7886 7887 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7888 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7889 } 7890 7891 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7892 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7893 } 7894 7895 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7896 OptionalImmIndexMap OptionalIdx; 7897 7898 unsigned I = 1; 7899 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7900 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7901 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7902 } 7903 7904 int Fi = 0; 7905 for (unsigned E = Operands.size(); I != E; ++I) { 7906 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7907 MCOI::TIED_TO); 7908 if (TiedTo != -1) { 7909 assert((unsigned)TiedTo < Inst.getNumOperands()); 7910 // handle tied old or src2 for MAC instructions 7911 Inst.addOperand(Inst.getOperand(TiedTo)); 7912 } 7913 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7914 // Add the register arguments 7915 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7916 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7917 // Skip it. 
7918 continue; 7919 } 7920 7921 if (IsDPP8) { 7922 if (Op.isDPP8()) { 7923 Op.addImmOperands(Inst, 1); 7924 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7925 Op.addRegWithFPInputModsOperands(Inst, 2); 7926 } else if (Op.isFI()) { 7927 Fi = Op.getImm(); 7928 } else if (Op.isReg()) { 7929 Op.addRegOperands(Inst, 1); 7930 } else { 7931 llvm_unreachable("Invalid operand type"); 7932 } 7933 } else { 7934 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7935 Op.addRegWithFPInputModsOperands(Inst, 2); 7936 } else if (Op.isDPPCtrl()) { 7937 Op.addImmOperands(Inst, 1); 7938 } else if (Op.isImm()) { 7939 // Handle optional arguments 7940 OptionalIdx[Op.getImmTy()] = I; 7941 } else { 7942 llvm_unreachable("Invalid operand type"); 7943 } 7944 } 7945 } 7946 7947 if (IsDPP8) { 7948 using namespace llvm::AMDGPU::DPP; 7949 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7950 } else { 7951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7952 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7953 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7954 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7955 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7956 } 7957 } 7958 } 7959 7960 //===----------------------------------------------------------------------===// 7961 // sdwa 7962 //===----------------------------------------------------------------------===// 7963 7964 OperandMatchResultTy 7965 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7966 AMDGPUOperand::ImmTy Type) { 7967 using namespace llvm::AMDGPU::SDWA; 7968 7969 SMLoc S = getLoc(); 7970 StringRef Value; 7971 OperandMatchResultTy res; 7972 7973 SMLoc StringLoc; 7974 res = parseStringWithPrefix(Prefix, Value, StringLoc); 7975 if (res != MatchOperand_Success) { 7976 return res; 7977 } 7978 7979 int64_t Int; 7980 Int = StringSwitch<int64_t>(Value) 7981 .Case("BYTE_0", SdwaSel::BYTE_0) 7982 .Case("BYTE_1", SdwaSel::BYTE_1) 7983 .Case("BYTE_2", SdwaSel::BYTE_2) 7984 .Case("BYTE_3", SdwaSel::BYTE_3) 7985 .Case("WORD_0", SdwaSel::WORD_0) 7986 .Case("WORD_1", SdwaSel::WORD_1) 7987 .Case("DWORD", SdwaSel::DWORD) 7988 .Default(0xffffffff); 7989 7990 if (Int == 0xffffffff) { 7991 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 7992 return MatchOperand_ParseFail; 7993 } 7994 7995 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7996 return MatchOperand_Success; 7997 } 7998 7999 OperandMatchResultTy 8000 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8001 using namespace llvm::AMDGPU::SDWA; 8002 8003 SMLoc S = getLoc(); 8004 StringRef Value; 8005 OperandMatchResultTy res; 8006 8007 SMLoc StringLoc; 8008 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8009 if (res != MatchOperand_Success) { 8010 return res; 8011 } 8012 8013 int64_t Int; 8014 Int = StringSwitch<int64_t>(Value) 8015 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8016 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8017 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8018 .Default(0xffffffff); 8019 8020 if (Int == 0xffffffff) { 8021 Error(StringLoc, "invalid dst_unused value"); 8022 return MatchOperand_ParseFail; 8023 } 8024 8025 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8026 return MatchOperand_Success; 8027 } 8028 8029 void 
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8030 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8031 } 8032 8033 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8034 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8035 } 8036 8037 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8038 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8039 } 8040 8041 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8042 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8043 } 8044 8045 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8046 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8047 } 8048 8049 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8050 uint64_t BasicInstType, 8051 bool SkipDstVcc, 8052 bool SkipSrcVcc) { 8053 using namespace llvm::AMDGPU::SDWA; 8054 8055 OptionalImmIndexMap OptionalIdx; 8056 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8057 bool SkippedVcc = false; 8058 8059 unsigned I = 1; 8060 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8061 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8062 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8063 } 8064 8065 for (unsigned E = Operands.size(); I != E; ++I) { 8066 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8067 if (SkipVcc && !SkippedVcc && Op.isReg() && 8068 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8069 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8070 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8071 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8072 // Skip VCC only if we didn't skip it on previous iteration. 8073 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
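      // With only the vdst def added so far, an operand count of 1 corresponds
      // to the dst 'vcc' slot; a count of 5 (vdst plus two modifier/register
      // pairs) corresponds to the src 'vcc' slot.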
8074 if (BasicInstType == SIInstrFlags::VOP2 && 8075 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8076 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8077 SkippedVcc = true; 8078 continue; 8079 } else if (BasicInstType == SIInstrFlags::VOPC && 8080 Inst.getNumOperands() == 0) { 8081 SkippedVcc = true; 8082 continue; 8083 } 8084 } 8085 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8086 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8087 } else if (Op.isImm()) { 8088 // Handle optional arguments 8089 OptionalIdx[Op.getImmTy()] = I; 8090 } else { 8091 llvm_unreachable("Invalid operand type"); 8092 } 8093 SkippedVcc = false; 8094 } 8095 8096 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8097 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8098 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8099 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8100 switch (BasicInstType) { 8101 case SIInstrFlags::VOP1: 8102 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8103 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8104 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8105 } 8106 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8107 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8108 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8109 break; 8110 8111 case SIInstrFlags::VOP2: 8112 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8113 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8115 } 8116 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8119 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8120 break; 8121 8122 case SIInstrFlags::VOPC: 8123 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8125 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8126 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8127 break; 8128 8129 default: 8130 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8209 case MCK_Attr: 8210 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8211 case MCK_AttrChan: 8212 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8213 case MCK_ImmSMEMOffset: 8214 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8215 case MCK_SReg_64: 8216 case MCK_SReg_64_XEXEC: 8217 // Null is defined as a 32-bit register but 8218 // it should also be enabled with 64-bit operands. 8219 // The following code enables it for SReg_64 operands 8220 // used as source and destination. Remaining source 8221 // operands are handled in isInlinableImm. 8222 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8223 default: 8224 return Match_InvalidOperand; 8225 } 8226 } 8227 8228 //===----------------------------------------------------------------------===// 8229 // endpgm 8230 //===----------------------------------------------------------------------===// 8231 8232 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8233 SMLoc S = getLoc(); 8234 int64_t Imm = 0; 8235 8236 if (!parseExpr(Imm)) { 8237 // The operand is optional, if not present default to 0 8238 Imm = 0; 8239 } 8240 8241 if (!isUInt<16>(Imm)) { 8242 Error(S, "expected a 16-bit value"); 8243 return MatchOperand_ParseFail; 8244 } 8245 8246 Operands.push_back( 8247 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8248 return MatchOperand_Success; 8249 } 8250 8251 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8252