1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 88 return Operand; 89 } 90 91 int64_t getModifiersOperand() const { 92 assert(!(hasFPModifiers() && hasIntModifiers()) 93 && "fp and int modifiers should not be used simultaneously"); 94 if (hasFPModifiers()) { 95 return getFPModifiersOperand(); 96 } else if (hasIntModifiers()) { 97 return getIntModifiersOperand(); 98 } else { 99 return 0; 100 } 101 } 102 103 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 104 }; 105 106 enum ImmTy { 107 ImmTyNone, 108 ImmTyGDS, 109 ImmTyLDS, 110 ImmTyOffen, 111 ImmTyIdxen, 112 ImmTyAddr64, 113 ImmTyOffset, 114 ImmTyInstOffset, 115 ImmTyOffset0, 116 ImmTyOffset1, 117 ImmTyCPol, 118 ImmTySWZ, 119 ImmTyTFE, 120 ImmTyD16, 121 ImmTyClampSI, 122 ImmTyOModSI, 123 ImmTyDPP8, 124 ImmTyDppCtrl, 125 ImmTyDppRowMask, 126 ImmTyDppBankMask, 127 ImmTyDppBoundCtrl, 128 ImmTyDppFi, 129 ImmTySdwaDstSel, 130 ImmTySdwaSrc0Sel, 131 ImmTySdwaSrc1Sel, 132 ImmTySdwaDstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTySwizzle, 155 ImmTyGprIdxMode, 156 ImmTyHigh, 157 ImmTyBLGP, 158 ImmTyCBSZ, 159 ImmTyABID, 160 ImmTyEndpgm, 161 }; 162 163 enum ImmKindTy { 164 ImmKindTyNone, 165 ImmKindTyLiteral, 166 ImmKindTyConst, 167 }; 168 169 private: 170 struct TokOp { 171 const char *Data; 172 unsigned Length; 173 }; 174 175 struct ImmOp { 176 int64_t Val; 177 ImmTy Type; 178 bool IsFPImm; 179 mutable ImmKindTy Kind; 180 Modifiers Mods; 181 }; 182 183 struct RegOp { 184 unsigned RegNo; 185 Modifiers Mods; 186 }; 187 188 union { 189 TokOp Tok; 190 ImmOp Imm; 191 RegOp Reg; 192 const MCExpr *Expr; 193 }; 194 195 public: 196 bool isToken() const override { 197 if (Kind == Token) 198 return true; 199 200 // When parsing operands, we can't always tell if something was meant to be 201 // a token, like 'gds', or an expression that references a global variable. 202 // In this case, we assume the string is an expression, and if we need to 203 // interpret is a token, then we treat the symbol name as the token. 
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 // TODO: Possibly make subtargetHasRegister const. 1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1217 bool ParseDirectiveAMDGPUHsaKernel(); 1218 1219 bool ParseDirectiveISAVersion(); 1220 bool ParseDirectiveHSAMetadata(); 1221 bool ParseDirectivePALMetadataBegin(); 1222 bool ParseDirectivePALMetadata(); 1223 bool ParseDirectiveAMDGPULDS(); 1224 1225 /// Common code to parse out a block of text (typically YAML) between start and 1226 /// end directives. 
1227 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1228 const char *AssemblerDirectiveEnd, 1229 std::string &CollectString); 1230 1231 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1232 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1233 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1234 unsigned &RegNum, unsigned &RegWidth, 1235 bool RestoreOnFailure = false); 1236 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1237 unsigned &RegNum, unsigned &RegWidth, 1238 SmallVectorImpl<AsmToken> &Tokens); 1239 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1240 unsigned &RegWidth, 1241 SmallVectorImpl<AsmToken> &Tokens); 1242 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1243 unsigned &RegWidth, 1244 SmallVectorImpl<AsmToken> &Tokens); 1245 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1246 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1247 bool ParseRegRange(unsigned& Num, unsigned& Width); 1248 unsigned getRegularReg(RegisterKind RegKind, 1249 unsigned RegNum, 1250 unsigned RegWidth, 1251 SMLoc Loc); 1252 1253 bool isRegister(); 1254 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1255 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1256 void initializeGprCountSymbol(RegisterKind RegKind); 1257 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1258 unsigned RegWidth); 1259 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1260 bool IsAtomic, bool IsLds = false); 1261 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1262 bool IsGdsHardcoded); 1263 1264 public: 1265 enum AMDGPUMatchResultTy { 1266 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1267 }; 1268 enum OperandMode { 1269 OperandMode_Default, 1270 OperandMode_NSA, 1271 }; 1272 1273 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1274 1275 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1276 const MCInstrInfo &MII, 1277 const MCTargetOptions &Options) 1278 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1279 MCAsmParserExtension::Initialize(Parser); 1280 1281 if (getFeatureBits().none()) { 1282 // Set default features. 1283 copySTI().ToggleFeature("southern-islands"); 1284 } 1285 1286 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1287 1288 { 1289 // TODO: make those pre-defined variables read-only. 1290 // Currently there is none suitable machinery in the core llvm-mc for this. 1291 // MCSymbol::isRedefinable is intended for another purpose, and 1292 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool isSI() const { 1333 return AMDGPU::isSI(getSTI()); 1334 } 1335 1336 bool isCI() const { 1337 return AMDGPU::isCI(getSTI()); 1338 } 1339 1340 bool isVI() const { 1341 return AMDGPU::isVI(getSTI()); 1342 } 1343 1344 bool isGFX9() const { 1345 return AMDGPU::isGFX9(getSTI()); 1346 } 1347 1348 bool isGFX90A() const { 1349 return AMDGPU::isGFX90A(getSTI()); 1350 } 1351 1352 bool isGFX9Plus() const { 1353 return AMDGPU::isGFX9Plus(getSTI()); 1354 } 1355 1356 bool isGFX10() const { 1357 return AMDGPU::isGFX10(getSTI()); 1358 } 1359 1360 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1361 1362 bool isGFX10_BEncoding() const { 1363 return AMDGPU::isGFX10_BEncoding(getSTI()); 1364 } 1365 1366 bool hasInv2PiInlineImm() const { 1367 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1368 } 1369 1370 bool hasFlatOffsets() const { 1371 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1372 } 1373 1374 bool hasSGPR102_SGPR103() const { 1375 return !isVI() && !isGFX9(); 1376 } 1377 1378 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1379 1380 bool hasIntClamp() const { 1381 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1382 } 1383 1384 AMDGPUTargetStreamer &getTargetStreamer() { 1385 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1386 return static_cast<AMDGPUTargetStreamer &>(TS); 1387 } 1388 1389 const MCRegisterInfo *getMRI() const { 1390 // We need this const_cast because for some reason getContext() is not const 1391 // in MCAsmParser. 
1392 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1393 } 1394 1395 const MCInstrInfo *getMII() const { 1396 return &MII; 1397 } 1398 1399 const FeatureBitset &getFeatureBits() const { 1400 return getSTI().getFeatureBits(); 1401 } 1402 1403 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1404 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1405 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1406 1407 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1408 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1409 bool isForcedDPP() const { return ForcedDPP; } 1410 bool isForcedSDWA() const { return ForcedSDWA; } 1411 ArrayRef<unsigned> getMatchedVariants() const; 1412 StringRef getMatchedVariantName() const; 1413 1414 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1415 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1416 bool RestoreOnFailure); 1417 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1418 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1419 SMLoc &EndLoc) override; 1420 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1421 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1422 unsigned Kind) override; 1423 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1424 OperandVector &Operands, MCStreamer &Out, 1425 uint64_t &ErrorInfo, 1426 bool MatchingInlineAsm) override; 1427 bool ParseDirective(AsmToken DirectiveID) override; 1428 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1429 OperandMode Mode = OperandMode_Default); 1430 StringRef parseMnemonicSuffix(StringRef Name); 1431 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1432 SMLoc NameLoc, OperandVector &Operands) override; 1433 //bool ProcessInstruction(MCInst &Inst); 1434 1435 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1436 1437 OperandMatchResultTy 1438 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1439 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1440 bool (*ConvertResult)(int64_t &) = nullptr); 1441 1442 OperandMatchResultTy 1443 parseOperandArrayWithPrefix(const char *Prefix, 1444 OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t&) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseNamedBit(StringRef Name, OperandVector &Operands, 1450 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1451 OperandMatchResultTy parseCPol(OperandVector &Operands); 1452 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1453 StringRef &Value, 1454 SMLoc &StringLoc); 1455 1456 bool isModifier(); 1457 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1458 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1459 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1460 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1461 bool parseSP3NegModifier(); 1462 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1463 OperandMatchResultTy parseReg(OperandVector &Operands); 1464 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1465 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1466 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1467 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1468 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1469 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1470 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1471 OperandMatchResultTy parseUfmt(int64_t &Format); 1472 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1473 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1474 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1475 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1476 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1477 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1478 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1479 1480 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1481 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1482 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1483 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1484 1485 bool parseCnt(int64_t &IntVal); 1486 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1487 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1488 1489 private: 1490 struct OperandInfoTy { 1491 SMLoc Loc; 1492 int64_t Id; 1493 bool IsSymbolic = false; 1494 bool IsDefined = false; 1495 1496 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1497 }; 1498 1499 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1500 bool validateSendMsg(const OperandInfoTy &Msg, 1501 const OperandInfoTy &Op, 1502 const OperandInfoTy &Stream); 1503 1504 bool parseHwregBody(OperandInfoTy &HwReg, 1505 OperandInfoTy &Offset, 1506 OperandInfoTy &Width); 1507 bool validateHwreg(const OperandInfoTy &HwReg, 1508 const OperandInfoTy &Offset, 1509 const OperandInfoTy &Width); 1510 1511 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1512 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1513 1514 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1515 const OperandVector &Operands) const; 1516 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1517 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1518 SMLoc getLitLoc(const OperandVector &Operands) const; 1519 SMLoc getConstLoc(const OperandVector &Operands) const; 1520 1521 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1522 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1523 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1524 bool validateSOPLiteral(const MCInst &Inst) const; 1525 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1526 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1527 bool validateIntClampSupported(const MCInst &Inst); 1528 bool validateMIMGAtomicDMask(const MCInst &Inst); 1529 bool validateMIMGGatherDMask(const MCInst &Inst); 1530 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1531 bool validateMIMGDataSize(const MCInst &Inst); 1532 bool validateMIMGAddrSize(const 
MCInst &Inst); 1533 bool validateMIMGD16(const MCInst &Inst); 1534 bool validateMIMGDim(const MCInst &Inst); 1535 bool validateMIMGMSAA(const MCInst &Inst); 1536 bool validateOpSel(const MCInst &Inst); 1537 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1538 bool validateVccOperand(unsigned Reg) const; 1539 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1540 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1541 bool validateAGPRLdSt(const MCInst &Inst) const; 1542 bool validateVGPRAlign(const MCInst &Inst) const; 1543 bool validateDivScale(const MCInst &Inst); 1544 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1545 const SMLoc &IDLoc); 1546 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1547 unsigned getConstantBusLimit(unsigned Opcode) const; 1548 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1549 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1550 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1551 1552 bool isSupportedMnemo(StringRef Mnemo, 1553 const FeatureBitset &FBS); 1554 bool isSupportedMnemo(StringRef Mnemo, 1555 const FeatureBitset &FBS, 1556 ArrayRef<unsigned> Variants); 1557 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1558 1559 bool isId(const StringRef Id) const; 1560 bool isId(const AsmToken &Token, const StringRef Id) const; 1561 bool isToken(const AsmToken::TokenKind Kind) const; 1562 bool trySkipId(const StringRef Id); 1563 bool trySkipId(const StringRef Pref, const StringRef Id); 1564 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1565 bool trySkipToken(const AsmToken::TokenKind Kind); 1566 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1567 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1568 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1569 1570 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1571 AsmToken::TokenKind getTokenKind() const; 1572 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1573 bool parseExpr(OperandVector &Operands); 1574 StringRef getTokenStr() const; 1575 AsmToken peekToken(); 1576 AsmToken getToken() const; 1577 SMLoc getLoc() const; 1578 void lex(); 1579 1580 public: 1581 void onBeginOfFile() override; 1582 1583 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1584 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1585 1586 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1587 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1588 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1589 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1590 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1591 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1592 1593 bool parseSwizzleOperand(int64_t &Op, 1594 const unsigned MinVal, 1595 const unsigned MaxVal, 1596 const StringRef ErrMsg, 1597 SMLoc &Loc); 1598 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1599 const unsigned MinVal, 1600 const unsigned MaxVal, 1601 const StringRef ErrMsg); 1602 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1603 bool parseSwizzleOffset(int64_t &Imm); 1604 bool parseSwizzleMacro(int64_t &Imm); 1605 bool parseSwizzleQuadPerm(int64_t &Imm); 1606 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1607 bool parseSwizzleBroadcast(int64_t &Imm); 1608 bool 
parseSwizzleSwap(int64_t &Imm); 1609 bool parseSwizzleReverse(int64_t &Imm); 1610 1611 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1612 int64_t parseGPRIdxMacro(); 1613 1614 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1615 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1616 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1617 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1618 1619 AMDGPUOperand::Ptr defaultCPol() const; 1620 1621 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1622 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1623 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1624 AMDGPUOperand::Ptr defaultFlatOffset() const; 1625 1626 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1627 1628 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1629 OptionalImmIndexMap &OptionalIdx); 1630 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1631 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1632 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1633 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1634 OptionalImmIndexMap &OptionalIdx); 1635 1636 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1637 1638 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1639 bool IsAtomic = false); 1640 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1641 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1642 1643 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1644 1645 bool parseDimId(unsigned &Encoding); 1646 OperandMatchResultTy parseDim(OperandVector &Operands); 1647 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1648 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1649 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1650 int64_t parseDPPCtrlSel(StringRef Ctrl); 1651 int64_t parseDPPCtrlPerm(); 1652 AMDGPUOperand::Ptr defaultRowMask() const; 1653 AMDGPUOperand::Ptr defaultBankMask() const; 1654 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1655 AMDGPUOperand::Ptr defaultFI() const; 1656 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1657 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1658 1659 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1660 AMDGPUOperand::ImmTy Type); 1661 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1662 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1663 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1664 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1665 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1666 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1667 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1668 uint64_t BasicInstType, 1669 bool SkipDstVcc = false, 1670 bool SkipSrcVcc = false); 1671 1672 AMDGPUOperand::Ptr defaultBLGP() const; 1673 AMDGPUOperand::Ptr defaultCBSZ() const; 1674 AMDGPUOperand::Ptr defaultABID() const; 1675 1676 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1677 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1678 }; 1679 1680 struct OptionalOperand { 1681 const char *Name; 1682 AMDGPUOperand::ImmTy Type; 1683 bool IsBit; 
1684 bool (*ConvertResult)(int64_t&); 1685 }; 1686 1687 } // end anonymous namespace 1688 1689 // May be called with integer type with equivalent bitwidth. 1690 static const fltSemantics *getFltSemantics(unsigned Size) { 1691 switch (Size) { 1692 case 4: 1693 return &APFloat::IEEEsingle(); 1694 case 8: 1695 return &APFloat::IEEEdouble(); 1696 case 2: 1697 return &APFloat::IEEEhalf(); 1698 default: 1699 llvm_unreachable("unsupported fp type"); 1700 } 1701 } 1702 1703 static const fltSemantics *getFltSemantics(MVT VT) { 1704 return getFltSemantics(VT.getSizeInBits() / 8); 1705 } 1706 1707 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1708 switch (OperandType) { 1709 case AMDGPU::OPERAND_REG_IMM_INT32: 1710 case AMDGPU::OPERAND_REG_IMM_FP32: 1711 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1712 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1713 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1714 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1715 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1716 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1717 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1718 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1719 return &APFloat::IEEEsingle(); 1720 case AMDGPU::OPERAND_REG_IMM_INT64: 1721 case AMDGPU::OPERAND_REG_IMM_FP64: 1722 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1723 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1724 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1725 return &APFloat::IEEEdouble(); 1726 case AMDGPU::OPERAND_REG_IMM_INT16: 1727 case AMDGPU::OPERAND_REG_IMM_FP16: 1728 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1729 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1730 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1731 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1732 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1733 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1734 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1735 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1736 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1737 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1738 return &APFloat::IEEEhalf(); 1739 default: 1740 llvm_unreachable("unsupported fp type"); 1741 } 1742 } 1743 1744 //===----------------------------------------------------------------------===// 1745 // Operand 1746 //===----------------------------------------------------------------------===// 1747 1748 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1749 bool Lost; 1750 1751 // Convert literal to single precision 1752 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1753 APFloat::rmNearestTiesToEven, 1754 &Lost); 1755 // We allow precision lost but not overflow or underflow 1756 if (Status != APFloat::opOK && 1757 Lost && 1758 ((Status & APFloat::opOverflow) != 0 || 1759 (Status & APFloat::opUnderflow) != 0)) { 1760 return false; 1761 } 1762 1763 return true; 1764 } 1765 1766 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1767 return isUIntN(Size, Val) || isIntN(Size, Val); 1768 } 1769 1770 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1771 if (VT.getScalarType() == MVT::i16) { 1772 // FP immediate values are broken. 1773 return isInlinableIntLiteral(Val); 1774 } 1775 1776 // f16/v2f16 operands work correctly for all values. 1777 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1778 } 1779 1780 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1781 1782 // This is a hack to enable named inline values like 1783 // shared_base with both 32-bit and 64-bit operands. 1784 // Note that these values are defined as 1785 // 32-bit operands only. 
1786 if (isInlineValue()) { 1787 return true; 1788 } 1789 1790 if (!isImmTy(ImmTyNone)) { 1791 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1792 return false; 1793 } 1794 // TODO: We should avoid using host float here. It would be better to 1795 // check the float bit values which is what a few other places do. 1796 // We've had bot failures before due to weird NaN support on mips hosts. 1797 1798 APInt Literal(64, Imm.Val); 1799 1800 if (Imm.IsFPImm) { // We got fp literal token 1801 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1802 return AMDGPU::isInlinableLiteral64(Imm.Val, 1803 AsmParser->hasInv2PiInlineImm()); 1804 } 1805 1806 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1807 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1808 return false; 1809 1810 if (type.getScalarSizeInBits() == 16) { 1811 return isInlineableLiteralOp16( 1812 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1813 type, AsmParser->hasInv2PiInlineImm()); 1814 } 1815 1816 // Check if single precision literal is inlinable 1817 return AMDGPU::isInlinableLiteral32( 1818 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1819 AsmParser->hasInv2PiInlineImm()); 1820 } 1821 1822 // We got int literal token. 1823 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1824 return AMDGPU::isInlinableLiteral64(Imm.Val, 1825 AsmParser->hasInv2PiInlineImm()); 1826 } 1827 1828 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1829 return false; 1830 } 1831 1832 if (type.getScalarSizeInBits() == 16) { 1833 return isInlineableLiteralOp16( 1834 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1835 type, AsmParser->hasInv2PiInlineImm()); 1836 } 1837 1838 return AMDGPU::isInlinableLiteral32( 1839 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1840 AsmParser->hasInv2PiInlineImm()); 1841 } 1842 1843 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1844 // Check that this immediate can be added as literal 1845 if (!isImmTy(ImmTyNone)) { 1846 return false; 1847 } 1848 1849 if (!Imm.IsFPImm) { 1850 // We got int literal token. 1851 1852 if (type == MVT::f64 && hasFPModifiers()) { 1853 // Cannot apply fp modifiers to int literals preserving the same semantics 1854 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1855 // disable these cases. 1856 return false; 1857 } 1858 1859 unsigned Size = type.getSizeInBits(); 1860 if (Size == 64) 1861 Size = 32; 1862 1863 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1864 // types. 1865 return isSafeTruncation(Imm.Val, Size); 1866 } 1867 1868 // We got fp literal token 1869 if (type == MVT::f64) { // Expected 64-bit fp operand 1870 // We would set low 64-bits of literal to zeroes but we accept this literals 1871 return true; 1872 } 1873 1874 if (type == MVT::i64) { // Expected 64-bit int operand 1875 // We don't allow fp literals in 64-bit integer instructions. It is 1876 // unclear how we should encode them. 1877 return false; 1878 } 1879 1880 // We allow fp literals with f16x2 operands assuming that the specified 1881 // literal goes into the lower half and the upper half is zero. We also 1882 // require that the literal may be losslesly converted to f16. 1883 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1884 (type == MVT::v2i16)? MVT::i16 : 1885 (type == MVT::v2f32)? 
MVT::f32 : type; 1886 1887 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1888 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1889 } 1890 1891 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1892 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1893 } 1894 1895 bool AMDGPUOperand::isVRegWithInputMods() const { 1896 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1897 // GFX90A allows DPP on 64-bit operands. 1898 (isRegClass(AMDGPU::VReg_64RegClassID) && 1899 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1900 } 1901 1902 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1903 if (AsmParser->isVI()) 1904 return isVReg32(); 1905 else if (AsmParser->isGFX9Plus()) 1906 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1907 else 1908 return false; 1909 } 1910 1911 bool AMDGPUOperand::isSDWAFP16Operand() const { 1912 return isSDWAOperand(MVT::f16); 1913 } 1914 1915 bool AMDGPUOperand::isSDWAFP32Operand() const { 1916 return isSDWAOperand(MVT::f32); 1917 } 1918 1919 bool AMDGPUOperand::isSDWAInt16Operand() const { 1920 return isSDWAOperand(MVT::i16); 1921 } 1922 1923 bool AMDGPUOperand::isSDWAInt32Operand() const { 1924 return isSDWAOperand(MVT::i32); 1925 } 1926 1927 bool AMDGPUOperand::isBoolReg() const { 1928 auto FB = AsmParser->getFeatureBits(); 1929 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1930 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1931 } 1932 1933 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1934 { 1935 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1936 assert(Size == 2 || Size == 4 || Size == 8); 1937 1938 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1939 1940 if (Imm.Mods.Abs) { 1941 Val &= ~FpSignMask; 1942 } 1943 if (Imm.Mods.Neg) { 1944 Val ^= FpSignMask; 1945 } 1946 1947 return Val; 1948 } 1949 1950 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1951 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1952 Inst.getNumOperands())) { 1953 addLiteralImmOperand(Inst, Imm.Val, 1954 ApplyModifiers & 1955 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1956 } else { 1957 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1958 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1959 setImmKindNone(); 1960 } 1961 } 1962 1963 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1964 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1965 auto OpNum = Inst.getNumOperands(); 1966 // Check that this operand accepts literals 1967 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1968 1969 if (ApplyModifiers) { 1970 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1971 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1972 Val = applyInputFPModifiers(Val, Size); 1973 } 1974 1975 APInt Literal(64, Val); 1976 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1977 1978 if (Imm.IsFPImm) { // We got fp literal token 1979 switch (OpTy) { 1980 case AMDGPU::OPERAND_REG_IMM_INT64: 1981 case AMDGPU::OPERAND_REG_IMM_FP64: 1982 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1983 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1984 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1985 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1986 AsmParser->hasInv2PiInlineImm())) { 1987 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1988 setImmKindConst(); 1989 return; 1990 } 1991 1992 // Non-inlineable 1993 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1994 // For fp operands we check if low 32 bits are zeros 1995 if (Literal.getLoBits(32) != 0) { 1996 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1997 "Can't encode literal as exact 64-bit floating-point operand. " 1998 "Low 32-bits will be set to zero"); 1999 } 2000 2001 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2002 setImmKindLiteral(); 2003 return; 2004 } 2005 2006 // We don't allow fp literals in 64-bit integer instructions. It is 2007 // unclear how we should encode them. This case should be checked earlier 2008 // in predicate methods (isLiteralImm()) 2009 llvm_unreachable("fp literal in 64-bit integer instruction."); 2010 2011 case AMDGPU::OPERAND_REG_IMM_INT32: 2012 case AMDGPU::OPERAND_REG_IMM_FP32: 2013 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2014 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2015 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2016 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2017 case AMDGPU::OPERAND_REG_IMM_INT16: 2018 case AMDGPU::OPERAND_REG_IMM_FP16: 2019 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2020 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2021 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2022 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2023 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2024 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2025 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2026 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2027 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2028 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2029 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2030 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2031 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2032 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2033 bool lost; 2034 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2035 // Convert literal to single precision 2036 FPLiteral.convert(*getOpFltSemantics(OpTy), 2037 APFloat::rmNearestTiesToEven, &lost); 2038 // We allow precision lost but not overflow or underflow. This should be 2039 // checked earlier in isLiteralImm() 2040 2041 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2042 Inst.addOperand(MCOperand::createImm(ImmVal)); 2043 setImmKindLiteral(); 2044 return; 2045 } 2046 default: 2047 llvm_unreachable("invalid operand size"); 2048 } 2049 2050 return; 2051 } 2052 2053 // We got int literal token. 2054 // Only sign extend inline immediates. 
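// Values that are not inlinable are encoded as literals below: 32-bit and
// 16-bit operands keep their low bits, and 64-bit operands keep only the
// low 32 bits (see the Lo_32() call in the 64-bit case).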
2055 switch (OpTy) { 2056 case AMDGPU::OPERAND_REG_IMM_INT32: 2057 case AMDGPU::OPERAND_REG_IMM_FP32: 2058 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2059 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2060 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2061 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2062 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2063 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2064 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2065 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2066 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2067 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2068 if (isSafeTruncation(Val, 32) && 2069 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2070 AsmParser->hasInv2PiInlineImm())) { 2071 Inst.addOperand(MCOperand::createImm(Val)); 2072 setImmKindConst(); 2073 return; 2074 } 2075 2076 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2077 setImmKindLiteral(); 2078 return; 2079 2080 case AMDGPU::OPERAND_REG_IMM_INT64: 2081 case AMDGPU::OPERAND_REG_IMM_FP64: 2082 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2083 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2085 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2086 Inst.addOperand(MCOperand::createImm(Val)); 2087 setImmKindConst(); 2088 return; 2089 } 2090 2091 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2092 setImmKindLiteral(); 2093 return; 2094 2095 case AMDGPU::OPERAND_REG_IMM_INT16: 2096 case AMDGPU::OPERAND_REG_IMM_FP16: 2097 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2098 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2099 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2101 if (isSafeTruncation(Val, 16) && 2102 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2103 AsmParser->hasInv2PiInlineImm())) { 2104 Inst.addOperand(MCOperand::createImm(Val)); 2105 setImmKindConst(); 2106 return; 2107 } 2108 2109 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2110 setImmKindLiteral(); 2111 return; 2112 2113 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2114 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2115 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2117 assert(isSafeTruncation(Val, 16)); 2118 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2119 AsmParser->hasInv2PiInlineImm())); 2120 2121 Inst.addOperand(MCOperand::createImm(Val)); 2122 return; 2123 } 2124 default: 2125 llvm_unreachable("invalid operand size"); 2126 } 2127 } 2128 2129 template <unsigned Bitwidth> 2130 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2131 APInt Literal(64, Imm.Val); 2132 setImmKindNone(); 2133 2134 if (!Imm.IsFPImm) { 2135 // We got int literal token. 
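// Truncate the integer to the KIMM field width; an fp token is converted
// to the target format below instead.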
2136 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2137 return; 2138 } 2139 2140 bool Lost; 2141 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2142 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2143 APFloat::rmNearestTiesToEven, &Lost); 2144 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2145 } 2146 2147 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2148 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2149 } 2150 2151 static bool isInlineValue(unsigned Reg) { 2152 switch (Reg) { 2153 case AMDGPU::SRC_SHARED_BASE: 2154 case AMDGPU::SRC_SHARED_LIMIT: 2155 case AMDGPU::SRC_PRIVATE_BASE: 2156 case AMDGPU::SRC_PRIVATE_LIMIT: 2157 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2158 return true; 2159 case AMDGPU::SRC_VCCZ: 2160 case AMDGPU::SRC_EXECZ: 2161 case AMDGPU::SRC_SCC: 2162 return true; 2163 case AMDGPU::SGPR_NULL: 2164 return true; 2165 default: 2166 return false; 2167 } 2168 } 2169 2170 bool AMDGPUOperand::isInlineValue() const { 2171 return isRegKind() && ::isInlineValue(getReg()); 2172 } 2173 2174 //===----------------------------------------------------------------------===// 2175 // AsmParser 2176 //===----------------------------------------------------------------------===// 2177 2178 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2179 if (Is == IS_VGPR) { 2180 switch (RegWidth) { 2181 default: return -1; 2182 case 1: return AMDGPU::VGPR_32RegClassID; 2183 case 2: return AMDGPU::VReg_64RegClassID; 2184 case 3: return AMDGPU::VReg_96RegClassID; 2185 case 4: return AMDGPU::VReg_128RegClassID; 2186 case 5: return AMDGPU::VReg_160RegClassID; 2187 case 6: return AMDGPU::VReg_192RegClassID; 2188 case 8: return AMDGPU::VReg_256RegClassID; 2189 case 16: return AMDGPU::VReg_512RegClassID; 2190 case 32: return AMDGPU::VReg_1024RegClassID; 2191 } 2192 } else if (Is == IS_TTMP) { 2193 switch (RegWidth) { 2194 default: return -1; 2195 case 1: return AMDGPU::TTMP_32RegClassID; 2196 case 2: return AMDGPU::TTMP_64RegClassID; 2197 case 4: return AMDGPU::TTMP_128RegClassID; 2198 case 8: return AMDGPU::TTMP_256RegClassID; 2199 case 16: return AMDGPU::TTMP_512RegClassID; 2200 } 2201 } else if (Is == IS_SGPR) { 2202 switch (RegWidth) { 2203 default: return -1; 2204 case 1: return AMDGPU::SGPR_32RegClassID; 2205 case 2: return AMDGPU::SGPR_64RegClassID; 2206 case 3: return AMDGPU::SGPR_96RegClassID; 2207 case 4: return AMDGPU::SGPR_128RegClassID; 2208 case 5: return AMDGPU::SGPR_160RegClassID; 2209 case 6: return AMDGPU::SGPR_192RegClassID; 2210 case 8: return AMDGPU::SGPR_256RegClassID; 2211 case 16: return AMDGPU::SGPR_512RegClassID; 2212 } 2213 } else if (Is == IS_AGPR) { 2214 switch (RegWidth) { 2215 default: return -1; 2216 case 1: return AMDGPU::AGPR_32RegClassID; 2217 case 2: return AMDGPU::AReg_64RegClassID; 2218 case 3: return AMDGPU::AReg_96RegClassID; 2219 case 4: return AMDGPU::AReg_128RegClassID; 2220 case 5: return AMDGPU::AReg_160RegClassID; 2221 case 6: return AMDGPU::AReg_192RegClassID; 2222 case 8: return AMDGPU::AReg_256RegClassID; 2223 case 16: return AMDGPU::AReg_512RegClassID; 2224 case 32: return AMDGPU::AReg_1024RegClassID; 2225 } 2226 } 2227 return -1; 2228 } 2229 2230 static unsigned getSpecialRegForName(StringRef RegName) { 2231 return StringSwitch<unsigned>(RegName) 2232 .Case("exec", AMDGPU::EXEC) 2233 .Case("vcc", AMDGPU::VCC) 2234 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2235 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2236 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2237 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2238 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2239 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2240 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2241 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2242 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2243 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2244 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2245 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2246 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2247 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2248 .Case("m0", AMDGPU::M0) 2249 .Case("vccz", AMDGPU::SRC_VCCZ) 2250 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2251 .Case("execz", AMDGPU::SRC_EXECZ) 2252 .Case("src_execz", AMDGPU::SRC_EXECZ) 2253 .Case("scc", AMDGPU::SRC_SCC) 2254 .Case("src_scc", AMDGPU::SRC_SCC) 2255 .Case("tba", AMDGPU::TBA) 2256 .Case("tma", AMDGPU::TMA) 2257 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2258 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2259 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2260 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2261 .Case("vcc_lo", AMDGPU::VCC_LO) 2262 .Case("vcc_hi", AMDGPU::VCC_HI) 2263 .Case("exec_lo", AMDGPU::EXEC_LO) 2264 .Case("exec_hi", AMDGPU::EXEC_HI) 2265 .Case("tma_lo", AMDGPU::TMA_LO) 2266 .Case("tma_hi", AMDGPU::TMA_HI) 2267 .Case("tba_lo", AMDGPU::TBA_LO) 2268 .Case("tba_hi", AMDGPU::TBA_HI) 2269 .Case("pc", AMDGPU::PC_REG) 2270 .Case("null", AMDGPU::SGPR_NULL) 2271 .Default(AMDGPU::NoRegister); 2272 } 2273 2274 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2275 SMLoc &EndLoc, bool RestoreOnFailure) { 2276 auto R = parseRegister(); 2277 if (!R) return true; 2278 assert(R->isReg()); 2279 RegNo = R->getReg(); 2280 StartLoc = R->getStartLoc(); 2281 EndLoc = R->getEndLoc(); 2282 return false; 2283 } 2284 2285 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2286 SMLoc &EndLoc) { 2287 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2288 } 2289 2290 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2291 SMLoc &StartLoc, 2292 SMLoc &EndLoc) { 2293 bool Result = 2294 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2295 bool PendingErrors = getParser().hasPendingError(); 2296 getParser().clearPendingErrors(); 2297 if (PendingErrors) 2298 return MatchOperand_ParseFail; 2299 if (Result) 2300 return MatchOperand_NoMatch; 2301 return MatchOperand_Success; 2302 } 2303 2304 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2305 RegisterKind RegKind, unsigned Reg1, 2306 SMLoc Loc) { 2307 switch (RegKind) { 2308 case IS_SPECIAL: 2309 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2310 Reg = AMDGPU::EXEC; 2311 RegWidth = 2; 2312 return true; 2313 } 2314 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2315 Reg = AMDGPU::FLAT_SCR; 2316 RegWidth = 2; 2317 return true; 2318 } 2319 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2320 Reg = AMDGPU::XNACK_MASK; 2321 RegWidth = 2; 2322 return true; 2323 } 2324 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2325 Reg = AMDGPU::VCC; 2326 RegWidth = 2; 2327 return true; 2328 } 2329 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2330 Reg = AMDGPU::TBA; 2331 RegWidth = 2; 2332 return true; 2333 } 2334 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2335 Reg = AMDGPU::TMA; 2336 
RegWidth = 2; 2337 return true; 2338 } 2339 Error(Loc, "register does not fit in the list"); 2340 return false; 2341 case IS_VGPR: 2342 case IS_SGPR: 2343 case IS_AGPR: 2344 case IS_TTMP: 2345 if (Reg1 != Reg + RegWidth) { 2346 Error(Loc, "registers in a list must have consecutive indices"); 2347 return false; 2348 } 2349 RegWidth++; 2350 return true; 2351 default: 2352 llvm_unreachable("unexpected register kind"); 2353 } 2354 } 2355 2356 struct RegInfo { 2357 StringLiteral Name; 2358 RegisterKind Kind; 2359 }; 2360 2361 static constexpr RegInfo RegularRegisters[] = { 2362 {{"v"}, IS_VGPR}, 2363 {{"s"}, IS_SGPR}, 2364 {{"ttmp"}, IS_TTMP}, 2365 {{"acc"}, IS_AGPR}, 2366 {{"a"}, IS_AGPR}, 2367 }; 2368 2369 static bool isRegularReg(RegisterKind Kind) { 2370 return Kind == IS_VGPR || 2371 Kind == IS_SGPR || 2372 Kind == IS_TTMP || 2373 Kind == IS_AGPR; 2374 } 2375 2376 static const RegInfo* getRegularRegInfo(StringRef Str) { 2377 for (const RegInfo &Reg : RegularRegisters) 2378 if (Str.startswith(Reg.Name)) 2379 return &Reg; 2380 return nullptr; 2381 } 2382 2383 static bool getRegNum(StringRef Str, unsigned& Num) { 2384 return !Str.getAsInteger(10, Num); 2385 } 2386 2387 bool 2388 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2389 const AsmToken &NextToken) const { 2390 2391 // A list of consecutive registers: [s0,s1,s2,s3] 2392 if (Token.is(AsmToken::LBrac)) 2393 return true; 2394 2395 if (!Token.is(AsmToken::Identifier)) 2396 return false; 2397 2398 // A single register like s0 or a range of registers like s[0:1] 2399 2400 StringRef Str = Token.getString(); 2401 const RegInfo *Reg = getRegularRegInfo(Str); 2402 if (Reg) { 2403 StringRef RegName = Reg->Name; 2404 StringRef RegSuffix = Str.substr(RegName.size()); 2405 if (!RegSuffix.empty()) { 2406 unsigned Num; 2407 // A single register with an index: rXX 2408 if (getRegNum(RegSuffix, Num)) 2409 return true; 2410 } else { 2411 // A range of registers: r[XX:YY]. 2412 if (NextToken.is(AsmToken::LBrac)) 2413 return true; 2414 } 2415 } 2416 2417 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2418 } 2419 2420 bool 2421 AMDGPUAsmParser::isRegister() 2422 { 2423 return isRegister(getToken(), peekToken()); 2424 } 2425 2426 unsigned 2427 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2428 unsigned RegNum, 2429 unsigned RegWidth, 2430 SMLoc Loc) { 2431 2432 assert(isRegularReg(RegKind)); 2433 2434 unsigned AlignSize = 1; 2435 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2436 // SGPR and TTMP registers must be aligned. 2437 // Max required alignment is 4 dwords. 
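// For example, s[4:7] is accepted while s[3:6] is rejected below: a 4-dword
// SGPR tuple must start at an index that is a multiple of 4.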
2438 AlignSize = std::min(RegWidth, 4u); 2439 } 2440 2441 if (RegNum % AlignSize != 0) { 2442 Error(Loc, "invalid register alignment"); 2443 return AMDGPU::NoRegister; 2444 } 2445 2446 unsigned RegIdx = RegNum / AlignSize; 2447 int RCID = getRegClass(RegKind, RegWidth); 2448 if (RCID == -1) { 2449 Error(Loc, "invalid or unsupported register size"); 2450 return AMDGPU::NoRegister; 2451 } 2452 2453 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2454 const MCRegisterClass RC = TRI->getRegClass(RCID); 2455 if (RegIdx >= RC.getNumRegs()) { 2456 Error(Loc, "register index is out of range"); 2457 return AMDGPU::NoRegister; 2458 } 2459 2460 return RC.getRegister(RegIdx); 2461 } 2462 2463 bool 2464 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2465 int64_t RegLo, RegHi; 2466 if (!skipToken(AsmToken::LBrac, "missing register index")) 2467 return false; 2468 2469 SMLoc FirstIdxLoc = getLoc(); 2470 SMLoc SecondIdxLoc; 2471 2472 if (!parseExpr(RegLo)) 2473 return false; 2474 2475 if (trySkipToken(AsmToken::Colon)) { 2476 SecondIdxLoc = getLoc(); 2477 if (!parseExpr(RegHi)) 2478 return false; 2479 } else { 2480 RegHi = RegLo; 2481 } 2482 2483 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2484 return false; 2485 2486 if (!isUInt<32>(RegLo)) { 2487 Error(FirstIdxLoc, "invalid register index"); 2488 return false; 2489 } 2490 2491 if (!isUInt<32>(RegHi)) { 2492 Error(SecondIdxLoc, "invalid register index"); 2493 return false; 2494 } 2495 2496 if (RegLo > RegHi) { 2497 Error(FirstIdxLoc, "first register index should not exceed second index"); 2498 return false; 2499 } 2500 2501 Num = static_cast<unsigned>(RegLo); 2502 Width = (RegHi - RegLo) + 1; 2503 return true; 2504 } 2505 2506 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2507 unsigned &RegNum, unsigned &RegWidth, 2508 SmallVectorImpl<AsmToken> &Tokens) { 2509 assert(isToken(AsmToken::Identifier)); 2510 unsigned Reg = getSpecialRegForName(getTokenStr()); 2511 if (Reg) { 2512 RegNum = 0; 2513 RegWidth = 1; 2514 RegKind = IS_SPECIAL; 2515 Tokens.push_back(getToken()); 2516 lex(); // skip register name 2517 } 2518 return Reg; 2519 } 2520 2521 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2522 unsigned &RegNum, unsigned &RegWidth, 2523 SmallVectorImpl<AsmToken> &Tokens) { 2524 assert(isToken(AsmToken::Identifier)); 2525 StringRef RegName = getTokenStr(); 2526 auto Loc = getLoc(); 2527 2528 const RegInfo *RI = getRegularRegInfo(RegName); 2529 if (!RI) { 2530 Error(Loc, "invalid register name"); 2531 return AMDGPU::NoRegister; 2532 } 2533 2534 Tokens.push_back(getToken()); 2535 lex(); // skip register name 2536 2537 RegKind = RI->Kind; 2538 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2539 if (!RegSuffix.empty()) { 2540 // Single 32-bit register: vXX. 2541 if (!getRegNum(RegSuffix, RegNum)) { 2542 Error(Loc, "invalid register index"); 2543 return AMDGPU::NoRegister; 2544 } 2545 RegWidth = 1; 2546 } else { 2547 // Range of registers: v[XX:YY]. ":YY" is optional. 
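// e.g. v[8:11] yields RegNum = 8 and RegWidth = 4; v[8] yields RegWidth = 1.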
2548 if (!ParseRegRange(RegNum, RegWidth)) 2549 return AMDGPU::NoRegister; 2550 } 2551 2552 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2553 } 2554 2555 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2556 unsigned &RegWidth, 2557 SmallVectorImpl<AsmToken> &Tokens) { 2558 unsigned Reg = AMDGPU::NoRegister; 2559 auto ListLoc = getLoc(); 2560 2561 if (!skipToken(AsmToken::LBrac, 2562 "expected a register or a list of registers")) { 2563 return AMDGPU::NoRegister; 2564 } 2565 2566 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2567 2568 auto Loc = getLoc(); 2569 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2570 return AMDGPU::NoRegister; 2571 if (RegWidth != 1) { 2572 Error(Loc, "expected a single 32-bit register"); 2573 return AMDGPU::NoRegister; 2574 } 2575 2576 for (; trySkipToken(AsmToken::Comma); ) { 2577 RegisterKind NextRegKind; 2578 unsigned NextReg, NextRegNum, NextRegWidth; 2579 Loc = getLoc(); 2580 2581 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2582 NextRegNum, NextRegWidth, 2583 Tokens)) { 2584 return AMDGPU::NoRegister; 2585 } 2586 if (NextRegWidth != 1) { 2587 Error(Loc, "expected a single 32-bit register"); 2588 return AMDGPU::NoRegister; 2589 } 2590 if (NextRegKind != RegKind) { 2591 Error(Loc, "registers in a list must be of the same kind"); 2592 return AMDGPU::NoRegister; 2593 } 2594 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2595 return AMDGPU::NoRegister; 2596 } 2597 2598 if (!skipToken(AsmToken::RBrac, 2599 "expected a comma or a closing square bracket")) { 2600 return AMDGPU::NoRegister; 2601 } 2602 2603 if (isRegularReg(RegKind)) 2604 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2605 2606 return Reg; 2607 } 2608 2609 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2610 unsigned &RegNum, unsigned &RegWidth, 2611 SmallVectorImpl<AsmToken> &Tokens) { 2612 auto Loc = getLoc(); 2613 Reg = AMDGPU::NoRegister; 2614 2615 if (isToken(AsmToken::Identifier)) { 2616 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2617 if (Reg == AMDGPU::NoRegister) 2618 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2619 } else { 2620 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2621 } 2622 2623 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2624 if (Reg == AMDGPU::NoRegister) { 2625 assert(Parser.hasPendingError()); 2626 return false; 2627 } 2628 2629 if (!subtargetHasRegister(*TRI, Reg)) { 2630 if (Reg == AMDGPU::SGPR_NULL) { 2631 Error(Loc, "'null' operand is not supported on this GPU"); 2632 } else { 2633 Error(Loc, "register not available on this GPU"); 2634 } 2635 return false; 2636 } 2637 2638 return true; 2639 } 2640 2641 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2642 unsigned &RegNum, unsigned &RegWidth, 2643 bool RestoreOnFailure /*=false*/) { 2644 Reg = AMDGPU::NoRegister; 2645 2646 SmallVector<AsmToken, 1> Tokens; 2647 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2648 if (RestoreOnFailure) { 2649 while (!Tokens.empty()) { 2650 getLexer().UnLex(Tokens.pop_back_val()); 2651 } 2652 } 2653 return true; 2654 } 2655 return false; 2656 } 2657 2658 Optional<StringRef> 2659 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2660 switch (RegKind) { 2661 case IS_VGPR: 2662 return StringRef(".amdgcn.next_free_vgpr"); 2663 case IS_SGPR: 2664 return StringRef(".amdgcn.next_free_sgpr"); 2665 default: 2666 return None; 2667 } 2668 } 2669 2670 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2671 auto SymbolName = getGprCountSymbolName(RegKind); 2672 assert(SymbolName && "initializing invalid register kind"); 2673 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2674 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2675 } 2676 2677 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2678 unsigned DwordRegIndex, 2679 unsigned RegWidth) { 2680 // Symbols are only defined for GCN targets 2681 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2682 return true; 2683 2684 auto SymbolName = getGprCountSymbolName(RegKind); 2685 if (!SymbolName) 2686 return true; 2687 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2688 2689 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2690 int64_t OldCount; 2691 2692 if (!Sym->isVariable()) 2693 return !Error(getLoc(), 2694 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2695 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2696 return !Error( 2697 getLoc(), 2698 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2699 2700 if (OldCount <= NewMax) 2701 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2702 2703 return true; 2704 } 2705 2706 std::unique_ptr<AMDGPUOperand> 2707 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2708 const auto &Tok = getToken(); 2709 SMLoc StartLoc = Tok.getLoc(); 2710 SMLoc EndLoc = Tok.getEndLoc(); 2711 RegisterKind RegKind; 2712 unsigned Reg, RegNum, RegWidth; 2713 2714 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2715 return nullptr; 2716 } 2717 if (isHsaAbiVersion3Or4(&getSTI())) { 2718 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2719 return nullptr; 2720 } else 2721 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2722 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2723 } 2724 2725 OperandMatchResultTy 2726 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2727 // TODO: add syntactic sugar for 1/(2*PI) 2728 2729 assert(!isRegister()); 2730 assert(!isModifier()); 2731 2732 const auto& Tok = getToken(); 2733 const auto& NextTok = peekToken(); 2734 bool IsReal = Tok.is(AsmToken::Real); 2735 SMLoc S = getLoc(); 2736 bool Negate = false; 2737 2738 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2739 lex(); 2740 IsReal = true; 2741 Negate = true; 2742 } 2743 2744 if (IsReal) { 2745 // Floating-point expressions are not supported. 2746 // Can only allow floating-point literals with an 2747 // optional sign. 2748 2749 StringRef Num = getTokenStr(); 2750 lex(); 2751 2752 APFloat RealVal(APFloat::IEEEdouble()); 2753 auto roundMode = APFloat::rmNearestTiesToEven; 2754 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2755 return MatchOperand_ParseFail; 2756 } 2757 if (Negate) 2758 RealVal.changeSign(); 2759 2760 Operands.push_back( 2761 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2762 AMDGPUOperand::ImmTyNone, true)); 2763 2764 return MatchOperand_Success; 2765 2766 } else { 2767 int64_t IntVal; 2768 const MCExpr *Expr; 2769 SMLoc S = getLoc(); 2770 2771 if (HasSP3AbsModifier) { 2772 // This is a workaround for handling expressions 2773 // as arguments of SP3 'abs' modifier, for example: 2774 // |1.0| 2775 // |-1| 2776 // |1+x| 2777 // This syntax is not compatible with syntax of standard 2778 // MC expressions (due to the trailing '|'). 
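// parsePrimaryExpr() is used here so that the trailing '|' is left for the
// caller instead of being consumed as a binary operator.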
2779 SMLoc EndLoc; 2780 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2781 return MatchOperand_ParseFail; 2782 } else { 2783 if (Parser.parseExpression(Expr)) 2784 return MatchOperand_ParseFail; 2785 } 2786 2787 if (Expr->evaluateAsAbsolute(IntVal)) { 2788 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2789 } else { 2790 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2791 } 2792 2793 return MatchOperand_Success; 2794 } 2795 2796 return MatchOperand_NoMatch; 2797 } 2798 2799 OperandMatchResultTy 2800 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2801 if (!isRegister()) 2802 return MatchOperand_NoMatch; 2803 2804 if (auto R = parseRegister()) { 2805 assert(R->isReg()); 2806 Operands.push_back(std::move(R)); 2807 return MatchOperand_Success; 2808 } 2809 return MatchOperand_ParseFail; 2810 } 2811 2812 OperandMatchResultTy 2813 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2814 auto res = parseReg(Operands); 2815 if (res != MatchOperand_NoMatch) { 2816 return res; 2817 } else if (isModifier()) { 2818 return MatchOperand_NoMatch; 2819 } else { 2820 return parseImm(Operands, HasSP3AbsMod); 2821 } 2822 } 2823 2824 bool 2825 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2826 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2827 const auto &str = Token.getString(); 2828 return str == "abs" || str == "neg" || str == "sext"; 2829 } 2830 return false; 2831 } 2832 2833 bool 2834 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2835 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2836 } 2837 2838 bool 2839 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2840 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2841 } 2842 2843 bool 2844 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2845 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2846 } 2847 2848 // Check if this is an operand modifier or an opcode modifier 2849 // which may look like an expression but it is not. We should 2850 // avoid parsing these modifiers as expressions. Currently 2851 // recognized sequences are: 2852 // |...| 2853 // abs(...) 2854 // neg(...) 2855 // sext(...) 2856 // -reg 2857 // -|...| 2858 // -abs(...) 2859 // name:... 2860 // Note that simple opcode modifiers like 'gds' may be parsed as 2861 // expressions; this is a special case. See getExpressionAsToken. 2862 // 2863 bool 2864 AMDGPUAsmParser::isModifier() { 2865 2866 AsmToken Tok = getToken(); 2867 AsmToken NextToken[2]; 2868 peekTokens(NextToken); 2869 2870 return isOperandModifier(Tok, NextToken[0]) || 2871 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2872 isOpcodeModifierWithVal(Tok, NextToken[0]); 2873 } 2874 2875 // Check if the current token is an SP3 'neg' modifier. 2876 // Currently this modifier is allowed in the following context: 2877 // 2878 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2879 // 2. Before an 'abs' modifier: -abs(...) 2880 // 3. Before an SP3 'abs' modifier: -|...| 2881 // 2882 // In all other cases "-" is handled as a part 2883 // of an expression that follows the sign. 
2884
2885 // Note: When "-" is followed by an integer literal,
2886 // this is interpreted as integer negation rather
2887 // than a floating-point NEG modifier applied to the literal.
2888 // Besides being counter-intuitive, such use of a floating-point
2889 // NEG modifier would have resulted in different meanings
2890 // of integer literals used with VOP1/2/C and VOP3,
2891 // for example:
2892 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2893 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2894 // Negative fp literals with a preceding "-" are
2895 // handled likewise for uniformity.
2896 //
2897 bool
2898 AMDGPUAsmParser::parseSP3NegModifier() {
2899
2900 AsmToken NextToken[2];
2901 peekTokens(NextToken);
2902
2903 if (isToken(AsmToken::Minus) &&
2904 (isRegister(NextToken[0], NextToken[1]) ||
2905 NextToken[0].is(AsmToken::Pipe) ||
2906 isId(NextToken[0], "abs"))) {
2907 lex();
2908 return true;
2909 }
2910
2911 return false;
2912 }
2913
2914 OperandMatchResultTy
2915 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2916 bool AllowImm) {
2917 bool Neg, SP3Neg;
2918 bool Abs, SP3Abs;
2919 SMLoc Loc;
2920
2921 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2922 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2923 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2924 return MatchOperand_ParseFail;
2925 }
2926
2927 SP3Neg = parseSP3NegModifier();
2928
2929 Loc = getLoc();
2930 Neg = trySkipId("neg");
2931 if (Neg && SP3Neg) {
2932 Error(Loc, "expected register or immediate");
2933 return MatchOperand_ParseFail;
2934 }
2935 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2936 return MatchOperand_ParseFail;
2937
2938 Abs = trySkipId("abs");
2939 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2940 return MatchOperand_ParseFail;
2941
2942 Loc = getLoc();
2943 SP3Abs = trySkipToken(AsmToken::Pipe);
2944 if (Abs && SP3Abs) {
2945 Error(Loc, "expected register or immediate");
2946 return MatchOperand_ParseFail;
2947 }
2948
2949 OperandMatchResultTy Res;
2950 if (AllowImm) {
2951 Res = parseRegOrImm(Operands, SP3Abs);
2952 } else {
2953 Res = parseReg(Operands);
2954 }
2955 if (Res != MatchOperand_Success) {
2956 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2957 } 2958 2959 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2960 return MatchOperand_ParseFail; 2961 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2962 return MatchOperand_ParseFail; 2963 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2964 return MatchOperand_ParseFail; 2965 2966 AMDGPUOperand::Modifiers Mods; 2967 Mods.Abs = Abs || SP3Abs; 2968 Mods.Neg = Neg || SP3Neg; 2969 2970 if (Mods.hasFPModifiers()) { 2971 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2972 if (Op.isExpr()) { 2973 Error(Op.getStartLoc(), "expected an absolute expression"); 2974 return MatchOperand_ParseFail; 2975 } 2976 Op.setModifiers(Mods); 2977 } 2978 return MatchOperand_Success; 2979 } 2980 2981 OperandMatchResultTy 2982 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2983 bool AllowImm) { 2984 bool Sext = trySkipId("sext"); 2985 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2986 return MatchOperand_ParseFail; 2987 2988 OperandMatchResultTy Res; 2989 if (AllowImm) { 2990 Res = parseRegOrImm(Operands); 2991 } else { 2992 Res = parseReg(Operands); 2993 } 2994 if (Res != MatchOperand_Success) { 2995 return Sext? MatchOperand_ParseFail : Res; 2996 } 2997 2998 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2999 return MatchOperand_ParseFail; 3000 3001 AMDGPUOperand::Modifiers Mods; 3002 Mods.Sext = Sext; 3003 3004 if (Mods.hasIntModifiers()) { 3005 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3006 if (Op.isExpr()) { 3007 Error(Op.getStartLoc(), "expected an absolute expression"); 3008 return MatchOperand_ParseFail; 3009 } 3010 Op.setModifiers(Mods); 3011 } 3012 3013 return MatchOperand_Success; 3014 } 3015 3016 OperandMatchResultTy 3017 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3018 return parseRegOrImmWithFPInputMods(Operands, false); 3019 } 3020 3021 OperandMatchResultTy 3022 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3023 return parseRegOrImmWithIntInputMods(Operands, false); 3024 } 3025 3026 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3027 auto Loc = getLoc(); 3028 if (trySkipId("off")) { 3029 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3030 AMDGPUOperand::ImmTyOff, false)); 3031 return MatchOperand_Success; 3032 } 3033 3034 if (!isRegister()) 3035 return MatchOperand_NoMatch; 3036 3037 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3038 if (Reg) { 3039 Operands.push_back(std::move(Reg)); 3040 return MatchOperand_Success; 3041 } 3042 3043 return MatchOperand_ParseFail; 3044 3045 } 3046 3047 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3049 3050 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3051 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3052 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3053 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3054 return Match_InvalidOperand; 3055 3056 if ((TSFlags & SIInstrFlags::VOP3) && 3057 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3058 getForcedEncodingSize() != 64) 3059 return Match_PreferE32; 3060 3061 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3062 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3063 // v_mac_f32/16 allow only dst_sel == DWORD; 3064 auto OpNum = 3065 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3066 const auto &Op = Inst.getOperand(OpNum); 3067 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3068 return Match_InvalidOperand; 3069 } 3070 } 3071 3072 return Match_Success; 3073 } 3074 3075 static ArrayRef<unsigned> getAllVariants() { 3076 static const unsigned Variants[] = { 3077 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3078 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3079 }; 3080 3081 return makeArrayRef(Variants); 3082 } 3083 3084 // What asm variants we should check 3085 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3086 if (getForcedEncodingSize() == 32) { 3087 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3088 return makeArrayRef(Variants); 3089 } 3090 3091 if (isForcedVOP3()) { 3092 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3093 return makeArrayRef(Variants); 3094 } 3095 3096 if (isForcedSDWA()) { 3097 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3098 AMDGPUAsmVariants::SDWA9}; 3099 return makeArrayRef(Variants); 3100 } 3101 3102 if (isForcedDPP()) { 3103 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3104 return makeArrayRef(Variants); 3105 } 3106 3107 return getAllVariants(); 3108 } 3109 3110 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3111 if (getForcedEncodingSize() == 32) 3112 return "e32"; 3113 3114 if (isForcedVOP3()) 3115 return "e64"; 3116 3117 if (isForcedSDWA()) 3118 return "sdwa"; 3119 3120 if (isForcedDPP()) 3121 return "dpp"; 3122 3123 return ""; 3124 } 3125 3126 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3127 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3128 const unsigned Num = Desc.getNumImplicitUses(); 3129 for (unsigned i = 0; i < Num; ++i) { 3130 unsigned Reg = Desc.ImplicitUses[i]; 3131 switch (Reg) { 3132 case AMDGPU::FLAT_SCR: 3133 case AMDGPU::VCC: 3134 case AMDGPU::VCC_LO: 3135 case AMDGPU::VCC_HI: 3136 case AMDGPU::M0: 3137 return Reg; 3138 default: 3139 break; 3140 } 3141 } 3142 return AMDGPU::NoRegister; 3143 } 3144 3145 // NB: This code is correct only when used to check constant 3146 // bus limitations because GFX7 support no f16 inline constants. 3147 // Note that there are no cases when a GFX7 opcode violates 3148 // constant bus limitations due to the use of an f16 constant. 
3149 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3150 unsigned OpIdx) const { 3151 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3152 3153 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3154 return false; 3155 } 3156 3157 const MCOperand &MO = Inst.getOperand(OpIdx); 3158 3159 int64_t Val = MO.getImm(); 3160 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3161 3162 switch (OpSize) { // expected operand size 3163 case 8: 3164 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3165 case 4: 3166 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3167 case 2: { 3168 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3169 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3170 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3171 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3172 return AMDGPU::isInlinableIntLiteral(Val); 3173 3174 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3175 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3176 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3177 return AMDGPU::isInlinableIntLiteralV216(Val); 3178 3179 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3180 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3181 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3182 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3183 3184 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3185 } 3186 default: 3187 llvm_unreachable("invalid operand size"); 3188 } 3189 } 3190 3191 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3192 if (!isGFX10Plus()) 3193 return 1; 3194 3195 switch (Opcode) { 3196 // 64-bit shift instructions can use only one scalar value input 3197 case AMDGPU::V_LSHLREV_B64_e64: 3198 case AMDGPU::V_LSHLREV_B64_gfx10: 3199 case AMDGPU::V_LSHRREV_B64_e64: 3200 case AMDGPU::V_LSHRREV_B64_gfx10: 3201 case AMDGPU::V_ASHRREV_I64_e64: 3202 case AMDGPU::V_ASHRREV_I64_gfx10: 3203 case AMDGPU::V_LSHL_B64_e64: 3204 case AMDGPU::V_LSHR_B64_e64: 3205 case AMDGPU::V_ASHR_I64_e64: 3206 return 1; 3207 default: 3208 return 2; 3209 } 3210 } 3211 3212 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3213 const MCOperand &MO = Inst.getOperand(OpIdx); 3214 if (MO.isImm()) { 3215 return !isInlineConstant(Inst, OpIdx); 3216 } else if (MO.isReg()) { 3217 auto Reg = MO.getReg(); 3218 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3219 auto PReg = mc2PseudoReg(Reg); 3220 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3221 } else { 3222 return true; 3223 } 3224 } 3225 3226 bool 3227 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3228 const OperandVector &Operands) { 3229 const unsigned Opcode = Inst.getOpcode(); 3230 const MCInstrDesc &Desc = MII.get(Opcode); 3231 unsigned LastSGPR = AMDGPU::NoRegister; 3232 unsigned ConstantBusUseCount = 0; 3233 unsigned NumLiterals = 0; 3234 unsigned LiteralSize; 3235 3236 if (Desc.TSFlags & 3237 (SIInstrFlags::VOPC | 3238 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3239 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3240 SIInstrFlags::SDWA)) { 3241 // Check special imm operands (used by madmk, etc) 3242 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3243 ++ConstantBusUseCount; 3244 } 3245 3246 SmallDenseSet<unsigned> SGPRsUsed; 3247 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3248 if (SGPRUsed != AMDGPU::NoRegister) { 3249 SGPRsUsed.insert(SGPRUsed); 3250 ++ConstantBusUseCount; 3251 } 3252 3253 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3254 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3255 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3256 3257 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3258 3259 for (int OpIdx : OpIndices) { 3260 if (OpIdx == -1) break; 3261 3262 const MCOperand &MO = Inst.getOperand(OpIdx); 3263 if (usesConstantBus(Inst, OpIdx)) { 3264 if (MO.isReg()) { 3265 LastSGPR = mc2PseudoReg(MO.getReg()); 3266 // Pairs of registers with a partial intersections like these 3267 // s0, s[0:1] 3268 // flat_scratch_lo, flat_scratch 3269 // flat_scratch_lo, flat_scratch_hi 3270 // are theoretically valid but they are disabled anyway. 3271 // Note that this code mimics SIInstrInfo::verifyInstruction 3272 if (!SGPRsUsed.count(LastSGPR)) { 3273 SGPRsUsed.insert(LastSGPR); 3274 ++ConstantBusUseCount; 3275 } 3276 } else { // Expression or a literal 3277 3278 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3279 continue; // special operand like VINTERP attr_chan 3280 3281 // An instruction may use only one literal. 3282 // This has been validated on the previous step. 3283 // See validateVOP3Literal. 3284 // This literal may be used as more than one operand. 3285 // If all these operands are of the same size, 3286 // this literal counts as one scalar value. 3287 // Otherwise it counts as 2 scalar values. 3288 // See "GFX10 Shader Programming", section 3.6.2.3. 3289 3290 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3291 if (Size < 4) Size = 4; 3292 3293 if (NumLiterals == 0) { 3294 NumLiterals = 1; 3295 LiteralSize = Size; 3296 } else if (LiteralSize != Size) { 3297 NumLiterals = 2; 3298 } 3299 } 3300 } 3301 } 3302 } 3303 ConstantBusUseCount += NumLiterals; 3304 3305 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3306 return true; 3307 3308 SMLoc LitLoc = getLitLoc(Operands); 3309 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3310 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3311 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3312 return false; 3313 } 3314 3315 bool 3316 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3317 const OperandVector &Operands) { 3318 const unsigned Opcode = Inst.getOpcode(); 3319 const MCInstrDesc &Desc = MII.get(Opcode); 3320 3321 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3322 if (DstIdx == -1 || 3323 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3324 return true; 3325 } 3326 3327 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3328 3329 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3330 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3331 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3332 3333 assert(DstIdx != -1); 3334 const MCOperand &Dst = Inst.getOperand(DstIdx); 3335 assert(Dst.isReg()); 3336 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3337 3338 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3339 3340 for (int SrcIdx : SrcIndices) { 3341 if (SrcIdx == -1) break; 3342 const MCOperand &Src = Inst.getOperand(SrcIdx); 3343 if (Src.isReg()) { 3344 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3345 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3346 Error(getRegLoc(SrcReg, Operands), 3347 "destination must be different than all sources"); 3348 return false; 3349 } 3350 } 3351 } 3352 3353 return true; 3354 } 3355 3356 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3357 3358 const unsigned Opc = Inst.getOpcode(); 3359 const MCInstrDesc &Desc = MII.get(Opc); 3360 3361 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3362 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3363 assert(ClampIdx != -1); 3364 return Inst.getOperand(ClampIdx).getImm() == 0; 3365 } 3366 3367 return true; 3368 } 3369 3370 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3371 3372 const unsigned Opc = Inst.getOpcode(); 3373 const MCInstrDesc &Desc = MII.get(Opc); 3374 3375 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3376 return true; 3377 3378 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3379 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3380 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3381 3382 assert(VDataIdx != -1); 3383 3384 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3385 return true; 3386 3387 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3388 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3389 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3390 if (DMask == 0) 3391 DMask = 1; 3392 3393 unsigned DataSize = 3394 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3395 if (hasPackedD16()) { 3396 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3397 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3398 DataSize = (DataSize + 1) / 2; 3399 } 3400 3401 return (VDataSize / 4) == DataSize + TFESize; 3402 } 3403 3404 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3405 const unsigned Opc = Inst.getOpcode(); 3406 const MCInstrDesc &Desc = MII.get(Opc); 3407 3408 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3409 return true; 3410 3411 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3412 3413 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3414 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3415 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3416 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3417 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3418 3419 assert(VAddr0Idx != -1); 3420 assert(SrsrcIdx != -1); 3421 assert(SrsrcIdx > VAddr0Idx); 3422 3423 if (DimIdx == -1) 3424 return true; // intersect_ray 3425 3426 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3427 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3428 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3429 unsigned VAddrSize = 3430 IsNSA ? SrsrcIdx - VAddr0Idx 3431 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3432 3433 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3434 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3435 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3436 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3437 if (!IsNSA) { 3438 if (AddrSize > 8) 3439 AddrSize = 16; 3440 else if (AddrSize > 4) 3441 AddrSize = 8; 3442 } 3443 3444 return VAddrSize == AddrSize; 3445 } 3446 3447 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3448 3449 const unsigned Opc = Inst.getOpcode(); 3450 const MCInstrDesc &Desc = MII.get(Opc); 3451 3452 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3453 return true; 3454 if (!Desc.mayLoad() || !Desc.mayStore()) 3455 return true; // Not atomic 3456 3457 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3458 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3459 3460 // This is an incomplete check because image_atomic_cmpswap 3461 // may only use 0x3 and 0xf while other atomic operations 3462 // may use 0x1 and 0x3. However these limitations are 3463 // verified when we check that dmask matches dst size. 3464 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3465 } 3466 3467 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3468 3469 const unsigned Opc = Inst.getOpcode(); 3470 const MCInstrDesc &Desc = MII.get(Opc); 3471 3472 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3473 return true; 3474 3475 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3476 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3477 3478 // GATHER4 instructions use dmask in a different fashion compared to 3479 // other MIMG instructions. The only useful DMASK values are 3480 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3481 // (red,red,red,red) etc.) The ISA document doesn't mention 3482 // this. 
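// e.g. dmask:0x4 selects the blue component, so the result is
// (blue,blue,blue,blue) gathered from the four sampled texels.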
3483 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3484 } 3485 3486 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3487 const unsigned Opc = Inst.getOpcode(); 3488 const MCInstrDesc &Desc = MII.get(Opc); 3489 3490 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3491 return true; 3492 3493 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3494 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3495 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3496 3497 if (!BaseOpcode->MSAA) 3498 return true; 3499 3500 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3501 assert(DimIdx != -1); 3502 3503 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3504 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3505 3506 return DimInfo->MSAA; 3507 } 3508 3509 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3510 { 3511 switch (Opcode) { 3512 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3513 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3514 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3515 return true; 3516 default: 3517 return false; 3518 } 3519 } 3520 3521 // movrels* opcodes should only allow VGPRS as src0. 3522 // This is specified in .td description for vop1/vop3, 3523 // but sdwa is handled differently. See isSDWAOperand. 3524 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3525 const OperandVector &Operands) { 3526 3527 const unsigned Opc = Inst.getOpcode(); 3528 const MCInstrDesc &Desc = MII.get(Opc); 3529 3530 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3531 return true; 3532 3533 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3534 assert(Src0Idx != -1); 3535 3536 SMLoc ErrLoc; 3537 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3538 if (Src0.isReg()) { 3539 auto Reg = mc2PseudoReg(Src0.getReg()); 3540 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3541 if (!isSGPR(Reg, TRI)) 3542 return true; 3543 ErrLoc = getRegLoc(Reg, Operands); 3544 } else { 3545 ErrLoc = getConstLoc(Operands); 3546 } 3547 3548 Error(ErrLoc, "source operand must be a VGPR"); 3549 return false; 3550 } 3551 3552 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3553 const OperandVector &Operands) { 3554 3555 const unsigned Opc = Inst.getOpcode(); 3556 3557 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3558 return true; 3559 3560 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3561 assert(Src0Idx != -1); 3562 3563 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3564 if (!Src0.isReg()) 3565 return true; 3566 3567 auto Reg = mc2PseudoReg(Src0.getReg()); 3568 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3569 if (isSGPR(Reg, TRI)) { 3570 Error(getRegLoc(Reg, Operands), 3571 "source operand must be either a VGPR or an inline constant"); 3572 return false; 3573 } 3574 3575 return true; 3576 } 3577 3578 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3579 switch (Inst.getOpcode()) { 3580 default: 3581 return true; 3582 case V_DIV_SCALE_F32_gfx6_gfx7: 3583 case V_DIV_SCALE_F32_vi: 3584 case V_DIV_SCALE_F32_gfx10: 3585 case V_DIV_SCALE_F64_gfx6_gfx7: 3586 case V_DIV_SCALE_F64_vi: 3587 case V_DIV_SCALE_F64_gfx10: 3588 break; 3589 } 3590 3591 // TODO: Check that src0 = src1 or src2. 
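// The loop below rejects any source operand that has the ABS (|x|)
// modifier set.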
3592
3593 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3594 AMDGPU::OpName::src1_modifiers,
3595 AMDGPU::OpName::src2_modifiers}) {
3596 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3597 .getImm() &
3598 SISrcMods::ABS) {
3599 return false;
3600 }
3601 }
3602
3603 return true;
3604 }
3605
3606 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3607
3608 const unsigned Opc = Inst.getOpcode();
3609 const MCInstrDesc &Desc = MII.get(Opc);
3610
3611 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3612 return true;
3613
3614 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3615 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3616 if (isCI() || isSI())
3617 return false;
3618 }
3619
3620 return true;
3621 }
3622
3623 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3624 const unsigned Opc = Inst.getOpcode();
3625 const MCInstrDesc &Desc = MII.get(Opc);
3626
3627 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3628 return true;
3629
3630 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3631 if (DimIdx < 0)
3632 return true;
3633
3634 long Imm = Inst.getOperand(DimIdx).getImm();
3635 if (Imm < 0 || Imm >= 8)
3636 return false;
3637
3638 return true;
3639 }
3640
3641 static bool IsRevOpcode(const unsigned Opcode)
3642 {
3643 switch (Opcode) {
3644 case AMDGPU::V_SUBREV_F32_e32:
3645 case AMDGPU::V_SUBREV_F32_e64:
3646 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3647 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3648 case AMDGPU::V_SUBREV_F32_e32_vi:
3649 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3650 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3651 case AMDGPU::V_SUBREV_F32_e64_vi:
3652
3653 case AMDGPU::V_SUBREV_CO_U32_e32:
3654 case AMDGPU::V_SUBREV_CO_U32_e64:
3655 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3656 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3657
3658 case AMDGPU::V_SUBBREV_U32_e32:
3659 case AMDGPU::V_SUBBREV_U32_e64:
3660 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3661 case AMDGPU::V_SUBBREV_U32_e32_vi:
3662 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3663 case AMDGPU::V_SUBBREV_U32_e64_vi:
3664
3665 case AMDGPU::V_SUBREV_U32_e32:
3666 case AMDGPU::V_SUBREV_U32_e64:
3667 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3668 case AMDGPU::V_SUBREV_U32_e32_vi:
3669 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3670 case AMDGPU::V_SUBREV_U32_e64_vi:
3671
3672 case AMDGPU::V_SUBREV_F16_e32:
3673 case AMDGPU::V_SUBREV_F16_e64:
3674 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3675 case AMDGPU::V_SUBREV_F16_e32_vi:
3676 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3677 case AMDGPU::V_SUBREV_F16_e64_vi:
3678
3679 case AMDGPU::V_SUBREV_U16_e32:
3680 case AMDGPU::V_SUBREV_U16_e64:
3681 case AMDGPU::V_SUBREV_U16_e32_vi:
3682 case AMDGPU::V_SUBREV_U16_e64_vi:
3683
3684 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3685 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3686 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3687
3688 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3689 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3690
3691 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3692 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3693
3694 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3695 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3696
3697 case AMDGPU::V_LSHRREV_B32_e32:
3698 case AMDGPU::V_LSHRREV_B32_e64:
3699 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3700 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3701 case AMDGPU::V_LSHRREV_B32_e32_vi:
3702 case AMDGPU::V_LSHRREV_B32_e64_vi:
3703 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3704 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3705
3706 case AMDGPU::V_ASHRREV_I32_e32:
3707 case
AMDGPU::V_ASHRREV_I32_e64: 3708 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3709 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3710 case AMDGPU::V_ASHRREV_I32_e32_vi: 3711 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3712 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3713 case AMDGPU::V_ASHRREV_I32_e64_vi: 3714 3715 case AMDGPU::V_LSHLREV_B32_e32: 3716 case AMDGPU::V_LSHLREV_B32_e64: 3717 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3718 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3719 case AMDGPU::V_LSHLREV_B32_e32_vi: 3720 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3721 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3722 case AMDGPU::V_LSHLREV_B32_e64_vi: 3723 3724 case AMDGPU::V_LSHLREV_B16_e32: 3725 case AMDGPU::V_LSHLREV_B16_e64: 3726 case AMDGPU::V_LSHLREV_B16_e32_vi: 3727 case AMDGPU::V_LSHLREV_B16_e64_vi: 3728 case AMDGPU::V_LSHLREV_B16_gfx10: 3729 3730 case AMDGPU::V_LSHRREV_B16_e32: 3731 case AMDGPU::V_LSHRREV_B16_e64: 3732 case AMDGPU::V_LSHRREV_B16_e32_vi: 3733 case AMDGPU::V_LSHRREV_B16_e64_vi: 3734 case AMDGPU::V_LSHRREV_B16_gfx10: 3735 3736 case AMDGPU::V_ASHRREV_I16_e32: 3737 case AMDGPU::V_ASHRREV_I16_e64: 3738 case AMDGPU::V_ASHRREV_I16_e32_vi: 3739 case AMDGPU::V_ASHRREV_I16_e64_vi: 3740 case AMDGPU::V_ASHRREV_I16_gfx10: 3741 3742 case AMDGPU::V_LSHLREV_B64_e64: 3743 case AMDGPU::V_LSHLREV_B64_gfx10: 3744 case AMDGPU::V_LSHLREV_B64_vi: 3745 3746 case AMDGPU::V_LSHRREV_B64_e64: 3747 case AMDGPU::V_LSHRREV_B64_gfx10: 3748 case AMDGPU::V_LSHRREV_B64_vi: 3749 3750 case AMDGPU::V_ASHRREV_I64_e64: 3751 case AMDGPU::V_ASHRREV_I64_gfx10: 3752 case AMDGPU::V_ASHRREV_I64_vi: 3753 3754 case AMDGPU::V_PK_LSHLREV_B16: 3755 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3756 case AMDGPU::V_PK_LSHLREV_B16_vi: 3757 3758 case AMDGPU::V_PK_LSHRREV_B16: 3759 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3760 case AMDGPU::V_PK_LSHRREV_B16_vi: 3761 case AMDGPU::V_PK_ASHRREV_I16: 3762 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3763 case AMDGPU::V_PK_ASHRREV_I16_vi: 3764 return true; 3765 default: 3766 return false; 3767 } 3768 } 3769 3770 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3771 3772 using namespace SIInstrFlags; 3773 const unsigned Opcode = Inst.getOpcode(); 3774 const MCInstrDesc &Desc = MII.get(Opcode); 3775 3776 // lds_direct register is defined so that it can be used 3777 // with 9-bit operands only. Ignore encodings which do not accept these. 
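  // Illustrative inputs for the checks below (mnemonics and the lds_direct
  // spelling are example assumptions, not authoritative):
  //   v_mov_b32 v0, lds_direct        ; accepted: lds_direct is src0
  //   v_add_f32 v0, v1, lds_direct    ; rejected: may be used as src0 only
  //   v_subrev_f32 v0, lds_direct, v1 ; rejected: "rev" opcodes swap sources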
3778 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3779 if ((Desc.TSFlags & Enc) == 0) 3780 return None; 3781 3782 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3783 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3784 if (SrcIdx == -1) 3785 break; 3786 const auto &Src = Inst.getOperand(SrcIdx); 3787 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3788 3789 if (isGFX90A()) 3790 return StringRef("lds_direct is not supported on this GPU"); 3791 3792 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3793 return StringRef("lds_direct cannot be used with this instruction"); 3794 3795 if (SrcName != OpName::src0) 3796 return StringRef("lds_direct may be used as src0 only"); 3797 } 3798 } 3799 3800 return None; 3801 } 3802 3803 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3804 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3805 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3806 if (Op.isFlatOffset()) 3807 return Op.getStartLoc(); 3808 } 3809 return getLoc(); 3810 } 3811 3812 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3813 const OperandVector &Operands) { 3814 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3815 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3816 return true; 3817 3818 auto Opcode = Inst.getOpcode(); 3819 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3820 assert(OpNum != -1); 3821 3822 const auto &Op = Inst.getOperand(OpNum); 3823 if (!hasFlatOffsets() && Op.getImm() != 0) { 3824 Error(getFlatOffsetLoc(Operands), 3825 "flat offset modifier is not supported on this GPU"); 3826 return false; 3827 } 3828 3829 // For FLAT segment the offset must be positive; 3830 // MSB is ignored and forced to zero. 3831 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3832 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3833 if (!isIntN(OffsetSize, Op.getImm())) { 3834 Error(getFlatOffsetLoc(Operands), 3835 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3836 return false; 3837 } 3838 } else { 3839 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3840 if (!isUIntN(OffsetSize, Op.getImm())) { 3841 Error(getFlatOffsetLoc(Operands), 3842 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3843 return false; 3844 } 3845 } 3846 3847 return true; 3848 } 3849 3850 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3851 // Start with second operand because SMEM Offset cannot be dst or src0. 
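  // Illustrative: for "s_load_dword s5, s[2:3], 0x40" (example syntax only)
  // the scan below starts after the mnemonic and the destination and returns
  // the location of the 0x40 operand.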
3852 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3853 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3854 if (Op.isSMEMOffset()) 3855 return Op.getStartLoc(); 3856 } 3857 return getLoc(); 3858 } 3859 3860 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3861 const OperandVector &Operands) { 3862 if (isCI() || isSI()) 3863 return true; 3864 3865 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3866 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3867 return true; 3868 3869 auto Opcode = Inst.getOpcode(); 3870 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3871 if (OpNum == -1) 3872 return true; 3873 3874 const auto &Op = Inst.getOperand(OpNum); 3875 if (!Op.isImm()) 3876 return true; 3877 3878 uint64_t Offset = Op.getImm(); 3879 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3880 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3881 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3882 return true; 3883 3884 Error(getSMEMOffsetLoc(Operands), 3885 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3886 "expected a 21-bit signed offset"); 3887 3888 return false; 3889 } 3890 3891 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3892 unsigned Opcode = Inst.getOpcode(); 3893 const MCInstrDesc &Desc = MII.get(Opcode); 3894 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3895 return true; 3896 3897 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3898 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3899 3900 const int OpIndices[] = { Src0Idx, Src1Idx }; 3901 3902 unsigned NumExprs = 0; 3903 unsigned NumLiterals = 0; 3904 uint32_t LiteralValue; 3905 3906 for (int OpIdx : OpIndices) { 3907 if (OpIdx == -1) break; 3908 3909 const MCOperand &MO = Inst.getOperand(OpIdx); 3910 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3911 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3912 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3913 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3914 if (NumLiterals == 0 || LiteralValue != Value) { 3915 LiteralValue = Value; 3916 ++NumLiterals; 3917 } 3918 } else if (MO.isExpr()) { 3919 ++NumExprs; 3920 } 3921 } 3922 } 3923 3924 return NumLiterals + NumExprs <= 1; 3925 } 3926 3927 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3928 const unsigned Opc = Inst.getOpcode(); 3929 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3930 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3931 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3932 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3933 3934 if (OpSel & ~3) 3935 return false; 3936 } 3937 return true; 3938 } 3939 3940 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3941 const OperandVector &Operands) { 3942 const unsigned Opc = Inst.getOpcode(); 3943 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3944 if (DppCtrlIdx < 0) 3945 return true; 3946 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3947 3948 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3949 // DPP64 is supported for row_newbcast only. 
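    // In other words (illustrative): when src0 is a 64-bit register pair, only
    // the row_newbcast:n dpp_ctrl values pass this check; any other control
    // (quad_perm, row_shl, ...) on such an operand is diagnosed below.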
3950 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3951 if (Src0Idx >= 0 && 3952 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3953 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3954 Error(S, "64 bit dpp only supports row_newbcast"); 3955 return false; 3956 } 3957 } 3958 3959 return true; 3960 } 3961 3962 // Check if VCC register matches wavefront size 3963 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3964 auto FB = getFeatureBits(); 3965 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3966 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3967 } 3968 3969 // VOP3 literal is only allowed in GFX10+ and only one can be used 3970 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3971 const OperandVector &Operands) { 3972 unsigned Opcode = Inst.getOpcode(); 3973 const MCInstrDesc &Desc = MII.get(Opcode); 3974 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3975 return true; 3976 3977 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3978 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3979 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3980 3981 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3982 3983 unsigned NumExprs = 0; 3984 unsigned NumLiterals = 0; 3985 uint32_t LiteralValue; 3986 3987 for (int OpIdx : OpIndices) { 3988 if (OpIdx == -1) break; 3989 3990 const MCOperand &MO = Inst.getOperand(OpIdx); 3991 if (!MO.isImm() && !MO.isExpr()) 3992 continue; 3993 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3994 continue; 3995 3996 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3997 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3998 Error(getConstLoc(Operands), 3999 "inline constants are not allowed for this operand"); 4000 return false; 4001 } 4002 4003 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4004 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4005 if (NumLiterals == 0 || LiteralValue != Value) { 4006 LiteralValue = Value; 4007 ++NumLiterals; 4008 } 4009 } else if (MO.isExpr()) { 4010 ++NumExprs; 4011 } 4012 } 4013 NumLiterals += NumExprs; 4014 4015 if (!NumLiterals) 4016 return true; 4017 4018 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4019 Error(getLitLoc(Operands), "literal operands are not supported"); 4020 return false; 4021 } 4022 4023 if (NumLiterals > 1) { 4024 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4025 return false; 4026 } 4027 4028 return true; 4029 } 4030 4031 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4032 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4033 const MCRegisterInfo *MRI) { 4034 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4035 if (OpIdx < 0) 4036 return -1; 4037 4038 const MCOperand &Op = Inst.getOperand(OpIdx); 4039 if (!Op.isReg()) 4040 return -1; 4041 4042 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4043 auto Reg = Sub ? Sub : Op.getReg(); 4044 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4045 return AGRP32.contains(Reg) ? 
1 : 0; 4046 } 4047 4048 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4049 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4050 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4051 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4052 SIInstrFlags::DS)) == 0) 4053 return true; 4054 4055 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4056 : AMDGPU::OpName::vdata; 4057 4058 const MCRegisterInfo *MRI = getMRI(); 4059 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4060 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4061 4062 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4063 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4064 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4065 return false; 4066 } 4067 4068 auto FB = getFeatureBits(); 4069 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4070 if (DataAreg < 0 || DstAreg < 0) 4071 return true; 4072 return DstAreg == DataAreg; 4073 } 4074 4075 return DstAreg < 1 && DataAreg < 1; 4076 } 4077 4078 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4079 auto FB = getFeatureBits(); 4080 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4081 return true; 4082 4083 const MCRegisterInfo *MRI = getMRI(); 4084 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4085 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4086 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4087 const MCOperand &Op = Inst.getOperand(I); 4088 if (!Op.isReg()) 4089 continue; 4090 4091 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4092 if (!Sub) 4093 continue; 4094 4095 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4096 return false; 4097 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4098 return false; 4099 } 4100 4101 return true; 4102 } 4103 4104 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4105 const OperandVector &Operands, 4106 const SMLoc &IDLoc) { 4107 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4108 AMDGPU::OpName::cpol); 4109 if (CPolPos == -1) 4110 return true; 4111 4112 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4113 4114 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4115 if ((TSFlags & (SIInstrFlags::SMRD)) && 4116 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4117 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4118 return false; 4119 } 4120 4121 if (isGFX90A() && (CPol & CPol::SCC)) { 4122 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4123 StringRef CStr(S.getPointer()); 4124 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4125 Error(S, "scc is not supported on this GPU"); 4126 return false; 4127 } 4128 4129 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4130 return true; 4131 4132 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4133 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4134 Error(IDLoc, "instruction must use glc"); 4135 return false; 4136 } 4137 } else { 4138 if (CPol & CPol::GLC) { 4139 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4140 StringRef CStr(S.getPointer()); 4141 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4142 Error(S, "instruction must not use glc"); 4143 return false; 4144 } 4145 } 4146 4147 return true; 4148 } 4149 4150 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4151 const SMLoc &IDLoc, 4152 const OperandVector &Operands) { 4153 if (auto ErrMsg = validateLdsDirect(Inst)) { 
4154 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4155 return false; 4156 } 4157 if (!validateSOPLiteral(Inst)) { 4158 Error(getLitLoc(Operands), 4159 "only one literal operand is allowed"); 4160 return false; 4161 } 4162 if (!validateVOP3Literal(Inst, Operands)) { 4163 return false; 4164 } 4165 if (!validateConstantBusLimitations(Inst, Operands)) { 4166 return false; 4167 } 4168 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4169 return false; 4170 } 4171 if (!validateIntClampSupported(Inst)) { 4172 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4173 "integer clamping is not supported on this GPU"); 4174 return false; 4175 } 4176 if (!validateOpSel(Inst)) { 4177 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4178 "invalid op_sel operand"); 4179 return false; 4180 } 4181 if (!validateDPP(Inst, Operands)) { 4182 return false; 4183 } 4184 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4185 if (!validateMIMGD16(Inst)) { 4186 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4187 "d16 modifier is not supported on this GPU"); 4188 return false; 4189 } 4190 if (!validateMIMGDim(Inst)) { 4191 Error(IDLoc, "dim modifier is required on this GPU"); 4192 return false; 4193 } 4194 if (!validateMIMGMSAA(Inst)) { 4195 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4196 "invalid dim; must be MSAA type"); 4197 return false; 4198 } 4199 if (!validateMIMGDataSize(Inst)) { 4200 Error(IDLoc, 4201 "image data size does not match dmask and tfe"); 4202 return false; 4203 } 4204 if (!validateMIMGAddrSize(Inst)) { 4205 Error(IDLoc, 4206 "image address size does not match dim and a16"); 4207 return false; 4208 } 4209 if (!validateMIMGAtomicDMask(Inst)) { 4210 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4211 "invalid atomic image dmask"); 4212 return false; 4213 } 4214 if (!validateMIMGGatherDMask(Inst)) { 4215 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4216 "invalid image_gather dmask: only one bit must be set"); 4217 return false; 4218 } 4219 if (!validateMovrels(Inst, Operands)) { 4220 return false; 4221 } 4222 if (!validateFlatOffset(Inst, Operands)) { 4223 return false; 4224 } 4225 if (!validateSMEMOffset(Inst, Operands)) { 4226 return false; 4227 } 4228 if (!validateMAIAccWrite(Inst, Operands)) { 4229 return false; 4230 } 4231 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4232 return false; 4233 } 4234 4235 if (!validateAGPRLdSt(Inst)) { 4236 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4237 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4238 : "invalid register class: agpr loads and stores not supported on this GPU" 4239 ); 4240 return false; 4241 } 4242 if (!validateVGPRAlign(Inst)) { 4243 Error(IDLoc, 4244 "invalid register class: vgpr tuples must be 64 bit aligned"); 4245 return false; 4246 } 4247 4248 if (!validateDivScale(Inst)) { 4249 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4250 return false; 4251 } 4252 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4253 return false; 4254 } 4255 4256 return true; 4257 } 4258 4259 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4260 const FeatureBitset &FBS, 4261 unsigned VariantID = 0); 4262 4263 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4264 const FeatureBitset &AvailableFeatures, 4265 unsigned VariantID); 4266 4267 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4268 const FeatureBitset &FBS) { 4269 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4270 } 4271 4272 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4273 const FeatureBitset &FBS, 4274 ArrayRef<unsigned> Variants) { 4275 for (auto Variant : Variants) { 4276 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4277 return true; 4278 } 4279 4280 return false; 4281 } 4282 4283 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4284 const SMLoc &IDLoc) { 4285 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4286 4287 // Check if requested instruction variant is supported. 4288 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4289 return false; 4290 4291 // This instruction is not supported. 4292 // Clear any other pending errors because they are no longer relevant. 4293 getParser().clearPendingErrors(); 4294 4295 // Requested instruction variant is not supported. 4296 // Check if any other variants are supported. 4297 StringRef VariantName = getMatchedVariantName(); 4298 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4299 return Error(IDLoc, 4300 Twine(VariantName, 4301 " variant of this instruction is not supported")); 4302 } 4303 4304 // Finally check if this instruction is supported on any other GPU. 4305 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4306 return Error(IDLoc, "instruction not supported on this GPU"); 4307 } 4308 4309 // Instruction not supported on any GPU. Probably a typo. 4310 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4311 return Error(IDLoc, "invalid instruction" + Suggestion); 4312 } 4313 4314 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4315 OperandVector &Operands, 4316 MCStreamer &Out, 4317 uint64_t &ErrorInfo, 4318 bool MatchingInlineAsm) { 4319 MCInst Inst; 4320 unsigned Result = Match_Success; 4321 for (auto Variant : getMatchedVariants()) { 4322 uint64_t EI; 4323 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4324 Variant); 4325 // We order match statuses from least to most specific. 
We use most specific 4326 // status as resulting 4327 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4328 if ((R == Match_Success) || 4329 (R == Match_PreferE32) || 4330 (R == Match_MissingFeature && Result != Match_PreferE32) || 4331 (R == Match_InvalidOperand && Result != Match_MissingFeature 4332 && Result != Match_PreferE32) || 4333 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4334 && Result != Match_MissingFeature 4335 && Result != Match_PreferE32)) { 4336 Result = R; 4337 ErrorInfo = EI; 4338 } 4339 if (R == Match_Success) 4340 break; 4341 } 4342 4343 if (Result == Match_Success) { 4344 if (!validateInstruction(Inst, IDLoc, Operands)) { 4345 return true; 4346 } 4347 Inst.setLoc(IDLoc); 4348 Out.emitInstruction(Inst, getSTI()); 4349 return false; 4350 } 4351 4352 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4353 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4354 return true; 4355 } 4356 4357 switch (Result) { 4358 default: break; 4359 case Match_MissingFeature: 4360 // It has been verified that the specified instruction 4361 // mnemonic is valid. A match was found but it requires 4362 // features which are not supported on this GPU. 4363 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4364 4365 case Match_InvalidOperand: { 4366 SMLoc ErrorLoc = IDLoc; 4367 if (ErrorInfo != ~0ULL) { 4368 if (ErrorInfo >= Operands.size()) { 4369 return Error(IDLoc, "too few operands for instruction"); 4370 } 4371 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4372 if (ErrorLoc == SMLoc()) 4373 ErrorLoc = IDLoc; 4374 } 4375 return Error(ErrorLoc, "invalid operand for instruction"); 4376 } 4377 4378 case Match_PreferE32: 4379 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4380 "should be encoded as e32"); 4381 case Match_MnemonicFail: 4382 llvm_unreachable("Invalid instructions should have been handled already"); 4383 } 4384 llvm_unreachable("Implement any new match types added!"); 4385 } 4386 4387 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4388 int64_t Tmp = -1; 4389 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4390 return true; 4391 } 4392 if (getParser().parseAbsoluteExpression(Tmp)) { 4393 return true; 4394 } 4395 Ret = static_cast<uint32_t>(Tmp); 4396 return false; 4397 } 4398 4399 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4400 uint32_t &Minor) { 4401 if (ParseAsAbsoluteExpression(Major)) 4402 return TokError("invalid major version"); 4403 4404 if (!trySkipToken(AsmToken::Comma)) 4405 return TokError("minor version number required, comma expected"); 4406 4407 if (ParseAsAbsoluteExpression(Minor)) 4408 return TokError("invalid minor version"); 4409 4410 return false; 4411 } 4412 4413 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4414 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4415 return TokError("directive only supported for amdgcn architecture"); 4416 4417 std::string TargetIDDirective; 4418 SMLoc TargetStart = getTok().getLoc(); 4419 if (getParser().parseEscapedString(TargetIDDirective)) 4420 return true; 4421 4422 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4423 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4424 return getParser().Error(TargetRange.Start, 4425 (Twine(".amdgcn_target directive's target id ") + 4426 Twine(TargetIDDirective) + 4427 Twine(" does not match the specified target id ") + 4428 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4429 4430 return false; 4431 } 4432 4433 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4434 return Error(Range.Start, "value out of range", Range); 4435 } 4436 4437 bool AMDGPUAsmParser::calculateGPRBlocks( 4438 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4439 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4440 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4441 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4442 // TODO(scott.linder): These calculations are duplicated from 4443 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4444 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4445 4446 unsigned NumVGPRs = NextFreeVGPR; 4447 unsigned NumSGPRs = NextFreeSGPR; 4448 4449 if (Version.Major >= 10) 4450 NumSGPRs = 0; 4451 else { 4452 unsigned MaxAddressableNumSGPRs = 4453 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4454 4455 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4456 NumSGPRs > MaxAddressableNumSGPRs) 4457 return OutOfRangeError(SGPRRange); 4458 4459 NumSGPRs += 4460 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4461 4462 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4463 NumSGPRs > MaxAddressableNumSGPRs) 4464 return OutOfRangeError(SGPRRange); 4465 4466 if (Features.test(FeatureSGPRInitBug)) 4467 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4468 } 4469 4470 VGPRBlocks = 4471 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4472 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4473 4474 return false; 4475 } 4476 4477 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4478 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4479 return TokError("directive only supported for amdgcn architecture"); 4480 4481 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4482 return TokError("directive only supported for amdhsa OS"); 4483 4484 StringRef KernelName; 4485 if (getParser().parseIdentifier(KernelName)) 4486 return true; 4487 4488 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4489 4490 StringSet<> Seen; 4491 4492 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4493 4494 SMRange VGPRRange; 4495 uint64_t NextFreeVGPR = 0; 4496 uint64_t AccumOffset = 0; 4497 SMRange SGPRRange; 4498 uint64_t NextFreeSGPR = 0; 4499 unsigned UserSGPRCount = 0; 4500 bool ReserveVCC = true; 4501 bool ReserveFlatScr = true; 4502 Optional<bool> EnableWavefrontSize32; 4503 4504 while (true) { 4505 while (trySkipToken(AsmToken::EndOfStatement)); 4506 4507 StringRef ID; 4508 SMRange IDRange = getTok().getLocRange(); 4509 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4510 return true; 4511 4512 if (ID == ".end_amdhsa_kernel") 4513 break; 4514 4515 if (Seen.find(ID) != Seen.end()) 4516 return TokError(".amdhsa_ directives cannot be repeated"); 4517 Seen.insert(ID); 4518 4519 SMLoc ValStart = getLoc(); 4520 int64_t IVal; 4521 if (getParser().parseAbsoluteExpression(IVal)) 4522 return true; 4523 SMLoc ValEnd = getLoc(); 4524 SMRange ValRange = SMRange(ValStart, ValEnd); 4525 4526 if (IVal < 0) 4527 return OutOfRangeError(ValRange); 4528 4529 uint64_t Val = IVal; 4530 4531 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4532 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4533 return OutOfRangeError(RANGE); \ 4534 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4535 4536 if (ID == ".amdhsa_group_segment_fixed_size") { 
4537 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4538 return OutOfRangeError(ValRange); 4539 KD.group_segment_fixed_size = Val; 4540 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4541 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4542 return OutOfRangeError(ValRange); 4543 KD.private_segment_fixed_size = Val; 4544 } else if (ID == ".amdhsa_kernarg_size") { 4545 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4546 return OutOfRangeError(ValRange); 4547 KD.kernarg_size = Val; 4548 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4549 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4550 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4551 Val, ValRange); 4552 if (Val) 4553 UserSGPRCount += 4; 4554 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4555 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4556 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4557 ValRange); 4558 if (Val) 4559 UserSGPRCount += 2; 4560 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4561 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4562 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4563 ValRange); 4564 if (Val) 4565 UserSGPRCount += 2; 4566 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4567 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4568 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4569 Val, ValRange); 4570 if (Val) 4571 UserSGPRCount += 2; 4572 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4573 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4574 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4575 ValRange); 4576 if (Val) 4577 UserSGPRCount += 2; 4578 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4579 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4580 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4581 ValRange); 4582 if (Val) 4583 UserSGPRCount += 2; 4584 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4585 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4586 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4587 Val, ValRange); 4588 if (Val) 4589 UserSGPRCount += 1; 4590 } else if (ID == ".amdhsa_wavefront_size32") { 4591 if (IVersion.Major < 10) 4592 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4593 EnableWavefrontSize32 = Val; 4594 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4595 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4596 Val, ValRange); 4597 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4598 PARSE_BITS_ENTRY( 4599 KD.compute_pgm_rsrc2, 4600 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4601 ValRange); 4602 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4603 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4604 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4605 ValRange); 4606 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4607 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4608 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4609 ValRange); 4610 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4611 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4612 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4613 ValRange); 4614 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4615 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4616 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4617 ValRange); 4618 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4619 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4620 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4621 ValRange); 4622 } else if (ID == 
".amdhsa_next_free_vgpr") { 4623 VGPRRange = ValRange; 4624 NextFreeVGPR = Val; 4625 } else if (ID == ".amdhsa_next_free_sgpr") { 4626 SGPRRange = ValRange; 4627 NextFreeSGPR = Val; 4628 } else if (ID == ".amdhsa_accum_offset") { 4629 if (!isGFX90A()) 4630 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4631 AccumOffset = Val; 4632 } else if (ID == ".amdhsa_reserve_vcc") { 4633 if (!isUInt<1>(Val)) 4634 return OutOfRangeError(ValRange); 4635 ReserveVCC = Val; 4636 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4637 if (IVersion.Major < 7) 4638 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4639 if (!isUInt<1>(Val)) 4640 return OutOfRangeError(ValRange); 4641 ReserveFlatScr = Val; 4642 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4643 if (IVersion.Major < 8) 4644 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4645 if (!isUInt<1>(Val)) 4646 return OutOfRangeError(ValRange); 4647 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4648 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4649 IDRange); 4650 } else if (ID == ".amdhsa_float_round_mode_32") { 4651 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4652 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4653 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4654 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4655 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4656 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4657 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4658 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4659 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4660 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4661 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4662 ValRange); 4663 } else if (ID == ".amdhsa_dx10_clamp") { 4664 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4665 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4666 } else if (ID == ".amdhsa_ieee_mode") { 4667 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4668 Val, ValRange); 4669 } else if (ID == ".amdhsa_fp16_overflow") { 4670 if (IVersion.Major < 9) 4671 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4672 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4673 ValRange); 4674 } else if (ID == ".amdhsa_tg_split") { 4675 if (!isGFX90A()) 4676 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4678 ValRange); 4679 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4680 if (IVersion.Major < 10) 4681 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4682 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4683 ValRange); 4684 } else if (ID == ".amdhsa_memory_ordered") { 4685 if (IVersion.Major < 10) 4686 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4687 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4688 ValRange); 4689 } else if (ID == ".amdhsa_forward_progress") { 4690 if (IVersion.Major < 10) 4691 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4692 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4693 ValRange); 4694 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4695 PARSE_BITS_ENTRY( 4696 KD.compute_pgm_rsrc2, 4697 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4698 ValRange); 4699 } else if (ID == 
".amdhsa_exception_fp_denorm_src") { 4700 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4701 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4702 Val, ValRange); 4703 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4704 PARSE_BITS_ENTRY( 4705 KD.compute_pgm_rsrc2, 4706 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4707 ValRange); 4708 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4709 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4710 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4711 Val, ValRange); 4712 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4713 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4714 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4715 Val, ValRange); 4716 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4717 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4718 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4719 Val, ValRange); 4720 } else if (ID == ".amdhsa_exception_int_div_zero") { 4721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4722 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4723 Val, ValRange); 4724 } else { 4725 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4726 } 4727 4728 #undef PARSE_BITS_ENTRY 4729 } 4730 4731 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4732 return TokError(".amdhsa_next_free_vgpr directive is required"); 4733 4734 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4735 return TokError(".amdhsa_next_free_sgpr directive is required"); 4736 4737 unsigned VGPRBlocks; 4738 unsigned SGPRBlocks; 4739 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4740 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4741 EnableWavefrontSize32, NextFreeVGPR, 4742 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4743 SGPRBlocks)) 4744 return true; 4745 4746 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4747 VGPRBlocks)) 4748 return OutOfRangeError(VGPRRange); 4749 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4750 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4751 4752 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4753 SGPRBlocks)) 4754 return OutOfRangeError(SGPRRange); 4755 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4756 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4757 SGPRBlocks); 4758 4759 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4760 return TokError("too many user SGPRs enabled"); 4761 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4762 UserSGPRCount); 4763 4764 if (isGFX90A()) { 4765 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4766 return TokError(".amdhsa_accum_offset directive is required"); 4767 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4768 return TokError("accum_offset should be in range [4..256] in " 4769 "increments of 4"); 4770 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4771 return TokError("accum_offset exceeds total VGPR allocation"); 4772 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4773 (AccumOffset / 4 - 1)); 4774 } 4775 4776 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4777 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4778 ReserveFlatScr); 4779 return false; 4780 } 4781 4782 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4783 uint32_t Major; 4784 uint32_t Minor; 4785 4786 if (ParseDirectiveMajorMinor(Major, Minor)) 4787 return true; 4788 4789 
getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4790 return false; 4791 } 4792 4793 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4794 uint32_t Major; 4795 uint32_t Minor; 4796 uint32_t Stepping; 4797 StringRef VendorName; 4798 StringRef ArchName; 4799 4800 // If this directive has no arguments, then use the ISA version for the 4801 // targeted GPU. 4802 if (isToken(AsmToken::EndOfStatement)) { 4803 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4804 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4805 ISA.Stepping, 4806 "AMD", "AMDGPU"); 4807 return false; 4808 } 4809 4810 if (ParseDirectiveMajorMinor(Major, Minor)) 4811 return true; 4812 4813 if (!trySkipToken(AsmToken::Comma)) 4814 return TokError("stepping version number required, comma expected"); 4815 4816 if (ParseAsAbsoluteExpression(Stepping)) 4817 return TokError("invalid stepping version"); 4818 4819 if (!trySkipToken(AsmToken::Comma)) 4820 return TokError("vendor name required, comma expected"); 4821 4822 if (!parseString(VendorName, "invalid vendor name")) 4823 return true; 4824 4825 if (!trySkipToken(AsmToken::Comma)) 4826 return TokError("arch name required, comma expected"); 4827 4828 if (!parseString(ArchName, "invalid arch name")) 4829 return true; 4830 4831 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4832 VendorName, ArchName); 4833 return false; 4834 } 4835 4836 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4837 amd_kernel_code_t &Header) { 4838 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4839 // assembly for backwards compatibility. 4840 if (ID == "max_scratch_backing_memory_byte_size") { 4841 Parser.eatToEndOfStatement(); 4842 return false; 4843 } 4844 4845 SmallString<40> ErrStr; 4846 raw_svector_ostream Err(ErrStr); 4847 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4848 return TokError(Err.str()); 4849 } 4850 Lex(); 4851 4852 if (ID == "enable_wavefront_size32") { 4853 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4854 if (!isGFX10Plus()) 4855 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4856 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4857 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4858 } else { 4859 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4860 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4861 } 4862 } 4863 4864 if (ID == "wavefront_size") { 4865 if (Header.wavefront_size == 5) { 4866 if (!isGFX10Plus()) 4867 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4868 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4869 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4870 } else if (Header.wavefront_size == 6) { 4871 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4872 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4873 } 4874 } 4875 4876 if (ID == "enable_wgp_mode") { 4877 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4878 !isGFX10Plus()) 4879 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4880 } 4881 4882 if (ID == "enable_mem_ordered") { 4883 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4884 !isGFX10Plus()) 4885 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4886 } 4887 4888 if (ID == "enable_fwd_progress") { 4889 if 
(G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4890 !isGFX10Plus()) 4891 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4892 } 4893 4894 return false; 4895 } 4896 4897 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4898 amd_kernel_code_t Header; 4899 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4900 4901 while (true) { 4902 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4903 // will set the current token to EndOfStatement. 4904 while(trySkipToken(AsmToken::EndOfStatement)); 4905 4906 StringRef ID; 4907 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4908 return true; 4909 4910 if (ID == ".end_amd_kernel_code_t") 4911 break; 4912 4913 if (ParseAMDKernelCodeTValue(ID, Header)) 4914 return true; 4915 } 4916 4917 getTargetStreamer().EmitAMDKernelCodeT(Header); 4918 4919 return false; 4920 } 4921 4922 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4923 StringRef KernelName; 4924 if (!parseId(KernelName, "expected symbol name")) 4925 return true; 4926 4927 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4928 ELF::STT_AMDGPU_HSA_KERNEL); 4929 4930 KernelScope.initialize(getContext()); 4931 return false; 4932 } 4933 4934 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4935 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4936 return Error(getLoc(), 4937 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4938 "architectures"); 4939 } 4940 4941 auto TargetIDDirective = getLexer().getTok().getStringContents(); 4942 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4943 return Error(getParser().getTok().getLoc(), "target id must match options"); 4944 4945 getTargetStreamer().EmitISAVersion(); 4946 Lex(); 4947 4948 return false; 4949 } 4950 4951 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4952 const char *AssemblerDirectiveBegin; 4953 const char *AssemblerDirectiveEnd; 4954 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4955 isHsaAbiVersion3Or4(&getSTI()) 4956 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4957 HSAMD::V3::AssemblerDirectiveEnd) 4958 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4959 HSAMD::AssemblerDirectiveEnd); 4960 4961 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4962 return Error(getLoc(), 4963 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4964 "not available on non-amdhsa OSes")).str()); 4965 } 4966 4967 std::string HSAMetadataString; 4968 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4969 HSAMetadataString)) 4970 return true; 4971 4972 if (isHsaAbiVersion3Or4(&getSTI())) { 4973 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4974 return Error(getLoc(), "invalid HSA metadata"); 4975 } else { 4976 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4977 return Error(getLoc(), "invalid HSA metadata"); 4978 } 4979 4980 return false; 4981 } 4982 4983 /// Common code to parse out a block of text (typically YAML) between start and 4984 /// end directives. 
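/// Illustratively, a block such as
///   .amdgpu_metadata
///     <YAML document, collected verbatim>
///   .end_amdgpu_metadata
/// is gathered into CollectString; the directive spellings shown here are
/// examples, the real ones come from the AssemblerDirectiveBegin/End arguments.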
4985 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4986 const char *AssemblerDirectiveEnd, 4987 std::string &CollectString) { 4988 4989 raw_string_ostream CollectStream(CollectString); 4990 4991 getLexer().setSkipSpace(false); 4992 4993 bool FoundEnd = false; 4994 while (!isToken(AsmToken::Eof)) { 4995 while (isToken(AsmToken::Space)) { 4996 CollectStream << getTokenStr(); 4997 Lex(); 4998 } 4999 5000 if (trySkipId(AssemblerDirectiveEnd)) { 5001 FoundEnd = true; 5002 break; 5003 } 5004 5005 CollectStream << Parser.parseStringToEndOfStatement() 5006 << getContext().getAsmInfo()->getSeparatorString(); 5007 5008 Parser.eatToEndOfStatement(); 5009 } 5010 5011 getLexer().setSkipSpace(true); 5012 5013 if (isToken(AsmToken::Eof) && !FoundEnd) { 5014 return TokError(Twine("expected directive ") + 5015 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5016 } 5017 5018 CollectStream.flush(); 5019 return false; 5020 } 5021 5022 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5023 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5024 std::string String; 5025 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5026 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5027 return true; 5028 5029 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5030 if (!PALMetadata->setFromString(String)) 5031 return Error(getLoc(), "invalid PAL metadata"); 5032 return false; 5033 } 5034 5035 /// Parse the assembler directive for old linear-format PAL metadata. 5036 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5037 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5038 return Error(getLoc(), 5039 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5040 "not available on non-amdpal OSes")).str()); 5041 } 5042 5043 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5044 PALMetadata->setLegacy(); 5045 for (;;) { 5046 uint32_t Key, Value; 5047 if (ParseAsAbsoluteExpression(Key)) { 5048 return TokError(Twine("invalid value in ") + 5049 Twine(PALMD::AssemblerDirective)); 5050 } 5051 if (!trySkipToken(AsmToken::Comma)) { 5052 return TokError(Twine("expected an even number of values in ") + 5053 Twine(PALMD::AssemblerDirective)); 5054 } 5055 if (ParseAsAbsoluteExpression(Value)) { 5056 return TokError(Twine("invalid value in ") + 5057 Twine(PALMD::AssemblerDirective)); 5058 } 5059 PALMetadata->setRegister(Key, Value); 5060 if (!trySkipToken(AsmToken::Comma)) 5061 break; 5062 } 5063 return false; 5064 } 5065 5066 /// ParseDirectiveAMDGPULDS 5067 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5068 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5069 if (getParser().checkForValidSection()) 5070 return true; 5071 5072 StringRef Name; 5073 SMLoc NameLoc = getLoc(); 5074 if (getParser().parseIdentifier(Name)) 5075 return TokError("expected identifier in directive"); 5076 5077 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5078 if (parseToken(AsmToken::Comma, "expected ','")) 5079 return true; 5080 5081 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5082 5083 int64_t Size; 5084 SMLoc SizeLoc = getLoc(); 5085 if (getParser().parseAbsoluteExpression(Size)) 5086 return true; 5087 if (Size < 0) 5088 return Error(SizeLoc, "size must be non-negative"); 5089 if (Size > LocalMemorySize) 5090 return Error(SizeLoc, "size is too large"); 5091 5092 int64_t Alignment = 4; 5093 if (trySkipToken(AsmToken::Comma)) { 5094 SMLoc AlignLoc = getLoc(); 5095 if 
(getParser().parseAbsoluteExpression(Alignment)) 5096 return true; 5097 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5098 return Error(AlignLoc, "alignment must be a power of two"); 5099 5100 // Alignment larger than the size of LDS is possible in theory, as long 5101 // as the linker manages to place to symbol at address 0, but we do want 5102 // to make sure the alignment fits nicely into a 32-bit integer. 5103 if (Alignment >= 1u << 31) 5104 return Error(AlignLoc, "alignment is too large"); 5105 } 5106 5107 if (parseToken(AsmToken::EndOfStatement, 5108 "unexpected token in '.amdgpu_lds' directive")) 5109 return true; 5110 5111 Symbol->redefineIfPossible(); 5112 if (!Symbol->isUndefined()) 5113 return Error(NameLoc, "invalid symbol redefinition"); 5114 5115 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5116 return false; 5117 } 5118 5119 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5120 StringRef IDVal = DirectiveID.getString(); 5121 5122 if (isHsaAbiVersion3Or4(&getSTI())) { 5123 if (IDVal == ".amdhsa_kernel") 5124 return ParseDirectiveAMDHSAKernel(); 5125 5126 // TODO: Restructure/combine with PAL metadata directive. 5127 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5128 return ParseDirectiveHSAMetadata(); 5129 } else { 5130 if (IDVal == ".hsa_code_object_version") 5131 return ParseDirectiveHSACodeObjectVersion(); 5132 5133 if (IDVal == ".hsa_code_object_isa") 5134 return ParseDirectiveHSACodeObjectISA(); 5135 5136 if (IDVal == ".amd_kernel_code_t") 5137 return ParseDirectiveAMDKernelCodeT(); 5138 5139 if (IDVal == ".amdgpu_hsa_kernel") 5140 return ParseDirectiveAMDGPUHsaKernel(); 5141 5142 if (IDVal == ".amd_amdgpu_isa") 5143 return ParseDirectiveISAVersion(); 5144 5145 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5146 return ParseDirectiveHSAMetadata(); 5147 } 5148 5149 if (IDVal == ".amdgcn_target") 5150 return ParseDirectiveAMDGCNTarget(); 5151 5152 if (IDVal == ".amdgpu_lds") 5153 return ParseDirectiveAMDGPULDS(); 5154 5155 if (IDVal == PALMD::AssemblerDirectiveBegin) 5156 return ParseDirectivePALMetadataBegin(); 5157 5158 if (IDVal == PALMD::AssemblerDirective) 5159 return ParseDirectivePALMetadata(); 5160 5161 return true; 5162 } 5163 5164 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5165 unsigned RegNo) { 5166 5167 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5168 R.isValid(); ++R) { 5169 if (*R == RegNo) 5170 return isGFX9Plus(); 5171 } 5172 5173 // GFX10 has 2 more SGPRs 104 and 105. 5174 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5175 R.isValid(); ++R) { 5176 if (*R == RegNo) 5177 return hasSGPR104_SGPR105(); 5178 } 5179 5180 switch (RegNo) { 5181 case AMDGPU::SRC_SHARED_BASE: 5182 case AMDGPU::SRC_SHARED_LIMIT: 5183 case AMDGPU::SRC_PRIVATE_BASE: 5184 case AMDGPU::SRC_PRIVATE_LIMIT: 5185 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5186 return isGFX9Plus(); 5187 case AMDGPU::TBA: 5188 case AMDGPU::TBA_LO: 5189 case AMDGPU::TBA_HI: 5190 case AMDGPU::TMA: 5191 case AMDGPU::TMA_LO: 5192 case AMDGPU::TMA_HI: 5193 return !isGFX9Plus(); 5194 case AMDGPU::XNACK_MASK: 5195 case AMDGPU::XNACK_MASK_LO: 5196 case AMDGPU::XNACK_MASK_HI: 5197 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5198 case AMDGPU::SGPR_NULL: 5199 return isGFX10Plus(); 5200 default: 5201 break; 5202 } 5203 5204 if (isCI()) 5205 return true; 5206 5207 if (isSI() || isGFX10Plus()) { 5208 // No flat_scr on SI. 
5209 // On GFX10 flat scratch is not a valid register operand and can only be 5210 // accessed with s_setreg/s_getreg. 5211 switch (RegNo) { 5212 case AMDGPU::FLAT_SCR: 5213 case AMDGPU::FLAT_SCR_LO: 5214 case AMDGPU::FLAT_SCR_HI: 5215 return false; 5216 default: 5217 return true; 5218 } 5219 } 5220 5221 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5222 // SI/CI have. 5223 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5224 R.isValid(); ++R) { 5225 if (*R == RegNo) 5226 return hasSGPR102_SGPR103(); 5227 } 5228 5229 return true; 5230 } 5231 5232 OperandMatchResultTy 5233 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5234 OperandMode Mode) { 5235 // Try to parse with a custom parser 5236 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5237 5238 // If we successfully parsed the operand or if there as an error parsing, 5239 // we are done. 5240 // 5241 // If we are parsing after we reach EndOfStatement then this means we 5242 // are appending default values to the Operands list. This is only done 5243 // by custom parser, so we shouldn't continue on to the generic parsing. 5244 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5245 isToken(AsmToken::EndOfStatement)) 5246 return ResTy; 5247 5248 SMLoc RBraceLoc; 5249 SMLoc LBraceLoc = getLoc(); 5250 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5251 unsigned Prefix = Operands.size(); 5252 5253 for (;;) { 5254 auto Loc = getLoc(); 5255 ResTy = parseReg(Operands); 5256 if (ResTy == MatchOperand_NoMatch) 5257 Error(Loc, "expected a register"); 5258 if (ResTy != MatchOperand_Success) 5259 return MatchOperand_ParseFail; 5260 5261 RBraceLoc = getLoc(); 5262 if (trySkipToken(AsmToken::RBrac)) 5263 break; 5264 5265 if (!skipToken(AsmToken::Comma, 5266 "expected a comma or a closing square bracket")) { 5267 return MatchOperand_ParseFail; 5268 } 5269 } 5270 5271 if (Operands.size() - Prefix > 1) { 5272 Operands.insert(Operands.begin() + Prefix, 5273 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5274 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5275 } 5276 5277 return MatchOperand_Success; 5278 } 5279 5280 return parseRegOrImm(Operands); 5281 } 5282 5283 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5284 // Clear any forced encodings from the previous instruction. 
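  // Illustrative mapping implemented below: "v_add_f32_e64" forces the 64-bit
  // (VOP3) encoding and the name matched later is "v_add_f32"; "_e32", "_dpp"
  // and "_sdwa" suffixes behave the same way for their respective encodings.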
5285 setForcedEncodingSize(0); 5286 setForcedDPP(false); 5287 setForcedSDWA(false); 5288 5289 if (Name.endswith("_e64")) { 5290 setForcedEncodingSize(64); 5291 return Name.substr(0, Name.size() - 4); 5292 } else if (Name.endswith("_e32")) { 5293 setForcedEncodingSize(32); 5294 return Name.substr(0, Name.size() - 4); 5295 } else if (Name.endswith("_dpp")) { 5296 setForcedDPP(true); 5297 return Name.substr(0, Name.size() - 4); 5298 } else if (Name.endswith("_sdwa")) { 5299 setForcedSDWA(true); 5300 return Name.substr(0, Name.size() - 5); 5301 } 5302 return Name; 5303 } 5304 5305 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5306 StringRef Name, 5307 SMLoc NameLoc, OperandVector &Operands) { 5308 // Add the instruction mnemonic 5309 Name = parseMnemonicSuffix(Name); 5310 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5311 5312 bool IsMIMG = Name.startswith("image_"); 5313 5314 while (!trySkipToken(AsmToken::EndOfStatement)) { 5315 OperandMode Mode = OperandMode_Default; 5316 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5317 Mode = OperandMode_NSA; 5318 CPolSeen = 0; 5319 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5320 5321 if (Res != MatchOperand_Success) { 5322 checkUnsupportedInstruction(Name, NameLoc); 5323 if (!Parser.hasPendingError()) { 5324 // FIXME: use real operand location rather than the current location. 5325 StringRef Msg = 5326 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5327 "not a valid operand."; 5328 Error(getLoc(), Msg); 5329 } 5330 while (!trySkipToken(AsmToken::EndOfStatement)) { 5331 lex(); 5332 } 5333 return true; 5334 } 5335 5336 // Eat the comma or space if there is one. 5337 trySkipToken(AsmToken::Comma); 5338 } 5339 5340 return false; 5341 } 5342 5343 //===----------------------------------------------------------------------===// 5344 // Utility functions 5345 //===----------------------------------------------------------------------===// 5346 5347 OperandMatchResultTy 5348 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5349 5350 if (!trySkipId(Prefix, AsmToken::Colon)) 5351 return MatchOperand_NoMatch; 5352 5353 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5354 } 5355 5356 OperandMatchResultTy 5357 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5358 AMDGPUOperand::ImmTy ImmTy, 5359 bool (*ConvertResult)(int64_t&)) { 5360 SMLoc S = getLoc(); 5361 int64_t Value = 0; 5362 5363 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5364 if (Res != MatchOperand_Success) 5365 return Res; 5366 5367 if (ConvertResult && !ConvertResult(Value)) { 5368 Error(S, "invalid " + StringRef(Prefix) + " value."); 5369 } 5370 5371 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5372 return MatchOperand_Success; 5373 } 5374 5375 OperandMatchResultTy 5376 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5377 OperandVector &Operands, 5378 AMDGPUOperand::ImmTy ImmTy, 5379 bool (*ConvertResult)(int64_t&)) { 5380 SMLoc S = getLoc(); 5381 if (!trySkipId(Prefix, AsmToken::Colon)) 5382 return MatchOperand_NoMatch; 5383 5384 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5385 return MatchOperand_ParseFail; 5386 5387 unsigned Val = 0; 5388 const unsigned MaxSize = 4; 5389 5390 // FIXME: How to verify the number of elements matches the number of src 5391 // operands? 
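  // The loop below accepts a bracketed list of 0/1 values, e.g. op_sel:[0,1]
  // or neg_lo:[1,0,0,1] (values shown here are illustrative), and packs the
  // elements into a bitmask in Val.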
5392 for (int I = 0; ; ++I) { 5393 int64_t Op; 5394 SMLoc Loc = getLoc(); 5395 if (!parseExpr(Op)) 5396 return MatchOperand_ParseFail; 5397 5398 if (Op != 0 && Op != 1) { 5399 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5400 return MatchOperand_ParseFail; 5401 } 5402 5403 Val |= (Op << I); 5404 5405 if (trySkipToken(AsmToken::RBrac)) 5406 break; 5407 5408 if (I + 1 == MaxSize) { 5409 Error(getLoc(), "expected a closing square bracket"); 5410 return MatchOperand_ParseFail; 5411 } 5412 5413 if (!skipToken(AsmToken::Comma, "expected a comma")) 5414 return MatchOperand_ParseFail; 5415 } 5416 5417 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5418 return MatchOperand_Success; 5419 } 5420 5421 OperandMatchResultTy 5422 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5423 AMDGPUOperand::ImmTy ImmTy) { 5424 int64_t Bit; 5425 SMLoc S = getLoc(); 5426 5427 if (trySkipId(Name)) { 5428 Bit = 1; 5429 } else if (trySkipId("no", Name)) { 5430 Bit = 0; 5431 } else { 5432 return MatchOperand_NoMatch; 5433 } 5434 5435 if (Name == "r128" && !hasMIMG_R128()) { 5436 Error(S, "r128 modifier is not supported on this GPU"); 5437 return MatchOperand_ParseFail; 5438 } 5439 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5440 Error(S, "a16 modifier is not supported on this GPU"); 5441 return MatchOperand_ParseFail; 5442 } 5443 5444 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5445 ImmTy = AMDGPUOperand::ImmTyR128A16; 5446 5447 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5448 return MatchOperand_Success; 5449 } 5450 5451 OperandMatchResultTy 5452 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5453 unsigned CPolOn = 0; 5454 unsigned CPolOff = 0; 5455 SMLoc S = getLoc(); 5456 5457 if (trySkipId("glc")) 5458 CPolOn = AMDGPU::CPol::GLC; 5459 else if (trySkipId("noglc")) 5460 CPolOff = AMDGPU::CPol::GLC; 5461 else if (trySkipId("slc")) 5462 CPolOn = AMDGPU::CPol::SLC; 5463 else if (trySkipId("noslc")) 5464 CPolOff = AMDGPU::CPol::SLC; 5465 else if (trySkipId("dlc")) 5466 CPolOn = AMDGPU::CPol::DLC; 5467 else if (trySkipId("nodlc")) 5468 CPolOff = AMDGPU::CPol::DLC; 5469 else if (trySkipId("scc")) 5470 CPolOn = AMDGPU::CPol::SCC; 5471 else if (trySkipId("noscc")) 5472 CPolOff = AMDGPU::CPol::SCC; 5473 else 5474 return MatchOperand_NoMatch; 5475 5476 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5477 Error(S, "dlc modifier is not supported on this GPU"); 5478 return MatchOperand_ParseFail; 5479 } 5480 5481 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5482 Error(S, "scc modifier is not supported on this GPU"); 5483 return MatchOperand_ParseFail; 5484 } 5485 5486 if (CPolSeen & (CPolOn | CPolOff)) { 5487 Error(S, "duplicate cache policy modifier"); 5488 return MatchOperand_ParseFail; 5489 } 5490 5491 CPolSeen |= (CPolOn | CPolOff); 5492 5493 for (unsigned I = 1; I != Operands.size(); ++I) { 5494 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5495 if (Op.isCPol()) { 5496 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5497 return MatchOperand_Success; 5498 } 5499 } 5500 5501 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5502 AMDGPUOperand::ImmTyCPol)); 5503 5504 return MatchOperand_Success; 5505 } 5506 5507 static void addOptionalImmOperand( 5508 MCInst& Inst, const OperandVector& Operands, 5509 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5510 AMDGPUOperand::ImmTy ImmT, 5511 int64_t Default = 0) { 5512 auto i = OptionalIdx.find(ImmT); 5513 if (i != OptionalIdx.end()) { 
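    // The operand was supplied explicitly; re-emit the immediate that was
    // parsed for it.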
5514 unsigned Idx = i->second; 5515 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5516 } else { 5517 Inst.addOperand(MCOperand::createImm(Default)); 5518 } 5519 } 5520 5521 OperandMatchResultTy 5522 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5523 StringRef &Value, 5524 SMLoc &StringLoc) { 5525 if (!trySkipId(Prefix, AsmToken::Colon)) 5526 return MatchOperand_NoMatch; 5527 5528 StringLoc = getLoc(); 5529 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5530 : MatchOperand_ParseFail; 5531 } 5532 5533 //===----------------------------------------------------------------------===// 5534 // MTBUF format 5535 //===----------------------------------------------------------------------===// 5536 5537 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5538 int64_t MaxVal, 5539 int64_t &Fmt) { 5540 int64_t Val; 5541 SMLoc Loc = getLoc(); 5542 5543 auto Res = parseIntWithPrefix(Pref, Val); 5544 if (Res == MatchOperand_ParseFail) 5545 return false; 5546 if (Res == MatchOperand_NoMatch) 5547 return true; 5548 5549 if (Val < 0 || Val > MaxVal) { 5550 Error(Loc, Twine("out of range ", StringRef(Pref))); 5551 return false; 5552 } 5553 5554 Fmt = Val; 5555 return true; 5556 } 5557 5558 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5559 // values to live in a joint format operand in the MCInst encoding. 5560 OperandMatchResultTy 5561 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5562 using namespace llvm::AMDGPU::MTBUFFormat; 5563 5564 int64_t Dfmt = DFMT_UNDEF; 5565 int64_t Nfmt = NFMT_UNDEF; 5566 5567 // dfmt and nfmt can appear in either order, and each is optional. 5568 for (int I = 0; I < 2; ++I) { 5569 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5570 return MatchOperand_ParseFail; 5571 5572 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5573 return MatchOperand_ParseFail; 5574 } 5575 // Skip optional comma between dfmt/nfmt 5576 // but guard against 2 commas following each other. 5577 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5578 !peekToken().is(AsmToken::Comma)) { 5579 trySkipToken(AsmToken::Comma); 5580 } 5581 } 5582 5583 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5584 return MatchOperand_NoMatch; 5585 5586 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5587 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5588 5589 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5590 return MatchOperand_Success; 5591 } 5592 5593 OperandMatchResultTy 5594 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5595 using namespace llvm::AMDGPU::MTBUFFormat; 5596 5597 int64_t Fmt = UFMT_UNDEF; 5598 5599 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5600 return MatchOperand_ParseFail; 5601 5602 if (Fmt == UFMT_UNDEF) 5603 return MatchOperand_NoMatch; 5604 5605 Format = Fmt; 5606 return MatchOperand_Success; 5607 } 5608 5609 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5610 int64_t &Nfmt, 5611 StringRef FormatStr, 5612 SMLoc Loc) { 5613 using namespace llvm::AMDGPU::MTBUFFormat; 5614 int64_t Format; 5615 5616 Format = getDfmt(FormatStr); 5617 if (Format != DFMT_UNDEF) { 5618 Dfmt = Format; 5619 return true; 5620 } 5621 5622 Format = getNfmt(FormatStr, getSTI()); 5623 if (Format != NFMT_UNDEF) { 5624 Nfmt = Format; 5625 return true; 5626 } 5627 5628 Error(Loc, "unsupported format"); 5629 return false; 5630 } 5631 5632 OperandMatchResultTy 5633 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5634 SMLoc FormatLoc, 5635 int64_t &Format) { 5636 using namespace llvm::AMDGPU::MTBUFFormat; 5637 5638 int64_t Dfmt = DFMT_UNDEF; 5639 int64_t Nfmt = NFMT_UNDEF; 5640 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5641 return MatchOperand_ParseFail; 5642 5643 if (trySkipToken(AsmToken::Comma)) { 5644 StringRef Str; 5645 SMLoc Loc = getLoc(); 5646 if (!parseId(Str, "expected a format string") || 5647 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5648 return MatchOperand_ParseFail; 5649 } 5650 if (Dfmt == DFMT_UNDEF) { 5651 Error(Loc, "duplicate numeric format"); 5652 return MatchOperand_ParseFail; 5653 } else if (Nfmt == NFMT_UNDEF) { 5654 Error(Loc, "duplicate data format"); 5655 return MatchOperand_ParseFail; 5656 } 5657 } 5658 5659 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5660 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5661 5662 if (isGFX10Plus()) { 5663 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5664 if (Ufmt == UFMT_UNDEF) { 5665 Error(FormatLoc, "unsupported format"); 5666 return MatchOperand_ParseFail; 5667 } 5668 Format = Ufmt; 5669 } else { 5670 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5671 } 5672 5673 return MatchOperand_Success; 5674 } 5675 5676 OperandMatchResultTy 5677 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5678 SMLoc Loc, 5679 int64_t &Format) { 5680 using namespace llvm::AMDGPU::MTBUFFormat; 5681 5682 auto Id = getUnifiedFormat(FormatStr); 5683 if (Id == UFMT_UNDEF) 5684 return MatchOperand_NoMatch; 5685 5686 if (!isGFX10Plus()) { 5687 Error(Loc, "unified format is not supported on this GPU"); 5688 return MatchOperand_ParseFail; 5689 } 5690 5691 Format = Id; 5692 return MatchOperand_Success; 5693 } 5694 5695 OperandMatchResultTy 5696 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5697 using namespace llvm::AMDGPU::MTBUFFormat; 5698 SMLoc Loc = getLoc(); 5699 5700 if (!parseExpr(Format)) 5701 return MatchOperand_ParseFail; 5702 if (!isValidFormatEncoding(Format, getSTI())) { 5703 Error(Loc, "out of range format"); 5704 return MatchOperand_ParseFail; 5705 } 5706 5707 return MatchOperand_Success; 5708 } 5709 5710 OperandMatchResultTy 5711 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5712 using namespace llvm::AMDGPU::MTBUFFormat; 5713 5714 if (!trySkipId("format", AsmToken::Colon)) 5715 return MatchOperand_NoMatch; 5716 5717 if (trySkipToken(AsmToken::LBrac)) { 5718 StringRef FormatStr; 5719 SMLoc Loc = getLoc(); 5720 if (!parseId(FormatStr, "expected a format string")) 5721 return MatchOperand_ParseFail; 5722 5723 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5724 if (Res == MatchOperand_NoMatch) 5725 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5726 if (Res != MatchOperand_Success) 5727 return Res; 5728 5729 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5730 return MatchOperand_ParseFail; 5731 5732 return MatchOperand_Success; 5733 } 5734 5735 return parseNumericFormat(Format); 5736 } 5737 5738 OperandMatchResultTy 5739 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5740 using namespace llvm::AMDGPU::MTBUFFormat; 5741 5742 int64_t Format = getDefaultFormatEncoding(getSTI()); 5743 OperandMatchResultTy Res; 5744 SMLoc Loc = getLoc(); 5745 5746 // Parse legacy format syntax. 5747 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5748 if (Res == MatchOperand_ParseFail) 5749 return Res; 5750 5751 bool FormatFound = (Res == MatchOperand_Success); 5752 5753 Operands.push_back( 5754 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5755 5756 if (FormatFound) 5757 trySkipToken(AsmToken::Comma); 5758 5759 if (isToken(AsmToken::EndOfStatement)) { 5760 // We are expecting an soffset operand, 5761 // but let matcher handle the error. 5762 return MatchOperand_Success; 5763 } 5764 5765 // Parse soffset. 
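  // soffset may be a register or an immediate. If no format was parsed above,
  // a symbolic or numeric format may still follow soffset; it is written into
  // the FORMAT placeholder operand that was pushed earlier.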
5766 Res = parseRegOrImm(Operands); 5767 if (Res != MatchOperand_Success) 5768 return Res; 5769 5770 trySkipToken(AsmToken::Comma); 5771 5772 if (!FormatFound) { 5773 Res = parseSymbolicOrNumericFormat(Format); 5774 if (Res == MatchOperand_ParseFail) 5775 return Res; 5776 if (Res == MatchOperand_Success) { 5777 auto Size = Operands.size(); 5778 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5779 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5780 Op.setImm(Format); 5781 } 5782 return MatchOperand_Success; 5783 } 5784 5785 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5786 Error(getLoc(), "duplicate format"); 5787 return MatchOperand_ParseFail; 5788 } 5789 return MatchOperand_Success; 5790 } 5791 5792 //===----------------------------------------------------------------------===// 5793 // ds 5794 //===----------------------------------------------------------------------===// 5795 5796 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5797 const OperandVector &Operands) { 5798 OptionalImmIndexMap OptionalIdx; 5799 5800 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5801 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5802 5803 // Add the register arguments 5804 if (Op.isReg()) { 5805 Op.addRegOperands(Inst, 1); 5806 continue; 5807 } 5808 5809 // Handle optional arguments 5810 OptionalIdx[Op.getImmTy()] = i; 5811 } 5812 5813 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5816 5817 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5818 } 5819 5820 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5821 bool IsGdsHardcoded) { 5822 OptionalImmIndexMap OptionalIdx; 5823 5824 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5825 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5826 5827 // Add the register arguments 5828 if (Op.isReg()) { 5829 Op.addRegOperands(Inst, 1); 5830 continue; 5831 } 5832 5833 if (Op.isToken() && Op.getToken() == "gds") { 5834 IsGdsHardcoded = true; 5835 continue; 5836 } 5837 5838 // Handle optional arguments 5839 OptionalIdx[Op.getImmTy()] = i; 5840 } 5841 5842 AMDGPUOperand::ImmTy OffsetType = 5843 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5844 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5845 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5846 AMDGPUOperand::ImmTyOffset; 5847 5848 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5849 5850 if (!IsGdsHardcoded) { 5851 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5852 } 5853 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5854 } 5855 5856 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5857 OptionalImmIndexMap OptionalIdx; 5858 5859 unsigned OperandIdx[4]; 5860 unsigned EnMask = 0; 5861 int SrcIdx = 0; 5862 5863 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5864 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5865 5866 // Add the register arguments 5867 if (Op.isReg()) { 5868 assert(SrcIdx < 4); 5869 OperandIdx[SrcIdx] = Inst.size(); 5870 Op.addRegOperands(Inst, 1); 5871 ++SrcIdx; 5872 continue; 5873 } 5874 5875 if (Op.isOff()) { 5876 assert(SrcIdx < 4); 5877 OperandIdx[SrcIdx] = Inst.size(); 5878 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5879 ++SrcIdx; 5880 continue; 5881 } 5882 5883 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5884 Op.addImmOperands(Inst, 1); 5885 continue; 5886 } 5887 5888 if (Op.isToken() && Op.getToken() == "done") 5889 continue; 5890 5891 // Handle optional arguments 5892 OptionalIdx[Op.getImmTy()] = i; 5893 } 5894 5895 assert(SrcIdx == 4); 5896 5897 bool Compr = false; 5898 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5899 Compr = true; 5900 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5901 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5902 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5903 } 5904 5905 for (auto i = 0; i < SrcIdx; ++i) { 5906 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5907 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5908 } 5909 } 5910 5911 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5912 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5913 5914 Inst.addOperand(MCOperand::createImm(EnMask)); 5915 } 5916 5917 //===----------------------------------------------------------------------===// 5918 // s_waitcnt 5919 //===----------------------------------------------------------------------===// 5920 5921 static bool 5922 encodeCnt( 5923 const AMDGPU::IsaVersion ISA, 5924 int64_t &IntVal, 5925 int64_t CntVal, 5926 bool Saturate, 5927 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5928 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5929 { 5930 bool Failed = false; 5931 5932 IntVal = encode(ISA, IntVal, CntVal); 5933 if (CntVal != decode(ISA, IntVal)) { 5934 if (Saturate) { 5935 IntVal = encode(ISA, IntVal, -1); 5936 } else { 5937 Failed = true; 5938 } 5939 } 5940 return Failed; 5941 } 5942 5943 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5944 5945 SMLoc CntLoc = getLoc(); 5946 StringRef CntName = getTokenStr(); 5947 5948 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5949 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5950 return false; 5951 5952 int64_t CntVal; 5953 SMLoc ValLoc = getLoc(); 5954 if (!parseExpr(CntVal)) 5955 return false; 5956 5957 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5958 5959 bool Failed = true; 5960 bool Sat = CntName.endswith("_sat"); 5961 5962 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5963 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5964 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5965 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5966 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5967 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5968 } else { 5969 Error(CntLoc, "invalid counter name " + CntName); 5970 return false; 5971 } 5972 5973 if (Failed) { 5974 Error(ValLoc, "too large value for " + CntName); 5975 return false; 5976 } 5977 5978 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5979 return false; 5980 5981 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5982 if (isToken(AsmToken::EndOfStatement)) { 5983 Error(getLoc(), "expected a counter name"); 5984 return false; 5985 } 5986 } 5987 5988 return true; 5989 } 5990 5991 OperandMatchResultTy 5992 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5993 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5994 int64_t Waitcnt = getWaitcntBitMask(ISA); 5995 SMLoc S = getLoc(); 5996 5997 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5998 while (!isToken(AsmToken::EndOfStatement)) { 5999 if (!parseCnt(Waitcnt)) 6000 return MatchOperand_ParseFail; 6001 } 6002 } else { 6003 if (!parseExpr(Waitcnt)) 6004 return MatchOperand_ParseFail; 6005 } 6006 6007 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6008 return MatchOperand_Success; 6009 } 6010 6011 bool 6012 AMDGPUOperand::isSWaitCnt() const { 6013 return isImm(); 6014 } 6015 6016 //===----------------------------------------------------------------------===// 6017 // hwreg 6018 //===----------------------------------------------------------------------===// 6019 6020 bool 6021 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6022 OperandInfoTy &Offset, 6023 
OperandInfoTy &Width) { 6024 using namespace llvm::AMDGPU::Hwreg; 6025 6026 // The register may be specified by name or using a numeric code 6027 HwReg.Loc = getLoc(); 6028 if (isToken(AsmToken::Identifier) && 6029 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6030 HwReg.IsSymbolic = true; 6031 lex(); // skip register name 6032 } else if (!parseExpr(HwReg.Id, "a register name")) { 6033 return false; 6034 } 6035 6036 if (trySkipToken(AsmToken::RParen)) 6037 return true; 6038 6039 // parse optional params 6040 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6041 return false; 6042 6043 Offset.Loc = getLoc(); 6044 if (!parseExpr(Offset.Id)) 6045 return false; 6046 6047 if (!skipToken(AsmToken::Comma, "expected a comma")) 6048 return false; 6049 6050 Width.Loc = getLoc(); 6051 return parseExpr(Width.Id) && 6052 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6053 } 6054 6055 bool 6056 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6057 const OperandInfoTy &Offset, 6058 const OperandInfoTy &Width) { 6059 6060 using namespace llvm::AMDGPU::Hwreg; 6061 6062 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6063 Error(HwReg.Loc, 6064 "specified hardware register is not supported on this GPU"); 6065 return false; 6066 } 6067 if (!isValidHwreg(HwReg.Id)) { 6068 Error(HwReg.Loc, 6069 "invalid code of hardware register: only 6-bit values are legal"); 6070 return false; 6071 } 6072 if (!isValidHwregOffset(Offset.Id)) { 6073 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6074 return false; 6075 } 6076 if (!isValidHwregWidth(Width.Id)) { 6077 Error(Width.Loc, 6078 "invalid bitfield width: only values from 1 to 32 are legal"); 6079 return false; 6080 } 6081 return true; 6082 } 6083 6084 OperandMatchResultTy 6085 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6086 using namespace llvm::AMDGPU::Hwreg; 6087 6088 int64_t ImmVal = 0; 6089 SMLoc Loc = getLoc(); 6090 6091 if (trySkipId("hwreg", AsmToken::LParen)) { 6092 OperandInfoTy HwReg(ID_UNKNOWN_); 6093 OperandInfoTy Offset(OFFSET_DEFAULT_); 6094 OperandInfoTy Width(WIDTH_DEFAULT_); 6095 if (parseHwregBody(HwReg, Offset, Width) && 6096 validateHwreg(HwReg, Offset, Width)) { 6097 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6098 } else { 6099 return MatchOperand_ParseFail; 6100 } 6101 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6102 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6103 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6104 return MatchOperand_ParseFail; 6105 } 6106 } else { 6107 return MatchOperand_ParseFail; 6108 } 6109 6110 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6111 return MatchOperand_Success; 6112 } 6113 6114 bool AMDGPUOperand::isHwreg() const { 6115 return isImmTy(ImmTyHwreg); 6116 } 6117 6118 //===----------------------------------------------------------------------===// 6119 // sendmsg 6120 //===----------------------------------------------------------------------===// 6121 6122 bool 6123 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6124 OperandInfoTy &Op, 6125 OperandInfoTy &Stream) { 6126 using namespace llvm::AMDGPU::SendMsg; 6127 6128 Msg.Loc = getLoc(); 6129 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6130 Msg.IsSymbolic = true; 6131 lex(); // skip message name 6132 } else if (!parseExpr(Msg.Id, "a message name")) { 6133 return false; 6134 } 6135 6136 if (trySkipToken(AsmToken::Comma)) { 6137 Op.IsDefined = true; 
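    // The operation may be specified by name (e.g. GS_OP_EMIT for MSG_GS)
    // or by an arbitrary integer expression.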
6138 Op.Loc = getLoc(); 6139 if (isToken(AsmToken::Identifier) && 6140 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6141 lex(); // skip operation name 6142 } else if (!parseExpr(Op.Id, "an operation name")) { 6143 return false; 6144 } 6145 6146 if (trySkipToken(AsmToken::Comma)) { 6147 Stream.IsDefined = true; 6148 Stream.Loc = getLoc(); 6149 if (!parseExpr(Stream.Id)) 6150 return false; 6151 } 6152 } 6153 6154 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6155 } 6156 6157 bool 6158 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6159 const OperandInfoTy &Op, 6160 const OperandInfoTy &Stream) { 6161 using namespace llvm::AMDGPU::SendMsg; 6162 6163 // Validation strictness depends on whether message is specified 6164 // in a symbolc or in a numeric form. In the latter case 6165 // only encoding possibility is checked. 6166 bool Strict = Msg.IsSymbolic; 6167 6168 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6169 Error(Msg.Loc, "invalid message id"); 6170 return false; 6171 } 6172 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6173 if (Op.IsDefined) { 6174 Error(Op.Loc, "message does not support operations"); 6175 } else { 6176 Error(Msg.Loc, "missing message operation"); 6177 } 6178 return false; 6179 } 6180 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6181 Error(Op.Loc, "invalid operation id"); 6182 return false; 6183 } 6184 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6185 Error(Stream.Loc, "message operation does not support streams"); 6186 return false; 6187 } 6188 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6189 Error(Stream.Loc, "invalid message stream id"); 6190 return false; 6191 } 6192 return true; 6193 } 6194 6195 OperandMatchResultTy 6196 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6197 using namespace llvm::AMDGPU::SendMsg; 6198 6199 int64_t ImmVal = 0; 6200 SMLoc Loc = getLoc(); 6201 6202 if (trySkipId("sendmsg", AsmToken::LParen)) { 6203 OperandInfoTy Msg(ID_UNKNOWN_); 6204 OperandInfoTy Op(OP_NONE_); 6205 OperandInfoTy Stream(STREAM_ID_NONE_); 6206 if (parseSendMsgBody(Msg, Op, Stream) && 6207 validateSendMsg(Msg, Op, Stream)) { 6208 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6209 } else { 6210 return MatchOperand_ParseFail; 6211 } 6212 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6213 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6214 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6215 return MatchOperand_ParseFail; 6216 } 6217 } else { 6218 return MatchOperand_ParseFail; 6219 } 6220 6221 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6222 return MatchOperand_Success; 6223 } 6224 6225 bool AMDGPUOperand::isSendMsg() const { 6226 return isImmTy(ImmTySendMsg); 6227 } 6228 6229 //===----------------------------------------------------------------------===// 6230 // v_interp 6231 //===----------------------------------------------------------------------===// 6232 6233 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6234 StringRef Str; 6235 SMLoc S = getLoc(); 6236 6237 if (!parseId(Str)) 6238 return MatchOperand_NoMatch; 6239 6240 int Slot = StringSwitch<int>(Str) 6241 .Case("p10", 0) 6242 .Case("p20", 1) 6243 .Case("p0", 2) 6244 .Default(-1); 6245 6246 if (Slot == -1) { 6247 Error(S, "invalid interpolation slot"); 6248 return MatchOperand_ParseFail; 6249 } 6250 6251 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6252 
AMDGPUOperand::ImmTyInterpSlot)); 6253 return MatchOperand_Success; 6254 } 6255 6256 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6257 StringRef Str; 6258 SMLoc S = getLoc(); 6259 6260 if (!parseId(Str)) 6261 return MatchOperand_NoMatch; 6262 6263 if (!Str.startswith("attr")) { 6264 Error(S, "invalid interpolation attribute"); 6265 return MatchOperand_ParseFail; 6266 } 6267 6268 StringRef Chan = Str.take_back(2); 6269 int AttrChan = StringSwitch<int>(Chan) 6270 .Case(".x", 0) 6271 .Case(".y", 1) 6272 .Case(".z", 2) 6273 .Case(".w", 3) 6274 .Default(-1); 6275 if (AttrChan == -1) { 6276 Error(S, "invalid or missing interpolation attribute channel"); 6277 return MatchOperand_ParseFail; 6278 } 6279 6280 Str = Str.drop_back(2).drop_front(4); 6281 6282 uint8_t Attr; 6283 if (Str.getAsInteger(10, Attr)) { 6284 Error(S, "invalid or missing interpolation attribute number"); 6285 return MatchOperand_ParseFail; 6286 } 6287 6288 if (Attr > 63) { 6289 Error(S, "out of bounds interpolation attribute number"); 6290 return MatchOperand_ParseFail; 6291 } 6292 6293 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6294 6295 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6296 AMDGPUOperand::ImmTyInterpAttr)); 6297 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6298 AMDGPUOperand::ImmTyAttrChan)); 6299 return MatchOperand_Success; 6300 } 6301 6302 //===----------------------------------------------------------------------===// 6303 // exp 6304 //===----------------------------------------------------------------------===// 6305 6306 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6307 using namespace llvm::AMDGPU::Exp; 6308 6309 StringRef Str; 6310 SMLoc S = getLoc(); 6311 6312 if (!parseId(Str)) 6313 return MatchOperand_NoMatch; 6314 6315 unsigned Id = getTgtId(Str); 6316 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6317 Error(S, (Id == ET_INVALID) ? 
6318 "invalid exp target" : 6319 "exp target is not supported on this GPU"); 6320 return MatchOperand_ParseFail; 6321 } 6322 6323 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6324 AMDGPUOperand::ImmTyExpTgt)); 6325 return MatchOperand_Success; 6326 } 6327 6328 //===----------------------------------------------------------------------===// 6329 // parser helpers 6330 //===----------------------------------------------------------------------===// 6331 6332 bool 6333 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6334 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6335 } 6336 6337 bool 6338 AMDGPUAsmParser::isId(const StringRef Id) const { 6339 return isId(getToken(), Id); 6340 } 6341 6342 bool 6343 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6344 return getTokenKind() == Kind; 6345 } 6346 6347 bool 6348 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6349 if (isId(Id)) { 6350 lex(); 6351 return true; 6352 } 6353 return false; 6354 } 6355 6356 bool 6357 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6358 if (isToken(AsmToken::Identifier)) { 6359 StringRef Tok = getTokenStr(); 6360 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6361 lex(); 6362 return true; 6363 } 6364 } 6365 return false; 6366 } 6367 6368 bool 6369 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6370 if (isId(Id) && peekToken().is(Kind)) { 6371 lex(); 6372 lex(); 6373 return true; 6374 } 6375 return false; 6376 } 6377 6378 bool 6379 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6380 if (isToken(Kind)) { 6381 lex(); 6382 return true; 6383 } 6384 return false; 6385 } 6386 6387 bool 6388 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6389 const StringRef ErrMsg) { 6390 if (!trySkipToken(Kind)) { 6391 Error(getLoc(), ErrMsg); 6392 return false; 6393 } 6394 return true; 6395 } 6396 6397 bool 6398 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6399 SMLoc S = getLoc(); 6400 6401 const MCExpr *Expr; 6402 if (Parser.parseExpression(Expr)) 6403 return false; 6404 6405 if (Expr->evaluateAsAbsolute(Imm)) 6406 return true; 6407 6408 if (Expected.empty()) { 6409 Error(S, "expected absolute expression"); 6410 } else { 6411 Error(S, Twine("expected ", Expected) + 6412 Twine(" or an absolute expression")); 6413 } 6414 return false; 6415 } 6416 6417 bool 6418 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6419 SMLoc S = getLoc(); 6420 6421 const MCExpr *Expr; 6422 if (Parser.parseExpression(Expr)) 6423 return false; 6424 6425 int64_t IntVal; 6426 if (Expr->evaluateAsAbsolute(IntVal)) { 6427 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6428 } else { 6429 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6430 } 6431 return true; 6432 } 6433 6434 bool 6435 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6436 if (isToken(AsmToken::String)) { 6437 Val = getToken().getStringContents(); 6438 lex(); 6439 return true; 6440 } else { 6441 Error(getLoc(), ErrMsg); 6442 return false; 6443 } 6444 } 6445 6446 bool 6447 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6448 if (isToken(AsmToken::Identifier)) { 6449 Val = getTokenStr(); 6450 lex(); 6451 return true; 6452 } else { 6453 if (!ErrMsg.empty()) 6454 Error(getLoc(), ErrMsg); 6455 return false; 6456 } 6457 } 6458 6459 AsmToken 6460 AMDGPUAsmParser::getToken() const { 6461 return Parser.getTok(); 6462 } 6463 6464 AsmToken 6465 
AMDGPUAsmParser::peekToken() { 6466 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6467 } 6468 6469 void 6470 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6471 auto TokCount = getLexer().peekTokens(Tokens); 6472 6473 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6474 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6475 } 6476 6477 AsmToken::TokenKind 6478 AMDGPUAsmParser::getTokenKind() const { 6479 return getLexer().getKind(); 6480 } 6481 6482 SMLoc 6483 AMDGPUAsmParser::getLoc() const { 6484 return getToken().getLoc(); 6485 } 6486 6487 StringRef 6488 AMDGPUAsmParser::getTokenStr() const { 6489 return getToken().getString(); 6490 } 6491 6492 void 6493 AMDGPUAsmParser::lex() { 6494 Parser.Lex(); 6495 } 6496 6497 SMLoc 6498 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6499 const OperandVector &Operands) const { 6500 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6501 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6502 if (Test(Op)) 6503 return Op.getStartLoc(); 6504 } 6505 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6506 } 6507 6508 SMLoc 6509 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6510 const OperandVector &Operands) const { 6511 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6512 return getOperandLoc(Test, Operands); 6513 } 6514 6515 SMLoc 6516 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6517 const OperandVector &Operands) const { 6518 auto Test = [=](const AMDGPUOperand& Op) { 6519 return Op.isRegKind() && Op.getReg() == Reg; 6520 }; 6521 return getOperandLoc(Test, Operands); 6522 } 6523 6524 SMLoc 6525 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6526 auto Test = [](const AMDGPUOperand& Op) { 6527 return Op.IsImmKindLiteral() || Op.isExpr(); 6528 }; 6529 return getOperandLoc(Test, Operands); 6530 } 6531 6532 SMLoc 6533 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6534 auto Test = [](const AMDGPUOperand& Op) { 6535 return Op.isImmKindConst(); 6536 }; 6537 return getOperandLoc(Test, Operands); 6538 } 6539 6540 //===----------------------------------------------------------------------===// 6541 // swizzle 6542 //===----------------------------------------------------------------------===// 6543 6544 LLVM_READNONE 6545 static unsigned 6546 encodeBitmaskPerm(const unsigned AndMask, 6547 const unsigned OrMask, 6548 const unsigned XorMask) { 6549 using namespace llvm::AMDGPU::Swizzle; 6550 6551 return BITMASK_PERM_ENC | 6552 (AndMask << BITMASK_AND_SHIFT) | 6553 (OrMask << BITMASK_OR_SHIFT) | 6554 (XorMask << BITMASK_XOR_SHIFT); 6555 } 6556 6557 bool 6558 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6559 const unsigned MinVal, 6560 const unsigned MaxVal, 6561 const StringRef ErrMsg, 6562 SMLoc &Loc) { 6563 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6564 return false; 6565 } 6566 Loc = getLoc(); 6567 if (!parseExpr(Op)) { 6568 return false; 6569 } 6570 if (Op < MinVal || Op > MaxVal) { 6571 Error(Loc, ErrMsg); 6572 return false; 6573 } 6574 6575 return true; 6576 } 6577 6578 bool 6579 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6580 const unsigned MinVal, 6581 const unsigned MaxVal, 6582 const StringRef ErrMsg) { 6583 SMLoc Loc; 6584 for (unsigned i = 0; i < OpNum; ++i) { 6585 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6586 return false; 6587 } 6588 6589 return true; 6590 } 6591 6592 bool 6593 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6594 using namespace llvm::AMDGPU::Swizzle; 6595 6596 int64_t Lane[LANE_NUM]; 6597 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6598 "expected a 2-bit lane id")) { 6599 Imm = QUAD_PERM_ENC; 6600 for (unsigned I = 0; I < LANE_NUM; ++I) { 6601 Imm |= Lane[I] << (LANE_SHIFT * I); 6602 } 6603 return true; 6604 } 6605 return false; 6606 } 6607 6608 bool 6609 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6610 using namespace llvm::AMDGPU::Swizzle; 6611 6612 SMLoc Loc; 6613 int64_t GroupSize; 6614 int64_t LaneIdx; 6615 6616 if (!parseSwizzleOperand(GroupSize, 6617 2, 32, 6618 "group size must be in the interval [2,32]", 6619 Loc)) { 6620 return false; 6621 } 6622 if (!isPowerOf2_64(GroupSize)) { 6623 Error(Loc, "group size must be a power of two"); 6624 return false; 6625 } 6626 if (parseSwizzleOperand(LaneIdx, 6627 0, GroupSize - 1, 6628 "lane id must be in the interval [0,group size - 1]", 6629 Loc)) { 6630 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6631 return true; 6632 } 6633 return false; 6634 } 6635 6636 bool 6637 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6638 using namespace llvm::AMDGPU::Swizzle; 6639 6640 SMLoc Loc; 6641 int64_t GroupSize; 6642 6643 if (!parseSwizzleOperand(GroupSize, 6644 2, 32, 6645 "group size must be in the interval [2,32]", 6646 Loc)) { 6647 return false; 6648 } 6649 if (!isPowerOf2_64(GroupSize)) { 6650 Error(Loc, "group size must be a power of two"); 6651 return false; 6652 } 6653 6654 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6655 return true; 6656 } 6657 6658 bool 6659 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6660 using namespace llvm::AMDGPU::Swizzle; 6661 6662 SMLoc Loc; 6663 int64_t GroupSize; 6664 6665 if (!parseSwizzleOperand(GroupSize, 6666 1, 16, 6667 "group size must be in the interval [1,16]", 6668 Loc)) { 6669 return false; 6670 } 6671 if (!isPowerOf2_64(GroupSize)) { 6672 Error(Loc, "group size must be a power of two"); 6673 return false; 6674 } 6675 6676 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6677 return true; 6678 } 6679 6680 bool 6681 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6682 using namespace llvm::AMDGPU::Swizzle; 6683 6684 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6685 return false; 6686 } 6687 6688 StringRef Ctl; 6689 SMLoc StrLoc = getLoc(); 6690 if (!parseString(Ctl)) { 6691 return false; 6692 } 6693 if (Ctl.size() != BITMASK_WIDTH) { 6694 Error(StrLoc, "expected a 5-character mask"); 6695 return false; 6696 } 6697 6698 unsigned AndMask = 0; 6699 unsigned OrMask = 0; 6700 unsigned XorMask = 0; 6701 6702 for (size_t i = 0; i < Ctl.size(); ++i) { 6703 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6704 switch(Ctl[i]) { 6705 default: 6706 Error(StrLoc, "invalid mask"); 6707 return false; 6708 case '0': 6709 break; 6710 case '1': 6711 OrMask |= Mask; 6712 break; 6713 case 'p': 6714 AndMask |= Mask; 6715 break; 6716 case 'i': 6717 AndMask |= Mask; 6718 XorMask |= Mask; 6719 break; 6720 } 6721 } 6722 6723 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6724 return true; 6725 } 6726 6727 bool 6728 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6729 6730 SMLoc OffsetLoc = getLoc(); 6731 6732 if (!parseExpr(Imm, "a swizzle macro")) { 6733 return false; 6734 } 6735 if (!isUInt<16>(Imm)) { 6736 Error(OffsetLoc, "expected a 16-bit offset"); 6737 return false; 6738 } 6739 return true; 6740 } 6741 6742 bool 6743 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6744 using namespace llvm::AMDGPU::Swizzle; 6745 6746 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6747 6748 SMLoc ModeLoc = getLoc(); 6749 bool Ok = false; 6750 6751 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6752 Ok = parseSwizzleQuadPerm(Imm); 6753 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6754 Ok = parseSwizzleBitmaskPerm(Imm); 6755 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6756 Ok = parseSwizzleBroadcast(Imm); 6757 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6758 Ok = parseSwizzleSwap(Imm); 6759 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6760 Ok = parseSwizzleReverse(Imm); 6761 } else { 6762 Error(ModeLoc, "expected a swizzle mode"); 6763 } 6764 6765 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6766 } 6767 6768 return false; 6769 } 6770 6771 OperandMatchResultTy 6772 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6773 SMLoc S = getLoc(); 6774 int64_t Imm = 0; 6775 6776 if (trySkipId("offset")) { 6777 6778 bool Ok = false; 6779 if (skipToken(AsmToken::Colon, "expected a colon")) { 6780 if (trySkipId("swizzle")) { 6781 Ok = parseSwizzleMacro(Imm); 6782 } else { 6783 Ok = parseSwizzleOffset(Imm); 6784 } 6785 } 6786 6787 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6788 6789 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6790 } else { 6791 // Swizzle "offset" operand is optional. 6792 // If it is omitted, try parsing other optional operands. 6793 return parseOptionalOpr(Operands); 6794 } 6795 } 6796 6797 bool 6798 AMDGPUOperand::isSwizzle() const { 6799 return isImmTy(ImmTySwizzle); 6800 } 6801 6802 //===----------------------------------------------------------------------===// 6803 // VGPR Index Mode 6804 //===----------------------------------------------------------------------===// 6805 6806 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6807 6808 using namespace llvm::AMDGPU::VGPRIndexMode; 6809 6810 if (trySkipToken(AsmToken::RParen)) { 6811 return OFF; 6812 } 6813 6814 int64_t Imm = 0; 6815 6816 while (true) { 6817 unsigned Mode = 0; 6818 SMLoc S = getLoc(); 6819 6820 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6821 if (trySkipId(IdSymbolic[ModeId])) { 6822 Mode = 1 << ModeId; 6823 break; 6824 } 6825 } 6826 6827 if (Mode == 0) { 6828 Error(S, (Imm == 0)? 
6829 "expected a VGPR index mode or a closing parenthesis" : 6830 "expected a VGPR index mode"); 6831 return UNDEF; 6832 } 6833 6834 if (Imm & Mode) { 6835 Error(S, "duplicate VGPR index mode"); 6836 return UNDEF; 6837 } 6838 Imm |= Mode; 6839 6840 if (trySkipToken(AsmToken::RParen)) 6841 break; 6842 if (!skipToken(AsmToken::Comma, 6843 "expected a comma or a closing parenthesis")) 6844 return UNDEF; 6845 } 6846 6847 return Imm; 6848 } 6849 6850 OperandMatchResultTy 6851 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6852 6853 using namespace llvm::AMDGPU::VGPRIndexMode; 6854 6855 int64_t Imm = 0; 6856 SMLoc S = getLoc(); 6857 6858 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6859 Imm = parseGPRIdxMacro(); 6860 if (Imm == UNDEF) 6861 return MatchOperand_ParseFail; 6862 } else { 6863 if (getParser().parseAbsoluteExpression(Imm)) 6864 return MatchOperand_ParseFail; 6865 if (Imm < 0 || !isUInt<4>(Imm)) { 6866 Error(S, "invalid immediate: only 4-bit values are legal"); 6867 return MatchOperand_ParseFail; 6868 } 6869 } 6870 6871 Operands.push_back( 6872 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6873 return MatchOperand_Success; 6874 } 6875 6876 bool AMDGPUOperand::isGPRIdxMode() const { 6877 return isImmTy(ImmTyGprIdxMode); 6878 } 6879 6880 //===----------------------------------------------------------------------===// 6881 // sopp branch targets 6882 //===----------------------------------------------------------------------===// 6883 6884 OperandMatchResultTy 6885 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6886 6887 // Make sure we are not parsing something 6888 // that looks like a label or an expression but is not. 6889 // This will improve error messages. 6890 if (isRegister() || isModifier()) 6891 return MatchOperand_NoMatch; 6892 6893 if (!parseExpr(Operands)) 6894 return MatchOperand_ParseFail; 6895 6896 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6897 assert(Opr.isImm() || Opr.isExpr()); 6898 SMLoc Loc = Opr.getStartLoc(); 6899 6900 // Currently we do not support arbitrary expressions as branch targets. 6901 // Only labels and absolute expressions are accepted. 
6902 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6903 Error(Loc, "expected an absolute expression or a label"); 6904 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6905 Error(Loc, "expected a 16-bit signed jump offset"); 6906 } 6907 6908 return MatchOperand_Success; 6909 } 6910 6911 //===----------------------------------------------------------------------===// 6912 // Boolean holding registers 6913 //===----------------------------------------------------------------------===// 6914 6915 OperandMatchResultTy 6916 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6917 return parseReg(Operands); 6918 } 6919 6920 //===----------------------------------------------------------------------===// 6921 // mubuf 6922 //===----------------------------------------------------------------------===// 6923 6924 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6925 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6926 } 6927 6928 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6929 const OperandVector &Operands, 6930 bool IsAtomic, 6931 bool IsLds) { 6932 bool IsLdsOpcode = IsLds; 6933 bool HasLdsModifier = false; 6934 OptionalImmIndexMap OptionalIdx; 6935 unsigned FirstOperandIdx = 1; 6936 bool IsAtomicReturn = false; 6937 6938 if (IsAtomic) { 6939 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6940 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6941 if (!Op.isCPol()) 6942 continue; 6943 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 6944 break; 6945 } 6946 6947 if (!IsAtomicReturn) { 6948 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 6949 if (NewOpc != -1) 6950 Inst.setOpcode(NewOpc); 6951 } 6952 6953 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 6954 SIInstrFlags::IsAtomicRet; 6955 } 6956 6957 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6958 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6959 6960 // Add the register arguments 6961 if (Op.isReg()) { 6962 Op.addRegOperands(Inst, 1); 6963 // Insert a tied src for atomic return dst. 6964 // This cannot be postponed as subsequent calls to 6965 // addImmOperands rely on correct number of MC operands. 6966 if (IsAtomicReturn && i == FirstOperandIdx) 6967 Op.addRegOperands(Inst, 1); 6968 continue; 6969 } 6970 6971 // Handle the case where soffset is an immediate 6972 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6973 Op.addImmOperands(Inst, 1); 6974 continue; 6975 } 6976 6977 HasLdsModifier |= Op.isLDS(); 6978 6979 // Handle tokens like 'offen' which are sometimes hard-coded into the 6980 // asm string. There are no MCInst operands for these. 6981 if (Op.isToken()) { 6982 continue; 6983 } 6984 assert(Op.isImm()); 6985 6986 // Handle optional arguments 6987 OptionalIdx[Op.getImmTy()] = i; 6988 } 6989 6990 // This is a workaround for an llvm quirk which may result in an 6991 // incorrect instruction selection. Lds and non-lds versions of 6992 // MUBUF instructions are identical except that lds versions 6993 // have mandatory 'lds' modifier. However this modifier follows 6994 // optional modifiers and llvm asm matcher regards this 'lds' 6995 // modifier as an optional one. As a result, an lds version 6996 // of opcode may be selected even if it has no 'lds' modifier. 6997 if (IsLdsOpcode && !HasLdsModifier) { 6998 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6999 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
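      // Switch to the equivalent non-lds opcode so the encoded operands match
      // what was actually parsed.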
7000 Inst.setOpcode(NoLdsOpcode); 7001 IsLdsOpcode = false; 7002 } 7003 } 7004 7005 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7007 7008 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7009 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7010 } 7011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7012 } 7013 7014 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7015 OptionalImmIndexMap OptionalIdx; 7016 7017 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7018 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7019 7020 // Add the register arguments 7021 if (Op.isReg()) { 7022 Op.addRegOperands(Inst, 1); 7023 continue; 7024 } 7025 7026 // Handle the case where soffset is an immediate 7027 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7028 Op.addImmOperands(Inst, 1); 7029 continue; 7030 } 7031 7032 // Handle tokens like 'offen' which are sometimes hard-coded into the 7033 // asm string. There are no MCInst operands for these. 7034 if (Op.isToken()) { 7035 continue; 7036 } 7037 assert(Op.isImm()); 7038 7039 // Handle optional arguments 7040 OptionalIdx[Op.getImmTy()] = i; 7041 } 7042 7043 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7044 AMDGPUOperand::ImmTyOffset); 7045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7049 } 7050 7051 //===----------------------------------------------------------------------===// 7052 // mimg 7053 //===----------------------------------------------------------------------===// 7054 7055 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7056 bool IsAtomic) { 7057 unsigned I = 1; 7058 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7059 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7060 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7061 } 7062 7063 if (IsAtomic) { 7064 // Add src, same as dst 7065 assert(Desc.getNumDefs() == 1); 7066 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7067 } 7068 7069 OptionalImmIndexMap OptionalIdx; 7070 7071 for (unsigned E = Operands.size(); I != E; ++I) { 7072 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7073 7074 // Add the register arguments 7075 if (Op.isReg()) { 7076 Op.addRegOperands(Inst, 1); 7077 } else if (Op.isImmModifier()) { 7078 OptionalIdx[Op.getImmTy()] = I; 7079 } else if (!Op.isToken()) { 7080 llvm_unreachable("unexpected operand type"); 7081 } 7082 } 7083 7084 bool IsGFX10Plus = isGFX10Plus(); 7085 7086 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7087 if (IsGFX10Plus) 7088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7089 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7090 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7091 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7092 if (IsGFX10Plus) 7093 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7094 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7097 if (!IsGFX10Plus) 7098 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7100 } 7101 7102 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7103 cvtMIMG(Inst, Operands, true); 7104 } 7105 7106 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7107 OptionalImmIndexMap OptionalIdx; 7108 bool IsAtomicReturn = false; 7109 7110 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7111 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7112 if (!Op.isCPol()) 7113 continue; 7114 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7115 break; 7116 } 7117 7118 if (!IsAtomicReturn) { 7119 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7120 if (NewOpc != -1) 7121 Inst.setOpcode(NewOpc); 7122 } 7123 7124 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7125 SIInstrFlags::IsAtomicRet; 7126 7127 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7128 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7129 7130 // Add the register arguments 7131 if (Op.isReg()) { 7132 Op.addRegOperands(Inst, 1); 7133 if (IsAtomicReturn && i == 1) 7134 Op.addRegOperands(Inst, 1); 7135 continue; 7136 } 7137 7138 // Handle the case where soffset is an immediate 7139 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7140 Op.addImmOperands(Inst, 1); 7141 continue; 7142 } 7143 7144 // Handle tokens like 'offen' which are sometimes hard-coded into the 7145 // asm string. There are no MCInst operands for these. 7146 if (Op.isToken()) { 7147 continue; 7148 } 7149 assert(Op.isImm()); 7150 7151 // Handle optional arguments 7152 OptionalIdx[Op.getImmTy()] = i; 7153 } 7154 7155 if ((int)Inst.getNumOperands() <= 7156 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7159 } 7160 7161 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7162 const OperandVector &Operands) { 7163 for (unsigned I = 1; I < Operands.size(); ++I) { 7164 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7165 if (Operand.isReg()) 7166 Operand.addRegOperands(Inst, 1); 7167 } 7168 7169 Inst.addOperand(MCOperand::createImm(1)); // a16 7170 } 7171 7172 //===----------------------------------------------------------------------===// 7173 // smrd 7174 //===----------------------------------------------------------------------===// 7175 7176 bool AMDGPUOperand::isSMRDOffset8() const { 7177 return isImm() && isUInt<8>(getImm()); 7178 } 7179 7180 bool AMDGPUOperand::isSMEMOffset() const { 7181 return isImm(); // Offset range is checked later by validator. 7182 } 7183 7184 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7185 // 32-bit literals are only supported on CI and we only want to use them 7186 // when the offset is > 8-bits. 
7187 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7188 } 7189 7190 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7191 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7192 } 7193 7194 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7195 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7196 } 7197 7198 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7199 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7200 } 7201 7202 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7203 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7204 } 7205 7206 //===----------------------------------------------------------------------===// 7207 // vop3 7208 //===----------------------------------------------------------------------===// 7209 7210 static bool ConvertOmodMul(int64_t &Mul) { 7211 if (Mul != 1 && Mul != 2 && Mul != 4) 7212 return false; 7213 7214 Mul >>= 1; 7215 return true; 7216 } 7217 7218 static bool ConvertOmodDiv(int64_t &Div) { 7219 if (Div == 1) { 7220 Div = 0; 7221 return true; 7222 } 7223 7224 if (Div == 2) { 7225 Div = 3; 7226 return true; 7227 } 7228 7229 return false; 7230 } 7231 7232 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7233 // This is intentional and ensures compatibility with sp3. 7234 // See bug 35397 for details. 7235 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7236 if (BoundCtrl == 0 || BoundCtrl == 1) { 7237 BoundCtrl = 1; 7238 return true; 7239 } 7240 return false; 7241 } 7242 7243 // Note: the order in this table matches the order of operands in AsmString. 7244 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7245 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7246 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7247 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7248 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7249 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7250 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7251 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7252 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7253 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7254 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7255 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7256 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7257 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7258 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7259 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7260 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7261 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7262 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7263 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7264 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7265 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7266 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7267 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7268 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7269 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7270 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7271 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7272 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7273 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7274 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7275 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7276 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7277 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7278 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7279 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7280 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7281 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7282 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7283 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7284 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7285 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7286 };
7287
7288 void AMDGPUAsmParser::onBeginOfFile() {
7289 if (!getParser().getStreamer().getTargetStreamer() ||
7290 getSTI().getTargetTriple().getArch() == Triple::r600)
7291 return;
7292
7293 if (!getTargetStreamer().getTargetID())
7294 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7295
7296 if (isHsaAbiVersion3Or4(&getSTI()))
7297 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7298 }
7299
7300 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7301
7302 OperandMatchResultTy res = parseOptionalOpr(Operands);
7303
7304 // This is a hack to enable hardcoded mandatory operands which follow
7305 // optional operands.
7306 //
7307 // The current design assumes that all operands after the first optional operand
7308 // are also optional. However, the implementation of some instructions violates
7309 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7310 //
7311 // To alleviate this problem, we have to (implicitly) parse extra operands
7312 // to make sure the autogenerated parser of custom operands never hits hardcoded
7313 // mandatory operands.
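// For example (illustrative): in an atomic form such as
//   flat_atomic_swap v0, v[1:2], v3 offset:16 glc
// the hardcoded 'glc' token follows the optional 'offset' operand, so the
// lookahead below keeps parsing optional operands until only the hardcoded
// token is left for the matcher.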
7314 7315 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7316 if (res != MatchOperand_Success || 7317 isToken(AsmToken::EndOfStatement)) 7318 break; 7319 7320 trySkipToken(AsmToken::Comma); 7321 res = parseOptionalOpr(Operands); 7322 } 7323 7324 return res; 7325 } 7326 7327 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7328 OperandMatchResultTy res; 7329 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7330 // try to parse any optional operand here 7331 if (Op.IsBit) { 7332 res = parseNamedBit(Op.Name, Operands, Op.Type); 7333 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7334 res = parseOModOperand(Operands); 7335 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7336 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7337 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7338 res = parseSDWASel(Operands, Op.Name, Op.Type); 7339 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7340 res = parseSDWADstUnused(Operands); 7341 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7342 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7343 Op.Type == AMDGPUOperand::ImmTyNegLo || 7344 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7345 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7346 Op.ConvertResult); 7347 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7348 res = parseDim(Operands); 7349 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7350 res = parseCPol(Operands); 7351 } else { 7352 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7353 } 7354 if (res != MatchOperand_NoMatch) { 7355 return res; 7356 } 7357 } 7358 return MatchOperand_NoMatch; 7359 } 7360 7361 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7362 StringRef Name = getTokenStr(); 7363 if (Name == "mul") { 7364 return parseIntWithPrefix("mul", Operands, 7365 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7366 } 7367 7368 if (Name == "div") { 7369 return parseIntWithPrefix("div", Operands, 7370 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7371 } 7372 7373 return MatchOperand_NoMatch; 7374 } 7375 7376 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7377 cvtVOP3P(Inst, Operands); 7378 7379 int Opc = Inst.getOpcode(); 7380 7381 int SrcNum; 7382 const int Ops[] = { AMDGPU::OpName::src0, 7383 AMDGPU::OpName::src1, 7384 AMDGPU::OpName::src2 }; 7385 for (SrcNum = 0; 7386 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7387 ++SrcNum); 7388 assert(SrcNum > 0); 7389 7390 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7391 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7392 7393 if ((OpSel & (1 << SrcNum)) != 0) { 7394 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7395 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7396 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7397 } 7398 } 7399 7400 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7401 // 1. This operand is input modifiers 7402 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7403 // 2. This is not last operand 7404 && Desc.NumOperands > (OpNum + 1) 7405 // 3. Next operand is register class 7406 && Desc.OpInfo[OpNum + 1].RegClass != -1 7407 // 4. 
Next register is not tied to any other operand 7408 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7409 } 7410 7411 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7412 { 7413 OptionalImmIndexMap OptionalIdx; 7414 unsigned Opc = Inst.getOpcode(); 7415 7416 unsigned I = 1; 7417 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7418 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7419 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7420 } 7421 7422 for (unsigned E = Operands.size(); I != E; ++I) { 7423 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7424 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7425 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7426 } else if (Op.isInterpSlot() || 7427 Op.isInterpAttr() || 7428 Op.isAttrChan()) { 7429 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7430 } else if (Op.isImmModifier()) { 7431 OptionalIdx[Op.getImmTy()] = I; 7432 } else { 7433 llvm_unreachable("unhandled operand type"); 7434 } 7435 } 7436 7437 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7438 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7439 } 7440 7441 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7442 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7443 } 7444 7445 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7446 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7447 } 7448 } 7449 7450 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7451 OptionalImmIndexMap &OptionalIdx) { 7452 unsigned Opc = Inst.getOpcode(); 7453 7454 unsigned I = 1; 7455 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7456 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7457 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7458 } 7459 7460 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7461 // This instruction has src modifiers 7462 for (unsigned E = Operands.size(); I != E; ++I) { 7463 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7464 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7465 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7466 } else if (Op.isImmModifier()) { 7467 OptionalIdx[Op.getImmTy()] = I; 7468 } else if (Op.isRegOrImm()) { 7469 Op.addRegOrImmOperands(Inst, 1); 7470 } else { 7471 llvm_unreachable("unhandled operand type"); 7472 } 7473 } 7474 } else { 7475 // No src modifiers 7476 for (unsigned E = Operands.size(); I != E; ++I) { 7477 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7478 if (Op.isMod()) { 7479 OptionalIdx[Op.getImmTy()] = I; 7480 } else { 7481 Op.addRegOrImmOperands(Inst, 1); 7482 } 7483 } 7484 } 7485 7486 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7487 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7488 } 7489 7490 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7491 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7492 } 7493 7494 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7495 // it has src2 register operand that is tied to dst operand 7496 // we don't allow modifiers for this operand in assembler so src2_modifiers 7497 // should be 0. 
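// For example (illustrative): for 'v_mac_f32_e64 v5, v1, v2' the code below
// inserts a zero src2_modifiers immediate and then duplicates the dst operand
// (v5) as the tied src2 operand.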
7498 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7499 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7500 Opc == AMDGPU::V_MAC_F32_e64_vi || 7501 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7502 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7503 Opc == AMDGPU::V_MAC_F16_e64_vi || 7504 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7505 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7506 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7507 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7508 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7509 auto it = Inst.begin(); 7510 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7511 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7512 ++it; 7513 // Copy the operand to ensure it's not invalidated when Inst grows. 7514 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7515 } 7516 } 7517 7518 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7519 OptionalImmIndexMap OptionalIdx; 7520 cvtVOP3(Inst, Operands, OptionalIdx); 7521 } 7522 7523 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7524 OptionalImmIndexMap &OptIdx) { 7525 const int Opc = Inst.getOpcode(); 7526 const MCInstrDesc &Desc = MII.get(Opc); 7527 7528 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7529 7530 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7531 assert(!IsPacked); 7532 Inst.addOperand(Inst.getOperand(0)); 7533 } 7534 7535 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7536 // instruction, and then figure out where to actually put the modifiers 7537 7538 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7539 if (OpSelIdx != -1) { 7540 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7541 } 7542 7543 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7544 if (OpSelHiIdx != -1) { 7545 int DefaultVal = IsPacked ? 
-1 : 0; 7546 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7547 DefaultVal); 7548 } 7549 7550 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7551 if (NegLoIdx != -1) { 7552 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7553 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7554 } 7555 7556 const int Ops[] = { AMDGPU::OpName::src0, 7557 AMDGPU::OpName::src1, 7558 AMDGPU::OpName::src2 }; 7559 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7560 AMDGPU::OpName::src1_modifiers, 7561 AMDGPU::OpName::src2_modifiers }; 7562 7563 unsigned OpSel = 0; 7564 unsigned OpSelHi = 0; 7565 unsigned NegLo = 0; 7566 unsigned NegHi = 0; 7567 7568 if (OpSelIdx != -1) 7569 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7570 7571 if (OpSelHiIdx != -1) 7572 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7573 7574 if (NegLoIdx != -1) { 7575 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7576 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7577 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7578 } 7579 7580 for (int J = 0; J < 3; ++J) { 7581 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7582 if (OpIdx == -1) 7583 break; 7584 7585 uint32_t ModVal = 0; 7586 7587 if ((OpSel & (1 << J)) != 0) 7588 ModVal |= SISrcMods::OP_SEL_0; 7589 7590 if ((OpSelHi & (1 << J)) != 0) 7591 ModVal |= SISrcMods::OP_SEL_1; 7592 7593 if ((NegLo & (1 << J)) != 0) 7594 ModVal |= SISrcMods::NEG; 7595 7596 if ((NegHi & (1 << J)) != 0) 7597 ModVal |= SISrcMods::NEG_HI; 7598 7599 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7600 7601 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7602 } 7603 } 7604 7605 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7606 OptionalImmIndexMap OptIdx; 7607 cvtVOP3(Inst, Operands, OptIdx); 7608 cvtVOP3P(Inst, Operands, OptIdx); 7609 } 7610 7611 //===----------------------------------------------------------------------===// 7612 // dpp 7613 //===----------------------------------------------------------------------===// 7614 7615 bool AMDGPUOperand::isDPP8() const { 7616 return isImmTy(ImmTyDPP8); 7617 } 7618 7619 bool AMDGPUOperand::isDPPCtrl() const { 7620 using namespace AMDGPU::DPP; 7621 7622 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7623 if (result) { 7624 int64_t Imm = getImm(); 7625 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7626 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7627 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7628 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7629 (Imm == DppCtrl::WAVE_SHL1) || 7630 (Imm == DppCtrl::WAVE_ROL1) || 7631 (Imm == DppCtrl::WAVE_SHR1) || 7632 (Imm == DppCtrl::WAVE_ROR1) || 7633 (Imm == DppCtrl::ROW_MIRROR) || 7634 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7635 (Imm == DppCtrl::BCAST15) || 7636 (Imm == DppCtrl::BCAST31) || 7637 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7638 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7639 } 7640 return false; 7641 } 7642 7643 //===----------------------------------------------------------------------===// 7644 // mAI 7645 //===----------------------------------------------------------------------===// 7646 7647 bool AMDGPUOperand::isBLGP() const { 7648 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7649 } 7650 7651 bool 
AMDGPUOperand::isCBSZ() const { 7652 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7653 } 7654 7655 bool AMDGPUOperand::isABID() const { 7656 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7657 } 7658 7659 bool AMDGPUOperand::isS16Imm() const { 7660 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7661 } 7662 7663 bool AMDGPUOperand::isU16Imm() const { 7664 return isImm() && isUInt<16>(getImm()); 7665 } 7666 7667 //===----------------------------------------------------------------------===// 7668 // dim 7669 //===----------------------------------------------------------------------===// 7670 7671 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7672 // We want to allow "dim:1D" etc., 7673 // but the initial 1 is tokenized as an integer. 7674 std::string Token; 7675 if (isToken(AsmToken::Integer)) { 7676 SMLoc Loc = getToken().getEndLoc(); 7677 Token = std::string(getTokenStr()); 7678 lex(); 7679 if (getLoc() != Loc) 7680 return false; 7681 } 7682 7683 StringRef Suffix; 7684 if (!parseId(Suffix)) 7685 return false; 7686 Token += Suffix; 7687 7688 StringRef DimId = Token; 7689 if (DimId.startswith("SQ_RSRC_IMG_")) 7690 DimId = DimId.drop_front(12); 7691 7692 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7693 if (!DimInfo) 7694 return false; 7695 7696 Encoding = DimInfo->Encoding; 7697 return true; 7698 } 7699 7700 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7701 if (!isGFX10Plus()) 7702 return MatchOperand_NoMatch; 7703 7704 SMLoc S = getLoc(); 7705 7706 if (!trySkipId("dim", AsmToken::Colon)) 7707 return MatchOperand_NoMatch; 7708 7709 unsigned Encoding; 7710 SMLoc Loc = getLoc(); 7711 if (!parseDimId(Encoding)) { 7712 Error(Loc, "invalid dim value"); 7713 return MatchOperand_ParseFail; 7714 } 7715 7716 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7717 AMDGPUOperand::ImmTyDim)); 7718 return MatchOperand_Success; 7719 } 7720 7721 //===----------------------------------------------------------------------===// 7722 // dpp 7723 //===----------------------------------------------------------------------===// 7724 7725 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7726 SMLoc S = getLoc(); 7727 7728 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7729 return MatchOperand_NoMatch; 7730 7731 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7732 7733 int64_t Sels[8]; 7734 7735 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7736 return MatchOperand_ParseFail; 7737 7738 for (size_t i = 0; i < 8; ++i) { 7739 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7740 return MatchOperand_ParseFail; 7741 7742 SMLoc Loc = getLoc(); 7743 if (getParser().parseAbsoluteExpression(Sels[i])) 7744 return MatchOperand_ParseFail; 7745 if (0 > Sels[i] || 7 < Sels[i]) { 7746 Error(Loc, "expected a 3-bit value"); 7747 return MatchOperand_ParseFail; 7748 } 7749 } 7750 7751 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7752 return MatchOperand_ParseFail; 7753 7754 unsigned DPP8 = 0; 7755 for (size_t i = 0; i < 8; ++i) 7756 DPP8 |= (Sels[i] << (i * 3)); 7757 7758 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7759 return MatchOperand_Success; 7760 } 7761 7762 bool 7763 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7764 const OperandVector &Operands) { 7765 if (Ctrl == "row_newbcast") 7766 return isGFX90A(); 7767 7768 if (Ctrl == "row_share" || 7769 Ctrl 
== "row_xmask") 7770 return isGFX10Plus(); 7771 7772 if (Ctrl == "wave_shl" || 7773 Ctrl == "wave_shr" || 7774 Ctrl == "wave_rol" || 7775 Ctrl == "wave_ror" || 7776 Ctrl == "row_bcast") 7777 return isVI() || isGFX9(); 7778 7779 return Ctrl == "row_mirror" || 7780 Ctrl == "row_half_mirror" || 7781 Ctrl == "quad_perm" || 7782 Ctrl == "row_shl" || 7783 Ctrl == "row_shr" || 7784 Ctrl == "row_ror"; 7785 } 7786 7787 int64_t 7788 AMDGPUAsmParser::parseDPPCtrlPerm() { 7789 // quad_perm:[%d,%d,%d,%d] 7790 7791 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7792 return -1; 7793 7794 int64_t Val = 0; 7795 for (int i = 0; i < 4; ++i) { 7796 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7797 return -1; 7798 7799 int64_t Temp; 7800 SMLoc Loc = getLoc(); 7801 if (getParser().parseAbsoluteExpression(Temp)) 7802 return -1; 7803 if (Temp < 0 || Temp > 3) { 7804 Error(Loc, "expected a 2-bit value"); 7805 return -1; 7806 } 7807 7808 Val += (Temp << i * 2); 7809 } 7810 7811 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7812 return -1; 7813 7814 return Val; 7815 } 7816 7817 int64_t 7818 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7819 using namespace AMDGPU::DPP; 7820 7821 // sel:%d 7822 7823 int64_t Val; 7824 SMLoc Loc = getLoc(); 7825 7826 if (getParser().parseAbsoluteExpression(Val)) 7827 return -1; 7828 7829 struct DppCtrlCheck { 7830 int64_t Ctrl; 7831 int Lo; 7832 int Hi; 7833 }; 7834 7835 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7836 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7837 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7838 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7839 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7840 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7841 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7842 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7843 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7844 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7845 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7846 .Default({-1, 0, 0}); 7847 7848 bool Valid; 7849 if (Check.Ctrl == -1) { 7850 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7851 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7852 } else { 7853 Valid = Check.Lo <= Val && Val <= Check.Hi; 7854 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7855 } 7856 7857 if (!Valid) { 7858 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7859 return -1; 7860 } 7861 7862 return Val; 7863 } 7864 7865 OperandMatchResultTy 7866 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7867 using namespace AMDGPU::DPP; 7868 7869 if (!isToken(AsmToken::Identifier) || 7870 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7871 return MatchOperand_NoMatch; 7872 7873 SMLoc S = getLoc(); 7874 int64_t Val = -1; 7875 StringRef Ctrl; 7876 7877 parseId(Ctrl); 7878 7879 if (Ctrl == "row_mirror") { 7880 Val = DppCtrl::ROW_MIRROR; 7881 } else if (Ctrl == "row_half_mirror") { 7882 Val = DppCtrl::ROW_HALF_MIRROR; 7883 } else { 7884 if (skipToken(AsmToken::Colon, "expected a colon")) { 7885 if (Ctrl == "quad_perm") { 7886 Val = parseDPPCtrlPerm(); 7887 } else { 7888 Val = parseDPPCtrlSel(Ctrl); 7889 } 7890 } 7891 } 7892 7893 if (Val == -1) 7894 return MatchOperand_ParseFail; 7895 7896 Operands.push_back( 7897 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7898 return MatchOperand_Success; 7899 } 7900 7901 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7902 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7903 } 7904 7905 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7906 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7907 } 7908 7909 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7910 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7911 } 7912 7913 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7914 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7915 } 7916 7917 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7918 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7919 } 7920 7921 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7922 OptionalImmIndexMap OptionalIdx; 7923 7924 unsigned I = 1; 7925 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7926 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7927 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7928 } 7929 7930 int Fi = 0; 7931 for (unsigned E = Operands.size(); I != E; ++I) { 7932 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7933 MCOI::TIED_TO); 7934 if (TiedTo != -1) { 7935 assert((unsigned)TiedTo < Inst.getNumOperands()); 7936 // handle tied old or src2 for MAC instructions 7937 Inst.addOperand(Inst.getOperand(TiedTo)); 7938 } 7939 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7940 // Add the register arguments 7941 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7942 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7943 // Skip it. 
7944 continue; 7945 } 7946 7947 if (IsDPP8) { 7948 if (Op.isDPP8()) { 7949 Op.addImmOperands(Inst, 1); 7950 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7951 Op.addRegWithFPInputModsOperands(Inst, 2); 7952 } else if (Op.isFI()) { 7953 Fi = Op.getImm(); 7954 } else if (Op.isReg()) { 7955 Op.addRegOperands(Inst, 1); 7956 } else { 7957 llvm_unreachable("Invalid operand type"); 7958 } 7959 } else { 7960 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7961 Op.addRegWithFPInputModsOperands(Inst, 2); 7962 } else if (Op.isDPPCtrl()) { 7963 Op.addImmOperands(Inst, 1); 7964 } else if (Op.isImm()) { 7965 // Handle optional arguments 7966 OptionalIdx[Op.getImmTy()] = I; 7967 } else { 7968 llvm_unreachable("Invalid operand type"); 7969 } 7970 } 7971 } 7972 7973 if (IsDPP8) { 7974 using namespace llvm::AMDGPU::DPP; 7975 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7976 } else { 7977 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7978 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7980 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7982 } 7983 } 7984 } 7985 7986 //===----------------------------------------------------------------------===// 7987 // sdwa 7988 //===----------------------------------------------------------------------===// 7989 7990 OperandMatchResultTy 7991 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7992 AMDGPUOperand::ImmTy Type) { 7993 using namespace llvm::AMDGPU::SDWA; 7994 7995 SMLoc S = getLoc(); 7996 StringRef Value; 7997 OperandMatchResultTy res; 7998 7999 SMLoc StringLoc; 8000 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8001 if (res != MatchOperand_Success) { 8002 return res; 8003 } 8004 8005 int64_t Int; 8006 Int = StringSwitch<int64_t>(Value) 8007 .Case("BYTE_0", SdwaSel::BYTE_0) 8008 .Case("BYTE_1", SdwaSel::BYTE_1) 8009 .Case("BYTE_2", SdwaSel::BYTE_2) 8010 .Case("BYTE_3", SdwaSel::BYTE_3) 8011 .Case("WORD_0", SdwaSel::WORD_0) 8012 .Case("WORD_1", SdwaSel::WORD_1) 8013 .Case("DWORD", SdwaSel::DWORD) 8014 .Default(0xffffffff); 8015 8016 if (Int == 0xffffffff) { 8017 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8018 return MatchOperand_ParseFail; 8019 } 8020 8021 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8022 return MatchOperand_Success; 8023 } 8024 8025 OperandMatchResultTy 8026 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8027 using namespace llvm::AMDGPU::SDWA; 8028 8029 SMLoc S = getLoc(); 8030 StringRef Value; 8031 OperandMatchResultTy res; 8032 8033 SMLoc StringLoc; 8034 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8035 if (res != MatchOperand_Success) { 8036 return res; 8037 } 8038 8039 int64_t Int; 8040 Int = StringSwitch<int64_t>(Value) 8041 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8042 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8043 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8044 .Default(0xffffffff); 8045 8046 if (Int == 0xffffffff) { 8047 Error(StringLoc, "invalid dst_unused value"); 8048 return MatchOperand_ParseFail; 8049 } 8050 8051 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8052 return MatchOperand_Success; 8053 } 8054 8055 void 
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8056 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8057 } 8058 8059 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8060 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8061 } 8062 8063 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8064 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8065 } 8066 8067 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8068 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8069 } 8070 8071 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8072 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8073 } 8074 8075 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8076 uint64_t BasicInstType, 8077 bool SkipDstVcc, 8078 bool SkipSrcVcc) { 8079 using namespace llvm::AMDGPU::SDWA; 8080 8081 OptionalImmIndexMap OptionalIdx; 8082 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8083 bool SkippedVcc = false; 8084 8085 unsigned I = 1; 8086 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8087 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8088 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8089 } 8090 8091 for (unsigned E = Operands.size(); I != E; ++I) { 8092 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8093 if (SkipVcc && !SkippedVcc && Op.isReg() && 8094 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8095 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8096 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8097 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8098 // Skip VCC only if we didn't skip it on previous iteration. 8099 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
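// (Illustrative) the counts tested below: for VOP2, after the dst is added
// Inst holds 1 operand (a dst-position vcc is skipped there); after src0 and
// src1 it holds 5 operands (a trailing source vcc, as in v_addc_u32_sdwa, is
// skipped there).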
8100 if (BasicInstType == SIInstrFlags::VOP2 && 8101 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8102 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8103 SkippedVcc = true; 8104 continue; 8105 } else if (BasicInstType == SIInstrFlags::VOPC && 8106 Inst.getNumOperands() == 0) { 8107 SkippedVcc = true; 8108 continue; 8109 } 8110 } 8111 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8112 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8113 } else if (Op.isImm()) { 8114 // Handle optional arguments 8115 OptionalIdx[Op.getImmTy()] = I; 8116 } else { 8117 llvm_unreachable("Invalid operand type"); 8118 } 8119 SkippedVcc = false; 8120 } 8121 8122 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8123 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8124 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8125 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8126 switch (BasicInstType) { 8127 case SIInstrFlags::VOP1: 8128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8129 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8130 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8131 } 8132 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8134 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8135 break; 8136 8137 case SIInstrFlags::VOP2: 8138 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8139 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8140 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8141 } 8142 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8144 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8145 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8146 break; 8147 8148 case SIInstrFlags::VOPC: 8149 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8150 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8151 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8152 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8153 break; 8154 8155 default: 8156 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed");
8157 }
8158 }
8159
8160 // Special case v_mac_{f16, f32}:
8161 // it has a src2 register operand that is tied to the dst operand
8162 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8163 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8164 auto it = Inst.begin();
8165 std::advance(
8166 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8167 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8168 }
8169 }
8170
8171 //===----------------------------------------------------------------------===//
8172 // mAI
8173 //===----------------------------------------------------------------------===//
8174
8175 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8176 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8177 }
8178
8179 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8180 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8181 }
8182
8183 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8184 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8185 }
8186
8187 /// Force static initialization.
8188 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8189 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8190 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8191 }
8192
8193 #define GET_REGISTER_MATCHER
8194 #define GET_MATCHER_IMPLEMENTATION
8195 #define GET_MNEMONIC_SPELL_CHECKER
8196 #define GET_MNEMONIC_CHECKER
8197 #include "AMDGPUGenAsmMatcher.inc"
8198
8199 // This function should be defined after auto-generated include so that we have
8200 // MatchClassKind enum defined
8201 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8202 unsigned Kind) {
8203 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8204 // But MatchInstructionImpl() expects a token and fails to validate the
8205 // operand. This method checks if we are given an immediate operand but expect
8206 // to get the corresponding token.
8207 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8208 switch (Kind) {
8209 case MCK_addr64:
8210 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8211 case MCK_gds:
8212 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8213 case MCK_lds:
8214 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8215 case MCK_idxen:
8216 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8217 case MCK_offen:
8218 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8219 case MCK_SSrcB32:
8220 // When operands have expression values, they will return true for isToken,
8221 // because it is not possible to distinguish between a token and an
8222 // expression at parse time. MatchInstructionImpl() will always try to
8223 // match an operand as a token, when isToken returns true, and when the
8224 // name of the expression is not a valid token, the match will fail,
8225 // so we need to handle it here.
8226 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8227 case MCK_SSrcF32:
8228 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8229 case MCK_SoppBrTarget:
8230 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8231 case MCK_VReg32OrOff:
8232 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8233 case MCK_InterpSlot:
8234 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8235 case MCK_Attr: 8236 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8237 case MCK_AttrChan: 8238 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8239 case MCK_ImmSMEMOffset: 8240 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8241 case MCK_SReg_64: 8242 case MCK_SReg_64_XEXEC: 8243 // Null is defined as a 32-bit register but 8244 // it should also be enabled with 64-bit operands. 8245 // The following code enables it for SReg_64 operands 8246 // used as source and destination. Remaining source 8247 // operands are handled in isInlinableImm. 8248 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8249 default: 8250 return Match_InvalidOperand; 8251 } 8252 } 8253 8254 //===----------------------------------------------------------------------===// 8255 // endpgm 8256 //===----------------------------------------------------------------------===// 8257 8258 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8259 SMLoc S = getLoc(); 8260 int64_t Imm = 0; 8261 8262 if (!parseExpr(Imm)) { 8263 // The operand is optional, if not present default to 0 8264 Imm = 0; 8265 } 8266 8267 if (!isUInt<16>(Imm)) { 8268 Error(S, "expected a 16-bit value"); 8269 return MatchOperand_ParseFail; 8270 } 8271 8272 Operands.push_back( 8273 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8274 return MatchOperand_Success; 8275 } 8276 8277 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8278