1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTySCCB,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
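    // For example (a hypothetical input, not taken from this file): when
    // parsing "ds_add_u32 v1, v2 gds", the trailing "gds" may come back from
    // the expression parser as an MCSymbolRefExpr; using the referenced
    // symbol's name as the token text still lets it match the optional 'gds'
    // operand.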
207 return isSymbolRefExpr(); 208 } 209 210 bool isSymbolRefExpr() const { 211 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 212 } 213 214 bool isImm() const override { 215 return Kind == Immediate; 216 } 217 218 void setImmKindNone() const { 219 assert(isImm()); 220 Imm.Kind = ImmKindTyNone; 221 } 222 223 void setImmKindLiteral() const { 224 assert(isImm()); 225 Imm.Kind = ImmKindTyLiteral; 226 } 227 228 void setImmKindConst() const { 229 assert(isImm()); 230 Imm.Kind = ImmKindTyConst; 231 } 232 233 bool IsImmKindLiteral() const { 234 return isImm() && Imm.Kind == ImmKindTyLiteral; 235 } 236 237 bool isImmKindConst() const { 238 return isImm() && Imm.Kind == ImmKindTyConst; 239 } 240 241 bool isInlinableImm(MVT type) const; 242 bool isLiteralImm(MVT type) const; 243 244 bool isRegKind() const { 245 return Kind == Register; 246 } 247 248 bool isReg() const override { 249 return isRegKind() && !hasModifiers(); 250 } 251 252 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 253 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 254 } 255 256 bool isRegOrImmWithInt16InputMods() const { 257 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 258 } 259 260 bool isRegOrImmWithInt32InputMods() const { 261 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 262 } 263 264 bool isRegOrImmWithInt64InputMods() const { 265 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 266 } 267 268 bool isRegOrImmWithFP16InputMods() const { 269 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 270 } 271 272 bool isRegOrImmWithFP32InputMods() const { 273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 274 } 275 276 bool isRegOrImmWithFP64InputMods() const { 277 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 278 } 279 280 bool isVReg() const { 281 return isRegClass(AMDGPU::VGPR_32RegClassID) || 282 isRegClass(AMDGPU::VReg_64RegClassID) || 283 isRegClass(AMDGPU::VReg_96RegClassID) || 284 isRegClass(AMDGPU::VReg_128RegClassID) || 285 isRegClass(AMDGPU::VReg_160RegClassID) || 286 isRegClass(AMDGPU::VReg_192RegClassID) || 287 isRegClass(AMDGPU::VReg_256RegClassID) || 288 isRegClass(AMDGPU::VReg_512RegClassID) || 289 isRegClass(AMDGPU::VReg_1024RegClassID); 290 } 291 292 bool isVReg32() const { 293 return isRegClass(AMDGPU::VGPR_32RegClassID); 294 } 295 296 bool isVReg32OrOff() const { 297 return isOff() || isVReg32(); 298 } 299 300 bool isNull() const { 301 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 302 } 303 304 bool isVRegWithInputMods() const; 305 306 bool isSDWAOperand(MVT type) const; 307 bool isSDWAFP16Operand() const; 308 bool isSDWAFP32Operand() const; 309 bool isSDWAInt16Operand() const; 310 bool isSDWAInt32Operand() const; 311 312 bool isImmTy(ImmTy ImmT) const { 313 return isImm() && Imm.Type == ImmT; 314 } 315 316 bool isImmModifier() const { 317 return isImm() && Imm.Type != ImmTyNone; 318 } 319 320 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 321 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 322 bool isDMask() const { return isImmTy(ImmTyDMask); } 323 bool isDim() const { return isImmTy(ImmTyDim); } 324 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 325 bool isDA() const { return isImmTy(ImmTyDA); } 326 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 327 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 328 bool isLWE() const { return isImmTy(ImmTyLWE); } 329 bool isOff() const { return isImmTy(ImmTyOff); } 330 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 331 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 332 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 333 bool isOffen() const { return isImmTy(ImmTyOffen); } 334 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 335 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 336 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 337 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 338 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 339 340 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 341 bool isGDS() const { return isImmTy(ImmTyGDS); } 342 bool isLDS() const { return isImmTy(ImmTyLDS); } 343 bool isDLC() const { return isImmTy(ImmTyDLC); } 344 bool isSCCB() const { return isImmTy(ImmTySCCB); } 345 bool isGLC() const { return isImmTy(ImmTyGLC); } 346 // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced 347 // value of the GLC operand. 348 bool isGLC_1() const { return isImmTy(ImmTyGLC); } 349 bool isSLC() const { return isImmTy(ImmTySLC); } 350 bool isSWZ() const { return isImmTy(ImmTySWZ); } 351 bool isTFE() const { return isImmTy(ImmTyTFE); } 352 bool isD16() const { return isImmTy(ImmTyD16); } 353 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 354 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 355 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 356 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 357 bool isFI() const { return isImmTy(ImmTyDppFi); } 358 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 359 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 360 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 361 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 362 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 363 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 364 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 365 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 366 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 367 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 368 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 369 bool isHigh() const { return isImmTy(ImmTyHigh); } 370 371 bool isMod() const { 372 return isClampSI() || isOModSI(); 373 } 374 375 bool isRegOrImm() const { 376 return isReg() || isImm(); 377 } 378 379 bool isRegClass(unsigned RCID) const; 380 381 bool isInlineValue() const; 382 383 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 384 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 385 } 386 387 bool isSCSrcB16() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 389 } 390 391 bool isSCSrcV2B16() const { 392 return isSCSrcB16(); 393 } 394 395 bool isSCSrcB32() const { 396 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 397 } 398 399 bool isSCSrcB64() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 401 } 402 403 bool isBoolReg() const; 404 405 bool isSCSrcF16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 407 } 408 409 bool isSCSrcV2F16() const { 410 return isSCSrcF16(); 411 } 412 413 bool isSCSrcF32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 415 } 416 417 bool isSCSrcF64() const { 418 
return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 419 } 420 421 bool isSSrcB32() const { 422 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 423 } 424 425 bool isSSrcB16() const { 426 return isSCSrcB16() || isLiteralImm(MVT::i16); 427 } 428 429 bool isSSrcV2B16() const { 430 llvm_unreachable("cannot happen"); 431 return isSSrcB16(); 432 } 433 434 bool isSSrcB64() const { 435 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 436 // See isVSrc64(). 437 return isSCSrcB64() || isLiteralImm(MVT::i64); 438 } 439 440 bool isSSrcF32() const { 441 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 442 } 443 444 bool isSSrcF64() const { 445 return isSCSrcB64() || isLiteralImm(MVT::f64); 446 } 447 448 bool isSSrcF16() const { 449 return isSCSrcB16() || isLiteralImm(MVT::f16); 450 } 451 452 bool isSSrcV2F16() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF16(); 455 } 456 457 bool isSSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSSrcF32(); 460 } 461 462 bool isSCSrcV2FP32() const { 463 llvm_unreachable("cannot happen"); 464 return isSCSrcF32(); 465 } 466 467 bool isSSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSSrcB32(); 470 } 471 472 bool isSCSrcV2INT32() const { 473 llvm_unreachable("cannot happen"); 474 return isSCSrcB32(); 475 } 476 477 bool isSSrcOrLdsB32() const { 478 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 479 isLiteralImm(MVT::i32) || isExpr(); 480 } 481 482 bool isVCSrcB32() const { 483 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 484 } 485 486 bool isVCSrcB64() const { 487 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 488 } 489 490 bool isVCSrcB16() const { 491 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 492 } 493 494 bool isVCSrcV2B16() const { 495 return isVCSrcB16(); 496 } 497 498 bool isVCSrcF32() const { 499 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 500 } 501 502 bool isVCSrcF64() const { 503 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 504 } 505 506 bool isVCSrcF16() const { 507 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 508 } 509 510 bool isVCSrcV2F16() const { 511 return isVCSrcF16(); 512 } 513 514 bool isVSrcB32() const { 515 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 516 } 517 518 bool isVSrcB64() const { 519 return isVCSrcF64() || isLiteralImm(MVT::i64); 520 } 521 522 bool isVSrcB16() const { 523 return isVCSrcB16() || isLiteralImm(MVT::i16); 524 } 525 526 bool isVSrcV2B16() const { 527 return isVSrcB16() || isLiteralImm(MVT::v2i16); 528 } 529 530 bool isVCSrcV2FP32() const { 531 return isVCSrcF64(); 532 } 533 534 bool isVSrcV2FP32() const { 535 return isVSrcF64() || isLiteralImm(MVT::v2f32); 536 } 537 538 bool isVCSrcV2INT32() const { 539 return isVCSrcB64(); 540 } 541 542 bool isVSrcV2INT32() const { 543 return isVSrcB64() || isLiteralImm(MVT::v2i32); 544 } 545 546 bool isVSrcF32() const { 547 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 548 } 549 550 bool isVSrcF64() const { 551 return isVCSrcF64() || isLiteralImm(MVT::f64); 552 } 553 554 bool isVSrcF16() const { 555 return isVCSrcF16() || isLiteralImm(MVT::f16); 556 } 557 558 bool isVSrcV2F16() const { 559 return isVSrcF16() || isLiteralImm(MVT::v2f16); 560 } 561 562 bool isVISrcB32() const { 563 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 564 } 565 566 bool isVISrcB16() const { 567 return 
isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 568 } 569 570 bool isVISrcV2B16() const { 571 return isVISrcB16(); 572 } 573 574 bool isVISrcF32() const { 575 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 576 } 577 578 bool isVISrcF16() const { 579 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 580 } 581 582 bool isVISrcV2F16() const { 583 return isVISrcF16() || isVISrcB32(); 584 } 585 586 bool isVISrc_64B64() const { 587 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 588 } 589 590 bool isVISrc_64F64() const { 591 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 592 } 593 594 bool isVISrc_64V2FP32() const { 595 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 596 } 597 598 bool isVISrc_64V2INT32() const { 599 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 600 } 601 602 bool isVISrc_256B64() const { 603 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 604 } 605 606 bool isVISrc_256F64() const { 607 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 608 } 609 610 bool isVISrc_128B16() const { 611 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 612 } 613 614 bool isVISrc_128V2B16() const { 615 return isVISrc_128B16(); 616 } 617 618 bool isVISrc_128B32() const { 619 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 620 } 621 622 bool isVISrc_128F32() const { 623 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 624 } 625 626 bool isVISrc_256V2FP32() const { 627 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 628 } 629 630 bool isVISrc_256V2INT32() const { 631 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 632 } 633 634 bool isVISrc_512B32() const { 635 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 636 } 637 638 bool isVISrc_512B16() const { 639 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 640 } 641 642 bool isVISrc_512V2B16() const { 643 return isVISrc_512B16(); 644 } 645 646 bool isVISrc_512F32() const { 647 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 648 } 649 650 bool isVISrc_512F16() const { 651 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 652 } 653 654 bool isVISrc_512V2F16() const { 655 return isVISrc_512F16() || isVISrc_512B32(); 656 } 657 658 bool isVISrc_1024B32() const { 659 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 660 } 661 662 bool isVISrc_1024B16() const { 663 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 664 } 665 666 bool isVISrc_1024V2B16() const { 667 return isVISrc_1024B16(); 668 } 669 670 bool isVISrc_1024F32() const { 671 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 672 } 673 674 bool isVISrc_1024F16() const { 675 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 676 } 677 678 bool isVISrc_1024V2F16() const { 679 return isVISrc_1024F16() || isVISrc_1024B32(); 680 } 681 682 bool isAISrcB32() const { 683 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 684 } 685 686 bool isAISrcB16() const { 687 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 688 } 689 690 bool isAISrcV2B16() const { 691 return isAISrcB16(); 692 } 693 694 bool isAISrcF32() const { 695 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 696 } 697 698 bool isAISrcF16() const { 699 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 700 } 701 702 bool 
isAISrcV2F16() const { 703 return isAISrcF16() || isAISrcB32(); 704 } 705 706 bool isAISrc_64B64() const { 707 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 708 } 709 710 bool isAISrc_64F64() const { 711 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 712 } 713 714 bool isAISrc_128B32() const { 715 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 716 } 717 718 bool isAISrc_128B16() const { 719 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 720 } 721 722 bool isAISrc_128V2B16() const { 723 return isAISrc_128B16(); 724 } 725 726 bool isAISrc_128F32() const { 727 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 728 } 729 730 bool isAISrc_128F16() const { 731 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 732 } 733 734 bool isAISrc_128V2F16() const { 735 return isAISrc_128F16() || isAISrc_128B32(); 736 } 737 738 bool isVISrc_128F16() const { 739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 740 } 741 742 bool isVISrc_128V2F16() const { 743 return isVISrc_128F16() || isVISrc_128B32(); 744 } 745 746 bool isAISrc_256B64() const { 747 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 748 } 749 750 bool isAISrc_256F64() const { 751 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 752 } 753 754 bool isAISrc_512B32() const { 755 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 756 } 757 758 bool isAISrc_512B16() const { 759 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 760 } 761 762 bool isAISrc_512V2B16() const { 763 return isAISrc_512B16(); 764 } 765 766 bool isAISrc_512F32() const { 767 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 768 } 769 770 bool isAISrc_512F16() const { 771 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 772 } 773 774 bool isAISrc_512V2F16() const { 775 return isAISrc_512F16() || isAISrc_512B32(); 776 } 777 778 bool isAISrc_1024B32() const { 779 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 780 } 781 782 bool isAISrc_1024B16() const { 783 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 784 } 785 786 bool isAISrc_1024V2B16() const { 787 return isAISrc_1024B16(); 788 } 789 790 bool isAISrc_1024F32() const { 791 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 792 } 793 794 bool isAISrc_1024F16() const { 795 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 796 } 797 798 bool isAISrc_1024V2F16() const { 799 return isAISrc_1024F16() || isAISrc_1024B32(); 800 } 801 802 bool isKImmFP32() const { 803 return isLiteralImm(MVT::f32); 804 } 805 806 bool isKImmFP16() const { 807 return isLiteralImm(MVT::f16); 808 } 809 810 bool isMem() const override { 811 return false; 812 } 813 814 bool isExpr() const { 815 return Kind == Expression; 816 } 817 818 bool isSoppBrTarget() const { 819 return isExpr() || isImm(); 820 } 821 822 bool isSWaitCnt() const; 823 bool isHwreg() const; 824 bool isSendMsg() const; 825 bool isSwizzle() const; 826 bool isSMRDOffset8() const; 827 bool isSMEMOffset() const; 828 bool isSMRDLiteralOffset() const; 829 bool isDPP8() const; 830 bool isDPPCtrl() const; 831 bool isBLGP() const; 832 bool isCBSZ() const; 833 bool isABID() const; 834 bool isGPRIdxMode() const; 835 bool isS16Imm() const; 836 bool isU16Imm() const; 837 bool isEndpgm() const; 838 839 StringRef getExpressionAsToken() const { 840 assert(isExpr()); 841 const MCSymbolRefExpr *S = 
cast<MCSymbolRefExpr>(Expr); 842 return S->getSymbol().getName(); 843 } 844 845 StringRef getToken() const { 846 assert(isToken()); 847 848 if (Kind == Expression) 849 return getExpressionAsToken(); 850 851 return StringRef(Tok.Data, Tok.Length); 852 } 853 854 int64_t getImm() const { 855 assert(isImm()); 856 return Imm.Val; 857 } 858 859 void setImm(int64_t Val) { 860 assert(isImm()); 861 Imm.Val = Val; 862 } 863 864 ImmTy getImmTy() const { 865 assert(isImm()); 866 return Imm.Type; 867 } 868 869 unsigned getReg() const override { 870 assert(isRegKind()); 871 return Reg.RegNo; 872 } 873 874 SMLoc getStartLoc() const override { 875 return StartLoc; 876 } 877 878 SMLoc getEndLoc() const override { 879 return EndLoc; 880 } 881 882 SMRange getLocRange() const { 883 return SMRange(StartLoc, EndLoc); 884 } 885 886 Modifiers getModifiers() const { 887 assert(isRegKind() || isImmTy(ImmTyNone)); 888 return isRegKind() ? Reg.Mods : Imm.Mods; 889 } 890 891 void setModifiers(Modifiers Mods) { 892 assert(isRegKind() || isImmTy(ImmTyNone)); 893 if (isRegKind()) 894 Reg.Mods = Mods; 895 else 896 Imm.Mods = Mods; 897 } 898 899 bool hasModifiers() const { 900 return getModifiers().hasModifiers(); 901 } 902 903 bool hasFPModifiers() const { 904 return getModifiers().hasFPModifiers(); 905 } 906 907 bool hasIntModifiers() const { 908 return getModifiers().hasIntModifiers(); 909 } 910 911 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 912 913 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 914 915 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 916 917 template <unsigned Bitwidth> 918 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 919 920 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 921 addKImmFPOperands<16>(Inst, N); 922 } 923 924 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 925 addKImmFPOperands<32>(Inst, N); 926 } 927 928 void addRegOperands(MCInst &Inst, unsigned N) const; 929 930 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 931 addRegOperands(Inst, N); 932 } 933 934 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 935 if (isRegKind()) 936 addRegOperands(Inst, N); 937 else if (isExpr()) 938 Inst.addOperand(MCOperand::createExpr(Expr)); 939 else 940 addImmOperands(Inst, N); 941 } 942 943 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 944 Modifiers Mods = getModifiers(); 945 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 946 if (isRegKind()) { 947 addRegOperands(Inst, N); 948 } else { 949 addImmOperands(Inst, N, false); 950 } 951 } 952 953 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 954 assert(!hasIntModifiers()); 955 addRegOrImmWithInputModsOperands(Inst, N); 956 } 957 958 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 959 assert(!hasFPModifiers()); 960 addRegOrImmWithInputModsOperands(Inst, N); 961 } 962 963 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 964 Modifiers Mods = getModifiers(); 965 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 966 assert(isRegKind()); 967 addRegOperands(Inst, N); 968 } 969 970 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 971 assert(!hasIntModifiers()); 972 addRegWithInputModsOperands(Inst, N); 973 } 974 975 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasFPModifiers()); 977 addRegWithInputModsOperands(Inst, N); 978 } 979 980 void 
addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 981 if (isImm()) 982 addImmOperands(Inst, N); 983 else { 984 assert(isExpr()); 985 Inst.addOperand(MCOperand::createExpr(Expr)); 986 } 987 } 988 989 static void printImmTy(raw_ostream& OS, ImmTy Type) { 990 switch (Type) { 991 case ImmTyNone: OS << "None"; break; 992 case ImmTyGDS: OS << "GDS"; break; 993 case ImmTyLDS: OS << "LDS"; break; 994 case ImmTyOffen: OS << "Offen"; break; 995 case ImmTyIdxen: OS << "Idxen"; break; 996 case ImmTyAddr64: OS << "Addr64"; break; 997 case ImmTyOffset: OS << "Offset"; break; 998 case ImmTyInstOffset: OS << "InstOffset"; break; 999 case ImmTyOffset0: OS << "Offset0"; break; 1000 case ImmTyOffset1: OS << "Offset1"; break; 1001 case ImmTyDLC: OS << "DLC"; break; 1002 case ImmTySCCB: OS << "SCCB"; break; 1003 case ImmTyGLC: OS << "GLC"; break; 1004 case ImmTySLC: OS << "SLC"; break; 1005 case ImmTySWZ: OS << "SWZ"; break; 1006 case ImmTyTFE: OS << "TFE"; break; 1007 case ImmTyD16: OS << "D16"; break; 1008 case ImmTyFORMAT: OS << "FORMAT"; break; 1009 case ImmTyClampSI: OS << "ClampSI"; break; 1010 case ImmTyOModSI: OS << "OModSI"; break; 1011 case ImmTyDPP8: OS << "DPP8"; break; 1012 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1013 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1014 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1015 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1016 case ImmTyDppFi: OS << "FI"; break; 1017 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1018 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1019 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1020 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1021 case ImmTyDMask: OS << "DMask"; break; 1022 case ImmTyDim: OS << "Dim"; break; 1023 case ImmTyUNorm: OS << "UNorm"; break; 1024 case ImmTyDA: OS << "DA"; break; 1025 case ImmTyR128A16: OS << "R128A16"; break; 1026 case ImmTyA16: OS << "A16"; break; 1027 case ImmTyLWE: OS << "LWE"; break; 1028 case ImmTyOff: OS << "Off"; break; 1029 case ImmTyExpTgt: OS << "ExpTgt"; break; 1030 case ImmTyExpCompr: OS << "ExpCompr"; break; 1031 case ImmTyExpVM: OS << "ExpVM"; break; 1032 case ImmTyHwreg: OS << "Hwreg"; break; 1033 case ImmTySendMsg: OS << "SendMsg"; break; 1034 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1035 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1036 case ImmTyAttrChan: OS << "AttrChan"; break; 1037 case ImmTyOpSel: OS << "OpSel"; break; 1038 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1039 case ImmTyNegLo: OS << "NegLo"; break; 1040 case ImmTyNegHi: OS << "NegHi"; break; 1041 case ImmTySwizzle: OS << "Swizzle"; break; 1042 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1043 case ImmTyHigh: OS << "High"; break; 1044 case ImmTyBLGP: OS << "BLGP"; break; 1045 case ImmTyCBSZ: OS << "CBSZ"; break; 1046 case ImmTyABID: OS << "ABID"; break; 1047 case ImmTyEndpgm: OS << "Endpgm"; break; 1048 } 1049 } 1050 1051 void print(raw_ostream &OS) const override { 1052 switch (Kind) { 1053 case Register: 1054 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1055 break; 1056 case Immediate: 1057 OS << '<' << getImm(); 1058 if (getImmTy() != ImmTyNone) { 1059 OS << " type: "; printImmTy(OS, getImmTy()); 1060 } 1061 OS << " mods: " << Imm.Mods << '>'; 1062 break; 1063 case Token: 1064 OS << '\'' << getToken() << '\''; 1065 break; 1066 case Expression: 1067 OS << "<expr " << *Expr << '>'; 1068 break; 1069 } 1070 } 1071 1072 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1073 int64_t Val, SMLoc Loc, 1074 ImmTy Type 
= ImmTyNone, 1075 bool IsFPImm = false) { 1076 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1077 Op->Imm.Val = Val; 1078 Op->Imm.IsFPImm = IsFPImm; 1079 Op->Imm.Kind = ImmKindTyNone; 1080 Op->Imm.Type = Type; 1081 Op->Imm.Mods = Modifiers(); 1082 Op->StartLoc = Loc; 1083 Op->EndLoc = Loc; 1084 return Op; 1085 } 1086 1087 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1088 StringRef Str, SMLoc Loc, 1089 bool HasExplicitEncodingSize = true) { 1090 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1091 Res->Tok.Data = Str.data(); 1092 Res->Tok.Length = Str.size(); 1093 Res->StartLoc = Loc; 1094 Res->EndLoc = Loc; 1095 return Res; 1096 } 1097 1098 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1099 unsigned RegNo, SMLoc S, 1100 SMLoc E) { 1101 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1102 Op->Reg.RegNo = RegNo; 1103 Op->Reg.Mods = Modifiers(); 1104 Op->StartLoc = S; 1105 Op->EndLoc = E; 1106 return Op; 1107 } 1108 1109 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1110 const class MCExpr *Expr, SMLoc S) { 1111 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1112 Op->Expr = Expr; 1113 Op->StartLoc = S; 1114 Op->EndLoc = S; 1115 return Op; 1116 } 1117 }; 1118 1119 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1120 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1121 return OS; 1122 } 1123 1124 //===----------------------------------------------------------------------===// 1125 // AsmParser 1126 //===----------------------------------------------------------------------===// 1127 1128 // Holds info related to the current kernel, e.g. count of SGPRs used. 1129 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1130 // .amdgpu_hsa_kernel or at EOF. 1131 class KernelScopeInfo { 1132 int SgprIndexUnusedMin = -1; 1133 int VgprIndexUnusedMin = -1; 1134 MCContext *Ctx = nullptr; 1135 1136 void usesSgprAt(int i) { 1137 if (i >= SgprIndexUnusedMin) { 1138 SgprIndexUnusedMin = ++i; 1139 if (Ctx) { 1140 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1141 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1142 } 1143 } 1144 } 1145 1146 void usesVgprAt(int i) { 1147 if (i >= VgprIndexUnusedMin) { 1148 VgprIndexUnusedMin = ++i; 1149 if (Ctx) { 1150 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1151 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1152 } 1153 } 1154 } 1155 1156 public: 1157 KernelScopeInfo() = default; 1158 1159 void initialize(MCContext &Context) { 1160 Ctx = &Context; 1161 usesSgprAt(SgprIndexUnusedMin = -1); 1162 usesVgprAt(VgprIndexUnusedMin = -1); 1163 } 1164 1165 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1166 switch (RegKind) { 1167 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1168 case IS_AGPR: // fall through 1169 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1170 default: break; 1171 } 1172 } 1173 }; 1174 1175 class AMDGPUAsmParser : public MCTargetAsmParser { 1176 MCAsmParser &Parser; 1177 1178 // Number of extra operands parsed after the first optional operand. 1179 // This may be necessary to skip hardcoded mandatory operands. 
1180 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1181 1182 unsigned ForcedEncodingSize = 0; 1183 bool ForcedDPP = false; 1184 bool ForcedSDWA = false; 1185 KernelScopeInfo KernelScope; 1186 1187 /// @name Auto-generated Match Functions 1188 /// { 1189 1190 #define GET_ASSEMBLER_HEADER 1191 #include "AMDGPUGenAsmMatcher.inc" 1192 1193 /// } 1194 1195 private: 1196 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1197 bool OutOfRangeError(SMRange Range); 1198 /// Calculate VGPR/SGPR blocks required for given target, reserved 1199 /// registers, and user-specified NextFreeXGPR values. 1200 /// 1201 /// \param Features [in] Target features, used for bug corrections. 1202 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1203 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1204 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1205 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1206 /// descriptor field, if valid. 1207 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1208 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1209 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1210 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1211 /// \param VGPRBlocks [out] Result VGPR block count. 1212 /// \param SGPRBlocks [out] Result SGPR block count. 1213 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1214 bool FlatScrUsed, bool XNACKUsed, 1215 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1216 SMRange VGPRRange, unsigned NextFreeSGPR, 1217 SMRange SGPRRange, unsigned &VGPRBlocks, 1218 unsigned &SGPRBlocks); 1219 bool ParseDirectiveAMDGCNTarget(); 1220 bool ParseDirectiveAMDHSAKernel(); 1221 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1222 bool ParseDirectiveHSACodeObjectVersion(); 1223 bool ParseDirectiveHSACodeObjectISA(); 1224 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1225 bool ParseDirectiveAMDKernelCodeT(); 1226 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1227 bool ParseDirectiveAMDGPUHsaKernel(); 1228 1229 bool ParseDirectiveISAVersion(); 1230 bool ParseDirectiveHSAMetadata(); 1231 bool ParseDirectivePALMetadataBegin(); 1232 bool ParseDirectivePALMetadata(); 1233 bool ParseDirectiveAMDGPULDS(); 1234 1235 /// Common code to parse out a block of text (typically YAML) between start and 1236 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
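      // For example (assumed, typical usage): assembly input may test these
      // symbols with directives such as
      //   .if .option.machine_version_major >= 7
      // so allowing user code to silently redefine them would be error-prone.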
1303 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1304 MCContext &Ctx = getContext(); 1305 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1306 MCSymbol *Sym = 1307 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1311 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1312 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1313 } else { 1314 MCSymbol *Sym = 1315 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1316 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1317 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1318 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1319 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1320 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1321 } 1322 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1323 initializeGprCountSymbol(IS_VGPR); 1324 initializeGprCountSymbol(IS_SGPR); 1325 } else 1326 KernelScope.initialize(getContext()); 1327 } 1328 } 1329 1330 bool hasXNACK() const { 1331 return AMDGPU::hasXNACK(getSTI()); 1332 } 1333 1334 bool hasMIMG_R128() const { 1335 return AMDGPU::hasMIMG_R128(getSTI()); 1336 } 1337 1338 bool hasPackedD16() const { 1339 return AMDGPU::hasPackedD16(getSTI()); 1340 } 1341 1342 bool hasGFX10A16() const { 1343 return AMDGPU::hasGFX10A16(getSTI()); 1344 } 1345 1346 bool isSI() const { 1347 return AMDGPU::isSI(getSTI()); 1348 } 1349 1350 bool isCI() const { 1351 return AMDGPU::isCI(getSTI()); 1352 } 1353 1354 bool isVI() const { 1355 return AMDGPU::isVI(getSTI()); 1356 } 1357 1358 bool isGFX9() const { 1359 return AMDGPU::isGFX9(getSTI()); 1360 } 1361 1362 bool isGFX90A() const { 1363 return AMDGPU::isGFX90A(getSTI()); 1364 } 1365 1366 bool isGFX9Plus() const { 1367 return AMDGPU::isGFX9Plus(getSTI()); 1368 } 1369 1370 bool isGFX10() const { 1371 return AMDGPU::isGFX10(getSTI()); 1372 } 1373 1374 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1375 1376 bool isGFX10_BEncoding() const { 1377 return AMDGPU::isGFX10_BEncoding(getSTI()); 1378 } 1379 1380 bool hasInv2PiInlineImm() const { 1381 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1382 } 1383 1384 bool hasFlatOffsets() const { 1385 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1386 } 1387 1388 bool hasSGPR102_SGPR103() const { 1389 return !isVI() && !isGFX9(); 1390 } 1391 1392 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1393 1394 bool hasIntClamp() const { 1395 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1396 } 1397 1398 AMDGPUTargetStreamer &getTargetStreamer() { 1399 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1400 return static_cast<AMDGPUTargetStreamer &>(TS); 1401 } 1402 1403 const MCRegisterInfo *getMRI() const { 1404 // We need this const_cast because for some reason getContext() is not const 1405 // in MCAsmParser. 
1406 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1407 } 1408 1409 const MCInstrInfo *getMII() const { 1410 return &MII; 1411 } 1412 1413 const FeatureBitset &getFeatureBits() const { 1414 return getSTI().getFeatureBits(); 1415 } 1416 1417 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1418 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1419 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1420 1421 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1422 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1423 bool isForcedDPP() const { return ForcedDPP; } 1424 bool isForcedSDWA() const { return ForcedSDWA; } 1425 ArrayRef<unsigned> getMatchedVariants() const; 1426 StringRef getMatchedVariantName() const; 1427 1428 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1429 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1430 bool RestoreOnFailure); 1431 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1432 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1433 SMLoc &EndLoc) override; 1434 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1435 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1436 unsigned Kind) override; 1437 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1438 OperandVector &Operands, MCStreamer &Out, 1439 uint64_t &ErrorInfo, 1440 bool MatchingInlineAsm) override; 1441 bool ParseDirective(AsmToken DirectiveID) override; 1442 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1443 OperandMode Mode = OperandMode_Default); 1444 StringRef parseMnemonicSuffix(StringRef Name); 1445 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1446 SMLoc NameLoc, OperandVector &Operands) override; 1447 //bool ProcessInstruction(MCInst &Inst); 1448 1449 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1450 1451 OperandMatchResultTy 1452 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1453 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1454 bool (*ConvertResult)(int64_t &) = nullptr); 1455 1456 OperandMatchResultTy 1457 parseOperandArrayWithPrefix(const char *Prefix, 1458 OperandVector &Operands, 1459 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1460 bool (*ConvertResult)(int64_t&) = nullptr); 1461 1462 OperandMatchResultTy 1463 parseNamedBit(StringRef Name, OperandVector &Operands, 1464 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1465 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1466 StringRef &Value, 1467 SMLoc &StringLoc); 1468 1469 bool isModifier(); 1470 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1471 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1472 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1473 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1474 bool parseSP3NegModifier(); 1475 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1476 OperandMatchResultTy parseReg(OperandVector &Operands); 1477 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1478 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1479 OperandMatchResultTy 
parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1480 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1481 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1482 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1483 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1484 OperandMatchResultTy parseUfmt(int64_t &Format); 1485 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1486 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1487 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1488 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1489 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1490 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1491 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1492 1493 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1494 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1495 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1496 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1497 1498 bool parseCnt(int64_t &IntVal); 1499 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1500 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1501 1502 private: 1503 struct OperandInfoTy { 1504 SMLoc Loc; 1505 int64_t Id; 1506 bool IsSymbolic = false; 1507 bool IsDefined = false; 1508 1509 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1510 }; 1511 1512 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1513 bool validateSendMsg(const OperandInfoTy &Msg, 1514 const OperandInfoTy &Op, 1515 const OperandInfoTy &Stream); 1516 1517 bool parseHwregBody(OperandInfoTy &HwReg, 1518 OperandInfoTy &Offset, 1519 OperandInfoTy &Width); 1520 bool validateHwreg(const OperandInfoTy &HwReg, 1521 const OperandInfoTy &Offset, 1522 const OperandInfoTy &Width); 1523 1524 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1525 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1526 1527 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1528 const OperandVector &Operands) const; 1529 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1530 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1531 SMLoc getLitLoc(const OperandVector &Operands) const; 1532 SMLoc getConstLoc(const OperandVector &Operands) const; 1533 1534 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1535 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1536 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateSOPLiteral(const MCInst &Inst) const; 1538 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1539 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1540 bool validateIntClampSupported(const MCInst &Inst); 1541 bool validateMIMGAtomicDMask(const MCInst &Inst); 1542 bool validateMIMGGatherDMask(const MCInst &Inst); 1543 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1544 bool validateMIMGDataSize(const MCInst &Inst); 1545 bool validateMIMGAddrSize(const MCInst &Inst); 1546 bool validateMIMGD16(const 
MCInst &Inst); 1547 bool validateMIMGDim(const MCInst &Inst); 1548 bool validateLdsDirect(const MCInst &Inst); 1549 bool validateOpSel(const MCInst &Inst); 1550 bool validateVccOperand(unsigned Reg) const; 1551 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1552 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1553 bool validateAGPRLdSt(const MCInst &Inst) const; 1554 bool validateVGPRAlign(const MCInst &Inst) const; 1555 bool validateDivScale(const MCInst &Inst); 1556 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1557 const SMLoc &IDLoc); 1558 unsigned getConstantBusLimit(unsigned Opcode) const; 1559 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1560 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1561 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1562 1563 bool isSupportedMnemo(StringRef Mnemo, 1564 const FeatureBitset &FBS); 1565 bool isSupportedMnemo(StringRef Mnemo, 1566 const FeatureBitset &FBS, 1567 ArrayRef<unsigned> Variants); 1568 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1569 1570 bool isId(const StringRef Id) const; 1571 bool isId(const AsmToken &Token, const StringRef Id) const; 1572 bool isToken(const AsmToken::TokenKind Kind) const; 1573 bool trySkipId(const StringRef Id); 1574 bool trySkipId(const StringRef Pref, const StringRef Id); 1575 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1576 bool trySkipToken(const AsmToken::TokenKind Kind); 1577 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1578 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1579 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1580 1581 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1582 AsmToken::TokenKind getTokenKind() const; 1583 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1584 bool parseExpr(OperandVector &Operands); 1585 StringRef getTokenStr() const; 1586 AsmToken peekToken(); 1587 AsmToken getToken() const; 1588 SMLoc getLoc() const; 1589 void lex(); 1590 1591 public: 1592 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1593 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1594 1595 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1596 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1597 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1598 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1599 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1600 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1601 1602 bool parseSwizzleOperand(int64_t &Op, 1603 const unsigned MinVal, 1604 const unsigned MaxVal, 1605 const StringRef ErrMsg, 1606 SMLoc &Loc); 1607 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1608 const unsigned MinVal, 1609 const unsigned MaxVal, 1610 const StringRef ErrMsg); 1611 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1612 bool parseSwizzleOffset(int64_t &Imm); 1613 bool parseSwizzleMacro(int64_t &Imm); 1614 bool parseSwizzleQuadPerm(int64_t &Imm); 1615 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1616 bool parseSwizzleBroadcast(int64_t &Imm); 1617 bool parseSwizzleSwap(int64_t &Imm); 1618 bool parseSwizzleReverse(int64_t &Imm); 1619 1620 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1621 int64_t parseGPRIdxMacro(); 1622 1623 void cvtMubuf(MCInst &Inst, const 
OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1624 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1625 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1626 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1627 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1628 1629 AMDGPUOperand::Ptr defaultDLC() const; 1630 AMDGPUOperand::Ptr defaultSCCB() const; 1631 AMDGPUOperand::Ptr defaultGLC() const; 1632 AMDGPUOperand::Ptr defaultGLC_1() const; 1633 AMDGPUOperand::Ptr defaultSLC() const; 1634 1635 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1636 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1637 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1638 AMDGPUOperand::Ptr defaultFlatOffset() const; 1639 1640 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1641 1642 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1643 OptionalImmIndexMap &OptionalIdx); 1644 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1645 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1646 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1647 1648 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1649 1650 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1651 bool IsAtomic = false); 1652 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1653 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1654 1655 bool parseDimId(unsigned &Encoding); 1656 OperandMatchResultTy parseDim(OperandVector &Operands); 1657 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1658 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1659 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1660 int64_t parseDPPCtrlSel(StringRef Ctrl); 1661 int64_t parseDPPCtrlPerm(); 1662 AMDGPUOperand::Ptr defaultRowMask() const; 1663 AMDGPUOperand::Ptr defaultBankMask() const; 1664 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1665 AMDGPUOperand::Ptr defaultFI() const; 1666 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1667 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1668 1669 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1670 AMDGPUOperand::ImmTy Type); 1671 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1672 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1673 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1675 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1676 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1677 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1678 uint64_t BasicInstType, 1679 bool SkipDstVcc = false, 1680 bool SkipSrcVcc = false); 1681 1682 AMDGPUOperand::Ptr defaultBLGP() const; 1683 AMDGPUOperand::Ptr defaultCBSZ() const; 1684 AMDGPUOperand::Ptr defaultABID() const; 1685 1686 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1687 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1688 }; 1689 1690 struct OptionalOperand { 1691 const char *Name; 1692 AMDGPUOperand::ImmTy Type; 1693 bool IsBit; 1694 bool (*ConvertResult)(int64_t&); 1695 }; 1696 1697 } // end anonymous namespace 

// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the operand's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss, but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
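  // For example, named inline values such as src_shared_base (the
  // "shared_base" mentioned above) are accepted here for both 32-bit and
  // 64-bit operand positions, even though they are defined as 32-bit operands.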
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal will be zeroed out, but we accept such
    // literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)?
MVT::f32 : type; 1896 1897 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1898 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1899 } 1900 1901 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1902 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1903 } 1904 1905 bool AMDGPUOperand::isVRegWithInputMods() const { 1906 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1907 // GFX90A allows DPP on 64-bit operands. 1908 (isRegClass(AMDGPU::VReg_64RegClassID) && 1909 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1910 } 1911 1912 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1913 if (AsmParser->isVI()) 1914 return isVReg32(); 1915 else if (AsmParser->isGFX9Plus()) 1916 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1917 else 1918 return false; 1919 } 1920 1921 bool AMDGPUOperand::isSDWAFP16Operand() const { 1922 return isSDWAOperand(MVT::f16); 1923 } 1924 1925 bool AMDGPUOperand::isSDWAFP32Operand() const { 1926 return isSDWAOperand(MVT::f32); 1927 } 1928 1929 bool AMDGPUOperand::isSDWAInt16Operand() const { 1930 return isSDWAOperand(MVT::i16); 1931 } 1932 1933 bool AMDGPUOperand::isSDWAInt32Operand() const { 1934 return isSDWAOperand(MVT::i32); 1935 } 1936 1937 bool AMDGPUOperand::isBoolReg() const { 1938 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1939 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1940 } 1941 1942 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1943 { 1944 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1945 assert(Size == 2 || Size == 4 || Size == 8); 1946 1947 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1948 1949 if (Imm.Mods.Abs) { 1950 Val &= ~FpSignMask; 1951 } 1952 if (Imm.Mods.Neg) { 1953 Val ^= FpSignMask; 1954 } 1955 1956 return Val; 1957 } 1958 1959 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1960 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1961 Inst.getNumOperands())) { 1962 addLiteralImmOperand(Inst, Imm.Val, 1963 ApplyModifiers & 1964 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1965 } else { 1966 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1967 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1968 setImmKindNone(); 1969 } 1970 } 1971 1972 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1973 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1974 auto OpNum = Inst.getNumOperands(); 1975 // Check that this operand accepts literals 1976 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1977 1978 if (ApplyModifiers) { 1979 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1980 const unsigned Size = Imm.IsFPImm ? 
                          sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point type.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
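  // Values that fit an inline constant are emitted unchanged; anything else is
  // truncated to the operand's width and emitted as a literal (cases below).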
2064 switch (OpTy) { 2065 case AMDGPU::OPERAND_REG_IMM_INT32: 2066 case AMDGPU::OPERAND_REG_IMM_FP32: 2067 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2069 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2070 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2071 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2072 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2073 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2074 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2075 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2076 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2077 if (isSafeTruncation(Val, 32) && 2078 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2079 AsmParser->hasInv2PiInlineImm())) { 2080 Inst.addOperand(MCOperand::createImm(Val)); 2081 setImmKindConst(); 2082 return; 2083 } 2084 2085 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2086 setImmKindLiteral(); 2087 return; 2088 2089 case AMDGPU::OPERAND_REG_IMM_INT64: 2090 case AMDGPU::OPERAND_REG_IMM_FP64: 2091 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2092 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2093 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2094 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2095 Inst.addOperand(MCOperand::createImm(Val)); 2096 setImmKindConst(); 2097 return; 2098 } 2099 2100 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2101 setImmKindLiteral(); 2102 return; 2103 2104 case AMDGPU::OPERAND_REG_IMM_INT16: 2105 case AMDGPU::OPERAND_REG_IMM_FP16: 2106 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2107 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2108 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2109 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2110 if (isSafeTruncation(Val, 16) && 2111 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2112 AsmParser->hasInv2PiInlineImm())) { 2113 Inst.addOperand(MCOperand::createImm(Val)); 2114 setImmKindConst(); 2115 return; 2116 } 2117 2118 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2119 setImmKindLiteral(); 2120 return; 2121 2122 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2124 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2126 assert(isSafeTruncation(Val, 16)); 2127 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2128 AsmParser->hasInv2PiInlineImm())); 2129 2130 Inst.addOperand(MCOperand::createImm(Val)); 2131 return; 2132 } 2133 default: 2134 llvm_unreachable("invalid operand size"); 2135 } 2136 } 2137 2138 template <unsigned Bitwidth> 2139 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2140 APInt Literal(64, Imm.Val); 2141 setImmKindNone(); 2142 2143 if (!Imm.IsFPImm) { 2144 // We got int literal token. 
2145 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2146 return; 2147 } 2148 2149 bool Lost; 2150 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2151 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2152 APFloat::rmNearestTiesToEven, &Lost); 2153 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2154 } 2155 2156 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2157 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2158 } 2159 2160 static bool isInlineValue(unsigned Reg) { 2161 switch (Reg) { 2162 case AMDGPU::SRC_SHARED_BASE: 2163 case AMDGPU::SRC_SHARED_LIMIT: 2164 case AMDGPU::SRC_PRIVATE_BASE: 2165 case AMDGPU::SRC_PRIVATE_LIMIT: 2166 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2167 return true; 2168 case AMDGPU::SRC_VCCZ: 2169 case AMDGPU::SRC_EXECZ: 2170 case AMDGPU::SRC_SCC: 2171 return true; 2172 case AMDGPU::SGPR_NULL: 2173 return true; 2174 default: 2175 return false; 2176 } 2177 } 2178 2179 bool AMDGPUOperand::isInlineValue() const { 2180 return isRegKind() && ::isInlineValue(getReg()); 2181 } 2182 2183 //===----------------------------------------------------------------------===// 2184 // AsmParser 2185 //===----------------------------------------------------------------------===// 2186 2187 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2188 if (Is == IS_VGPR) { 2189 switch (RegWidth) { 2190 default: return -1; 2191 case 1: return AMDGPU::VGPR_32RegClassID; 2192 case 2: return AMDGPU::VReg_64RegClassID; 2193 case 3: return AMDGPU::VReg_96RegClassID; 2194 case 4: return AMDGPU::VReg_128RegClassID; 2195 case 5: return AMDGPU::VReg_160RegClassID; 2196 case 6: return AMDGPU::VReg_192RegClassID; 2197 case 8: return AMDGPU::VReg_256RegClassID; 2198 case 16: return AMDGPU::VReg_512RegClassID; 2199 case 32: return AMDGPU::VReg_1024RegClassID; 2200 } 2201 } else if (Is == IS_TTMP) { 2202 switch (RegWidth) { 2203 default: return -1; 2204 case 1: return AMDGPU::TTMP_32RegClassID; 2205 case 2: return AMDGPU::TTMP_64RegClassID; 2206 case 4: return AMDGPU::TTMP_128RegClassID; 2207 case 8: return AMDGPU::TTMP_256RegClassID; 2208 case 16: return AMDGPU::TTMP_512RegClassID; 2209 } 2210 } else if (Is == IS_SGPR) { 2211 switch (RegWidth) { 2212 default: return -1; 2213 case 1: return AMDGPU::SGPR_32RegClassID; 2214 case 2: return AMDGPU::SGPR_64RegClassID; 2215 case 3: return AMDGPU::SGPR_96RegClassID; 2216 case 4: return AMDGPU::SGPR_128RegClassID; 2217 case 5: return AMDGPU::SGPR_160RegClassID; 2218 case 6: return AMDGPU::SGPR_192RegClassID; 2219 case 8: return AMDGPU::SGPR_256RegClassID; 2220 case 16: return AMDGPU::SGPR_512RegClassID; 2221 } 2222 } else if (Is == IS_AGPR) { 2223 switch (RegWidth) { 2224 default: return -1; 2225 case 1: return AMDGPU::AGPR_32RegClassID; 2226 case 2: return AMDGPU::AReg_64RegClassID; 2227 case 3: return AMDGPU::AReg_96RegClassID; 2228 case 4: return AMDGPU::AReg_128RegClassID; 2229 case 5: return AMDGPU::AReg_160RegClassID; 2230 case 6: return AMDGPU::AReg_192RegClassID; 2231 case 8: return AMDGPU::AReg_256RegClassID; 2232 case 16: return AMDGPU::AReg_512RegClassID; 2233 case 32: return AMDGPU::AReg_1024RegClassID; 2234 } 2235 } 2236 return -1; 2237 } 2238 2239 static unsigned getSpecialRegForName(StringRef RegName) { 2240 return StringSwitch<unsigned>(RegName) 2241 .Case("exec", AMDGPU::EXEC) 2242 .Case("vcc", AMDGPU::VCC) 2243 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2244 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2245 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2246 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2247 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2248 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2249 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2250 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2251 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2252 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2253 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2254 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2255 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2256 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2257 .Case("m0", AMDGPU::M0) 2258 .Case("vccz", AMDGPU::SRC_VCCZ) 2259 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2260 .Case("execz", AMDGPU::SRC_EXECZ) 2261 .Case("src_execz", AMDGPU::SRC_EXECZ) 2262 .Case("scc", AMDGPU::SRC_SCC) 2263 .Case("src_scc", AMDGPU::SRC_SCC) 2264 .Case("tba", AMDGPU::TBA) 2265 .Case("tma", AMDGPU::TMA) 2266 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2267 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2268 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2269 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2270 .Case("vcc_lo", AMDGPU::VCC_LO) 2271 .Case("vcc_hi", AMDGPU::VCC_HI) 2272 .Case("exec_lo", AMDGPU::EXEC_LO) 2273 .Case("exec_hi", AMDGPU::EXEC_HI) 2274 .Case("tma_lo", AMDGPU::TMA_LO) 2275 .Case("tma_hi", AMDGPU::TMA_HI) 2276 .Case("tba_lo", AMDGPU::TBA_LO) 2277 .Case("tba_hi", AMDGPU::TBA_HI) 2278 .Case("pc", AMDGPU::PC_REG) 2279 .Case("null", AMDGPU::SGPR_NULL) 2280 .Default(AMDGPU::NoRegister); 2281 } 2282 2283 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2284 SMLoc &EndLoc, bool RestoreOnFailure) { 2285 auto R = parseRegister(); 2286 if (!R) return true; 2287 assert(R->isReg()); 2288 RegNo = R->getReg(); 2289 StartLoc = R->getStartLoc(); 2290 EndLoc = R->getEndLoc(); 2291 return false; 2292 } 2293 2294 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2295 SMLoc &EndLoc) { 2296 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2297 } 2298 2299 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2300 SMLoc &StartLoc, 2301 SMLoc &EndLoc) { 2302 bool Result = 2303 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2304 bool PendingErrors = getParser().hasPendingError(); 2305 getParser().clearPendingErrors(); 2306 if (PendingErrors) 2307 return MatchOperand_ParseFail; 2308 if (Result) 2309 return MatchOperand_NoMatch; 2310 return MatchOperand_Success; 2311 } 2312 2313 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2314 RegisterKind RegKind, unsigned Reg1, 2315 SMLoc Loc) { 2316 switch (RegKind) { 2317 case IS_SPECIAL: 2318 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2319 Reg = AMDGPU::EXEC; 2320 RegWidth = 2; 2321 return true; 2322 } 2323 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2324 Reg = AMDGPU::FLAT_SCR; 2325 RegWidth = 2; 2326 return true; 2327 } 2328 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2329 Reg = AMDGPU::XNACK_MASK; 2330 RegWidth = 2; 2331 return true; 2332 } 2333 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2334 Reg = AMDGPU::VCC; 2335 RegWidth = 2; 2336 return true; 2337 } 2338 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2339 Reg = AMDGPU::TBA; 2340 RegWidth = 2; 2341 return true; 2342 } 2343 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2344 Reg = AMDGPU::TMA; 2345 
RegWidth = 2; 2346 return true; 2347 } 2348 Error(Loc, "register does not fit in the list"); 2349 return false; 2350 case IS_VGPR: 2351 case IS_SGPR: 2352 case IS_AGPR: 2353 case IS_TTMP: 2354 if (Reg1 != Reg + RegWidth) { 2355 Error(Loc, "registers in a list must have consecutive indices"); 2356 return false; 2357 } 2358 RegWidth++; 2359 return true; 2360 default: 2361 llvm_unreachable("unexpected register kind"); 2362 } 2363 } 2364 2365 struct RegInfo { 2366 StringLiteral Name; 2367 RegisterKind Kind; 2368 }; 2369 2370 static constexpr RegInfo RegularRegisters[] = { 2371 {{"v"}, IS_VGPR}, 2372 {{"s"}, IS_SGPR}, 2373 {{"ttmp"}, IS_TTMP}, 2374 {{"acc"}, IS_AGPR}, 2375 {{"a"}, IS_AGPR}, 2376 }; 2377 2378 static bool isRegularReg(RegisterKind Kind) { 2379 return Kind == IS_VGPR || 2380 Kind == IS_SGPR || 2381 Kind == IS_TTMP || 2382 Kind == IS_AGPR; 2383 } 2384 2385 static const RegInfo* getRegularRegInfo(StringRef Str) { 2386 for (const RegInfo &Reg : RegularRegisters) 2387 if (Str.startswith(Reg.Name)) 2388 return &Reg; 2389 return nullptr; 2390 } 2391 2392 static bool getRegNum(StringRef Str, unsigned& Num) { 2393 return !Str.getAsInteger(10, Num); 2394 } 2395 2396 bool 2397 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2398 const AsmToken &NextToken) const { 2399 2400 // A list of consecutive registers: [s0,s1,s2,s3] 2401 if (Token.is(AsmToken::LBrac)) 2402 return true; 2403 2404 if (!Token.is(AsmToken::Identifier)) 2405 return false; 2406 2407 // A single register like s0 or a range of registers like s[0:1] 2408 2409 StringRef Str = Token.getString(); 2410 const RegInfo *Reg = getRegularRegInfo(Str); 2411 if (Reg) { 2412 StringRef RegName = Reg->Name; 2413 StringRef RegSuffix = Str.substr(RegName.size()); 2414 if (!RegSuffix.empty()) { 2415 unsigned Num; 2416 // A single register with an index: rXX 2417 if (getRegNum(RegSuffix, Num)) 2418 return true; 2419 } else { 2420 // A range of registers: r[XX:YY]. 2421 if (NextToken.is(AsmToken::LBrac)) 2422 return true; 2423 } 2424 } 2425 2426 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2427 } 2428 2429 bool 2430 AMDGPUAsmParser::isRegister() 2431 { 2432 return isRegister(getToken(), peekToken()); 2433 } 2434 2435 unsigned 2436 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2437 unsigned RegNum, 2438 unsigned RegWidth, 2439 SMLoc Loc) { 2440 2441 assert(isRegularReg(RegKind)); 2442 2443 unsigned AlignSize = 1; 2444 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2445 // SGPR and TTMP registers must be aligned. 2446 // Max required alignment is 4 dwords. 
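    // For example, s[2:5] is rejected below: a 4-dword SGPR tuple must start
    // at a register index that is a multiple of 4.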
2447 AlignSize = std::min(RegWidth, 4u); 2448 } 2449 2450 if (RegNum % AlignSize != 0) { 2451 Error(Loc, "invalid register alignment"); 2452 return AMDGPU::NoRegister; 2453 } 2454 2455 unsigned RegIdx = RegNum / AlignSize; 2456 int RCID = getRegClass(RegKind, RegWidth); 2457 if (RCID == -1) { 2458 Error(Loc, "invalid or unsupported register size"); 2459 return AMDGPU::NoRegister; 2460 } 2461 2462 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2463 const MCRegisterClass RC = TRI->getRegClass(RCID); 2464 if (RegIdx >= RC.getNumRegs()) { 2465 Error(Loc, "register index is out of range"); 2466 return AMDGPU::NoRegister; 2467 } 2468 2469 return RC.getRegister(RegIdx); 2470 } 2471 2472 bool 2473 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2474 int64_t RegLo, RegHi; 2475 if (!skipToken(AsmToken::LBrac, "missing register index")) 2476 return false; 2477 2478 SMLoc FirstIdxLoc = getLoc(); 2479 SMLoc SecondIdxLoc; 2480 2481 if (!parseExpr(RegLo)) 2482 return false; 2483 2484 if (trySkipToken(AsmToken::Colon)) { 2485 SecondIdxLoc = getLoc(); 2486 if (!parseExpr(RegHi)) 2487 return false; 2488 } else { 2489 RegHi = RegLo; 2490 } 2491 2492 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2493 return false; 2494 2495 if (!isUInt<32>(RegLo)) { 2496 Error(FirstIdxLoc, "invalid register index"); 2497 return false; 2498 } 2499 2500 if (!isUInt<32>(RegHi)) { 2501 Error(SecondIdxLoc, "invalid register index"); 2502 return false; 2503 } 2504 2505 if (RegLo > RegHi) { 2506 Error(FirstIdxLoc, "first register index should not exceed second index"); 2507 return false; 2508 } 2509 2510 Num = static_cast<unsigned>(RegLo); 2511 Width = (RegHi - RegLo) + 1; 2512 return true; 2513 } 2514 2515 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2516 unsigned &RegNum, unsigned &RegWidth, 2517 SmallVectorImpl<AsmToken> &Tokens) { 2518 assert(isToken(AsmToken::Identifier)); 2519 unsigned Reg = getSpecialRegForName(getTokenStr()); 2520 if (Reg) { 2521 RegNum = 0; 2522 RegWidth = 1; 2523 RegKind = IS_SPECIAL; 2524 Tokens.push_back(getToken()); 2525 lex(); // skip register name 2526 } 2527 return Reg; 2528 } 2529 2530 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2531 unsigned &RegNum, unsigned &RegWidth, 2532 SmallVectorImpl<AsmToken> &Tokens) { 2533 assert(isToken(AsmToken::Identifier)); 2534 StringRef RegName = getTokenStr(); 2535 auto Loc = getLoc(); 2536 2537 const RegInfo *RI = getRegularRegInfo(RegName); 2538 if (!RI) { 2539 Error(Loc, "invalid register name"); 2540 return AMDGPU::NoRegister; 2541 } 2542 2543 Tokens.push_back(getToken()); 2544 lex(); // skip register name 2545 2546 RegKind = RI->Kind; 2547 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2548 if (!RegSuffix.empty()) { 2549 // Single 32-bit register: vXX. 2550 if (!getRegNum(RegSuffix, RegNum)) { 2551 Error(Loc, "invalid register index"); 2552 return AMDGPU::NoRegister; 2553 } 2554 RegWidth = 1; 2555 } else { 2556 // Range of registers: v[XX:YY]. ":YY" is optional. 
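    // For example, v[8:11] gives a 4-register tuple, while v[8] is accepted
    // as a single register equivalent to plain v8.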
2557 if (!ParseRegRange(RegNum, RegWidth)) 2558 return AMDGPU::NoRegister; 2559 } 2560 2561 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2562 } 2563 2564 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2565 unsigned &RegWidth, 2566 SmallVectorImpl<AsmToken> &Tokens) { 2567 unsigned Reg = AMDGPU::NoRegister; 2568 auto ListLoc = getLoc(); 2569 2570 if (!skipToken(AsmToken::LBrac, 2571 "expected a register or a list of registers")) { 2572 return AMDGPU::NoRegister; 2573 } 2574 2575 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2576 2577 auto Loc = getLoc(); 2578 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2579 return AMDGPU::NoRegister; 2580 if (RegWidth != 1) { 2581 Error(Loc, "expected a single 32-bit register"); 2582 return AMDGPU::NoRegister; 2583 } 2584 2585 for (; trySkipToken(AsmToken::Comma); ) { 2586 RegisterKind NextRegKind; 2587 unsigned NextReg, NextRegNum, NextRegWidth; 2588 Loc = getLoc(); 2589 2590 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2591 NextRegNum, NextRegWidth, 2592 Tokens)) { 2593 return AMDGPU::NoRegister; 2594 } 2595 if (NextRegWidth != 1) { 2596 Error(Loc, "expected a single 32-bit register"); 2597 return AMDGPU::NoRegister; 2598 } 2599 if (NextRegKind != RegKind) { 2600 Error(Loc, "registers in a list must be of the same kind"); 2601 return AMDGPU::NoRegister; 2602 } 2603 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2604 return AMDGPU::NoRegister; 2605 } 2606 2607 if (!skipToken(AsmToken::RBrac, 2608 "expected a comma or a closing square bracket")) { 2609 return AMDGPU::NoRegister; 2610 } 2611 2612 if (isRegularReg(RegKind)) 2613 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2614 2615 return Reg; 2616 } 2617 2618 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2619 unsigned &RegNum, unsigned &RegWidth, 2620 SmallVectorImpl<AsmToken> &Tokens) { 2621 auto Loc = getLoc(); 2622 Reg = AMDGPU::NoRegister; 2623 2624 if (isToken(AsmToken::Identifier)) { 2625 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2626 if (Reg == AMDGPU::NoRegister) 2627 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2628 } else { 2629 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2630 } 2631 2632 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2633 if (Reg == AMDGPU::NoRegister) { 2634 assert(Parser.hasPendingError()); 2635 return false; 2636 } 2637 2638 if (!subtargetHasRegister(*TRI, Reg)) { 2639 if (Reg == AMDGPU::SGPR_NULL) { 2640 Error(Loc, "'null' operand is not supported on this GPU"); 2641 } else { 2642 Error(Loc, "register not available on this GPU"); 2643 } 2644 return false; 2645 } 2646 2647 return true; 2648 } 2649 2650 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2651 unsigned &RegNum, unsigned &RegWidth, 2652 bool RestoreOnFailure /*=false*/) { 2653 Reg = AMDGPU::NoRegister; 2654 2655 SmallVector<AsmToken, 1> Tokens; 2656 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2657 if (RestoreOnFailure) { 2658 while (!Tokens.empty()) { 2659 getLexer().UnLex(Tokens.pop_back_val()); 2660 } 2661 } 2662 return true; 2663 } 2664 return false; 2665 } 2666 2667 Optional<StringRef> 2668 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2669 switch (RegKind) { 2670 case IS_VGPR: 2671 return StringRef(".amdgcn.next_free_vgpr"); 2672 case IS_SGPR: 2673 return StringRef(".amdgcn.next_free_sgpr"); 2674 default: 2675 return None; 2676 } 2677 } 2678 2679 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2680 auto SymbolName = getGprCountSymbolName(RegKind); 2681 assert(SymbolName && "initializing invalid register kind"); 2682 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2683 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2684 } 2685 2686 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2687 unsigned DwordRegIndex, 2688 unsigned RegWidth) { 2689 // Symbols are only defined for GCN targets 2690 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2691 return true; 2692 2693 auto SymbolName = getGprCountSymbolName(RegKind); 2694 if (!SymbolName) 2695 return true; 2696 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2697 2698 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2699 int64_t OldCount; 2700 2701 if (!Sym->isVariable()) 2702 return !Error(getLoc(), 2703 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2704 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2705 return !Error( 2706 getLoc(), 2707 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2708 2709 if (OldCount <= NewMax) 2710 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2711 2712 return true; 2713 } 2714 2715 std::unique_ptr<AMDGPUOperand> 2716 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2717 const auto &Tok = getToken(); 2718 SMLoc StartLoc = Tok.getLoc(); 2719 SMLoc EndLoc = Tok.getEndLoc(); 2720 RegisterKind RegKind; 2721 unsigned Reg, RegNum, RegWidth; 2722 2723 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2724 return nullptr; 2725 } 2726 if (isHsaAbiVersion3(&getSTI())) { 2727 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2728 return nullptr; 2729 } else 2730 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2731 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2732 } 2733 2734 OperandMatchResultTy 2735 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2736 // TODO: add syntactic sugar for 1/(2*PI) 2737 2738 assert(!isRegister()); 2739 assert(!isModifier()); 2740 2741 const auto& Tok = getToken(); 2742 const auto& NextTok = peekToken(); 2743 bool IsReal = Tok.is(AsmToken::Real); 2744 SMLoc S = getLoc(); 2745 bool Negate = false; 2746 2747 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2748 lex(); 2749 IsReal = true; 2750 Negate = true; 2751 } 2752 2753 if (IsReal) { 2754 // Floating-point expressions are not supported. 2755 // Can only allow floating-point literals with an 2756 // optional sign. 2757 2758 StringRef Num = getTokenStr(); 2759 lex(); 2760 2761 APFloat RealVal(APFloat::IEEEdouble()); 2762 auto roundMode = APFloat::rmNearestTiesToEven; 2763 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2764 return MatchOperand_ParseFail; 2765 } 2766 if (Negate) 2767 RealVal.changeSign(); 2768 2769 Operands.push_back( 2770 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2771 AMDGPUOperand::ImmTyNone, true)); 2772 2773 return MatchOperand_Success; 2774 2775 } else { 2776 int64_t IntVal; 2777 const MCExpr *Expr; 2778 SMLoc S = getLoc(); 2779 2780 if (HasSP3AbsModifier) { 2781 // This is a workaround for handling expressions 2782 // as arguments of SP3 'abs' modifier, for example: 2783 // |1.0| 2784 // |-1| 2785 // |1+x| 2786 // This syntax is not compatible with syntax of standard 2787 // MC expressions (due to the trailing '|'). 
2788 SMLoc EndLoc; 2789 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2790 return MatchOperand_ParseFail; 2791 } else { 2792 if (Parser.parseExpression(Expr)) 2793 return MatchOperand_ParseFail; 2794 } 2795 2796 if (Expr->evaluateAsAbsolute(IntVal)) { 2797 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2798 } else { 2799 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2800 } 2801 2802 return MatchOperand_Success; 2803 } 2804 2805 return MatchOperand_NoMatch; 2806 } 2807 2808 OperandMatchResultTy 2809 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2810 if (!isRegister()) 2811 return MatchOperand_NoMatch; 2812 2813 if (auto R = parseRegister()) { 2814 assert(R->isReg()); 2815 Operands.push_back(std::move(R)); 2816 return MatchOperand_Success; 2817 } 2818 return MatchOperand_ParseFail; 2819 } 2820 2821 OperandMatchResultTy 2822 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2823 auto res = parseReg(Operands); 2824 if (res != MatchOperand_NoMatch) { 2825 return res; 2826 } else if (isModifier()) { 2827 return MatchOperand_NoMatch; 2828 } else { 2829 return parseImm(Operands, HasSP3AbsMod); 2830 } 2831 } 2832 2833 bool 2834 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2835 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2836 const auto &str = Token.getString(); 2837 return str == "abs" || str == "neg" || str == "sext"; 2838 } 2839 return false; 2840 } 2841 2842 bool 2843 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2844 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2845 } 2846 2847 bool 2848 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2849 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2850 } 2851 2852 bool 2853 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2854 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2855 } 2856 2857 // Check if this is an operand modifier or an opcode modifier 2858 // which may look like an expression but it is not. We should 2859 // avoid parsing these modifiers as expressions. Currently 2860 // recognized sequences are: 2861 // |...| 2862 // abs(...) 2863 // neg(...) 2864 // sext(...) 2865 // -reg 2866 // -|...| 2867 // -abs(...) 2868 // name:... 2869 // Note that simple opcode modifiers like 'gds' may be parsed as 2870 // expressions; this is a special case. See getExpressionAsToken. 2871 // 2872 bool 2873 AMDGPUAsmParser::isModifier() { 2874 2875 AsmToken Tok = getToken(); 2876 AsmToken NextToken[2]; 2877 peekTokens(NextToken); 2878 2879 return isOperandModifier(Tok, NextToken[0]) || 2880 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2881 isOpcodeModifierWithVal(Tok, NextToken[0]); 2882 } 2883 2884 // Check if the current token is an SP3 'neg' modifier. 2885 // Currently this modifier is allowed in the following context: 2886 // 2887 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2888 // 2. Before an 'abs' modifier: -abs(...) 2889 // 3. Before an SP3 'abs' modifier: -|...| 2890 // 2891 // In all other cases "-" is handled as a part 2892 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2966 } 2967 2968 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2969 return MatchOperand_ParseFail; 2970 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2971 return MatchOperand_ParseFail; 2972 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2973 return MatchOperand_ParseFail; 2974 2975 AMDGPUOperand::Modifiers Mods; 2976 Mods.Abs = Abs || SP3Abs; 2977 Mods.Neg = Neg || SP3Neg; 2978 2979 if (Mods.hasFPModifiers()) { 2980 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2981 if (Op.isExpr()) { 2982 Error(Op.getStartLoc(), "expected an absolute expression"); 2983 return MatchOperand_ParseFail; 2984 } 2985 Op.setModifiers(Mods); 2986 } 2987 return MatchOperand_Success; 2988 } 2989 2990 OperandMatchResultTy 2991 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2992 bool AllowImm) { 2993 bool Sext = trySkipId("sext"); 2994 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2995 return MatchOperand_ParseFail; 2996 2997 OperandMatchResultTy Res; 2998 if (AllowImm) { 2999 Res = parseRegOrImm(Operands); 3000 } else { 3001 Res = parseReg(Operands); 3002 } 3003 if (Res != MatchOperand_Success) { 3004 return Sext? MatchOperand_ParseFail : Res; 3005 } 3006 3007 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3008 return MatchOperand_ParseFail; 3009 3010 AMDGPUOperand::Modifiers Mods; 3011 Mods.Sext = Sext; 3012 3013 if (Mods.hasIntModifiers()) { 3014 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3015 if (Op.isExpr()) { 3016 Error(Op.getStartLoc(), "expected an absolute expression"); 3017 return MatchOperand_ParseFail; 3018 } 3019 Op.setModifiers(Mods); 3020 } 3021 3022 return MatchOperand_Success; 3023 } 3024 3025 OperandMatchResultTy 3026 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3027 return parseRegOrImmWithFPInputMods(Operands, false); 3028 } 3029 3030 OperandMatchResultTy 3031 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3032 return parseRegOrImmWithIntInputMods(Operands, false); 3033 } 3034 3035 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3036 auto Loc = getLoc(); 3037 if (trySkipId("off")) { 3038 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3039 AMDGPUOperand::ImmTyOff, false)); 3040 return MatchOperand_Success; 3041 } 3042 3043 if (!isRegister()) 3044 return MatchOperand_NoMatch; 3045 3046 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3047 if (Reg) { 3048 Operands.push_back(std::move(Reg)); 3049 return MatchOperand_Success; 3050 } 3051 3052 return MatchOperand_ParseFail; 3053 3054 } 3055 3056 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3057 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3058 3059 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3060 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3061 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3062 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3063 return Match_InvalidOperand; 3064 3065 if ((TSFlags & SIInstrFlags::VOP3) && 3066 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3067 getForcedEncodingSize() != 64) 3068 return Match_PreferE32; 3069 3070 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3071 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3072 // v_mac_f32/16 allow only dst_sel == DWORD; 3073 auto OpNum = 3074 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3075 const auto &Op = Inst.getOperand(OpNum); 3076 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3077 return Match_InvalidOperand; 3078 } 3079 } 3080 3081 return Match_Success; 3082 } 3083 3084 static ArrayRef<unsigned> getAllVariants() { 3085 static const unsigned Variants[] = { 3086 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3087 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3088 }; 3089 3090 return makeArrayRef(Variants); 3091 } 3092 3093 // What asm variants we should check 3094 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3095 if (getForcedEncodingSize() == 32) { 3096 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3097 return makeArrayRef(Variants); 3098 } 3099 3100 if (isForcedVOP3()) { 3101 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3102 return makeArrayRef(Variants); 3103 } 3104 3105 if (isForcedSDWA()) { 3106 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3107 AMDGPUAsmVariants::SDWA9}; 3108 return makeArrayRef(Variants); 3109 } 3110 3111 if (isForcedDPP()) { 3112 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3113 return makeArrayRef(Variants); 3114 } 3115 3116 return getAllVariants(); 3117 } 3118 3119 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3120 if (getForcedEncodingSize() == 32) 3121 return "e32"; 3122 3123 if (isForcedVOP3()) 3124 return "e64"; 3125 3126 if (isForcedSDWA()) 3127 return "sdwa"; 3128 3129 if (isForcedDPP()) 3130 return "dpp"; 3131 3132 return ""; 3133 } 3134 3135 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3136 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3137 const unsigned Num = Desc.getNumImplicitUses(); 3138 for (unsigned i = 0; i < Num; ++i) { 3139 unsigned Reg = Desc.ImplicitUses[i]; 3140 switch (Reg) { 3141 case AMDGPU::FLAT_SCR: 3142 case AMDGPU::VCC: 3143 case AMDGPU::VCC_LO: 3144 case AMDGPU::VCC_HI: 3145 case AMDGPU::M0: 3146 return Reg; 3147 default: 3148 break; 3149 } 3150 } 3151 return AMDGPU::NoRegister; 3152 } 3153 3154 // NB: This code is correct only when used to check constant 3155 // bus limitations because GFX7 support no f16 inline constants. 3156 // Note that there are no cases when a GFX7 opcode violates 3157 // constant bus limitations due to the use of an f16 constant. 
3158 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3159 unsigned OpIdx) const { 3160 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3161 3162 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3163 return false; 3164 } 3165 3166 const MCOperand &MO = Inst.getOperand(OpIdx); 3167 3168 int64_t Val = MO.getImm(); 3169 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3170 3171 switch (OpSize) { // expected operand size 3172 case 8: 3173 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3174 case 4: 3175 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3176 case 2: { 3177 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3178 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3179 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3180 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3181 return AMDGPU::isInlinableIntLiteral(Val); 3182 3183 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3184 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3185 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3186 return AMDGPU::isInlinableIntLiteralV216(Val); 3187 3188 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3189 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3190 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3191 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3192 3193 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3194 } 3195 default: 3196 llvm_unreachable("invalid operand size"); 3197 } 3198 } 3199 3200 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3201 if (!isGFX10Plus()) 3202 return 1; 3203 3204 switch (Opcode) { 3205 // 64-bit shift instructions can use only one scalar value input 3206 case AMDGPU::V_LSHLREV_B64_e64: 3207 case AMDGPU::V_LSHLREV_B64_gfx10: 3208 case AMDGPU::V_LSHRREV_B64_e64: 3209 case AMDGPU::V_LSHRREV_B64_gfx10: 3210 case AMDGPU::V_ASHRREV_I64_e64: 3211 case AMDGPU::V_ASHRREV_I64_gfx10: 3212 case AMDGPU::V_LSHL_B64_e64: 3213 case AMDGPU::V_LSHR_B64_e64: 3214 case AMDGPU::V_ASHR_I64_e64: 3215 return 1; 3216 default: 3217 return 2; 3218 } 3219 } 3220 3221 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3222 const MCOperand &MO = Inst.getOperand(OpIdx); 3223 if (MO.isImm()) { 3224 return !isInlineConstant(Inst, OpIdx); 3225 } else if (MO.isReg()) { 3226 auto Reg = MO.getReg(); 3227 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3228 auto PReg = mc2PseudoReg(Reg); 3229 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3230 } else { 3231 return true; 3232 } 3233 } 3234 3235 bool 3236 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3237 const OperandVector &Operands) { 3238 const unsigned Opcode = Inst.getOpcode(); 3239 const MCInstrDesc &Desc = MII.get(Opcode); 3240 unsigned LastSGPR = AMDGPU::NoRegister; 3241 unsigned ConstantBusUseCount = 0; 3242 unsigned NumLiterals = 0; 3243 unsigned LiteralSize; 3244 3245 if (Desc.TSFlags & 3246 (SIInstrFlags::VOPC | 3247 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3248 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3249 SIInstrFlags::SDWA)) { 3250 // Check special imm operands (used by madmk, etc) 3251 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3252 ++ConstantBusUseCount; 3253 } 3254 3255 SmallDenseSet<unsigned> SGPRsUsed; 3256 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3257 if (SGPRUsed != AMDGPU::NoRegister) { 3258 SGPRsUsed.insert(SGPRUsed); 3259 ++ConstantBusUseCount; 3260 } 3261 3262 const int 
        Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3320 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3321 return false; 3322 } 3323 3324 bool 3325 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3326 const OperandVector &Operands) { 3327 const unsigned Opcode = Inst.getOpcode(); 3328 const MCInstrDesc &Desc = MII.get(Opcode); 3329 3330 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3331 if (DstIdx == -1 || 3332 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3333 return true; 3334 } 3335 3336 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3337 3338 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3339 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3340 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3341 3342 assert(DstIdx != -1); 3343 const MCOperand &Dst = Inst.getOperand(DstIdx); 3344 assert(Dst.isReg()); 3345 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3346 3347 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3348 3349 for (int SrcIdx : SrcIndices) { 3350 if (SrcIdx == -1) break; 3351 const MCOperand &Src = Inst.getOperand(SrcIdx); 3352 if (Src.isReg()) { 3353 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3354 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3355 Error(getRegLoc(SrcReg, Operands), 3356 "destination must be different than all sources"); 3357 return false; 3358 } 3359 } 3360 } 3361 3362 return true; 3363 } 3364 3365 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3366 3367 const unsigned Opc = Inst.getOpcode(); 3368 const MCInstrDesc &Desc = MII.get(Opc); 3369 3370 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3371 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3372 assert(ClampIdx != -1); 3373 return Inst.getOperand(ClampIdx).getImm() == 0; 3374 } 3375 3376 return true; 3377 } 3378 3379 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3380 3381 const unsigned Opc = Inst.getOpcode(); 3382 const MCInstrDesc &Desc = MII.get(Opc); 3383 3384 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3385 return true; 3386 3387 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3388 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3389 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3390 3391 assert(VDataIdx != -1); 3392 3393 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3394 return true; 3395 3396 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3397 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3398 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3399 if (DMask == 0) 3400 DMask = 1; 3401 3402 unsigned DataSize = 3403 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3404 if (hasPackedD16()) { 3405 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3406 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3407 DataSize = (DataSize + 1) / 2; 3408 } 3409 3410 return (VDataSize / 4) == DataSize + TFESize; 3411 } 3412 3413 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3414 const unsigned Opc = Inst.getOpcode(); 3415 const MCInstrDesc &Desc = MII.get(Opc); 3416 3417 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3418 return true; 3419 3420 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3421 3422 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3423 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3424 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3425 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3426 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3427 3428 assert(VAddr0Idx != -1); 3429 assert(SrsrcIdx != -1); 3430 assert(SrsrcIdx > VAddr0Idx); 3431 3432 if (DimIdx == -1) 3433 return true; // intersect_ray 3434 3435 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3436 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3437 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3438 unsigned VAddrSize = 3439 IsNSA ? SrsrcIdx - VAddr0Idx 3440 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3441 3442 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3443 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3444 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3445 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3446 if (!IsNSA) { 3447 if (AddrSize > 8) 3448 AddrSize = 16; 3449 else if (AddrSize > 4) 3450 AddrSize = 8; 3451 } 3452 3453 return VAddrSize == AddrSize; 3454 } 3455 3456 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3457 3458 const unsigned Opc = Inst.getOpcode(); 3459 const MCInstrDesc &Desc = MII.get(Opc); 3460 3461 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3462 return true; 3463 if (!Desc.mayLoad() || !Desc.mayStore()) 3464 return true; // Not atomic 3465 3466 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3467 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3468 3469 // This is an incomplete check because image_atomic_cmpswap 3470 // may only use 0x3 and 0xf while other atomic operations 3471 // may use 0x1 and 0x3. However these limitations are 3472 // verified when we check that dmask matches dst size. 3473 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3474 } 3475 3476 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3477 3478 const unsigned Opc = Inst.getOpcode(); 3479 const MCInstrDesc &Desc = MII.get(Opc); 3480 3481 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3482 return true; 3483 3484 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3485 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3486 3487 // GATHER4 instructions use dmask in a different fashion compared to 3488 // other MIMG instructions. The only useful DMASK values are 3489 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3490 // (red,red,red,red) etc.) The ISA document doesn't mention 3491 // this. 
3492 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3493 } 3494 3495 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3496 { 3497 switch (Opcode) { 3498 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3499 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3500 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3501 return true; 3502 default: 3503 return false; 3504 } 3505 } 3506 3507 // movrels* opcodes should only allow VGPRS as src0. 3508 // This is specified in .td description for vop1/vop3, 3509 // but sdwa is handled differently. See isSDWAOperand. 3510 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3511 const OperandVector &Operands) { 3512 3513 const unsigned Opc = Inst.getOpcode(); 3514 const MCInstrDesc &Desc = MII.get(Opc); 3515 3516 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3517 return true; 3518 3519 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3520 assert(Src0Idx != -1); 3521 3522 SMLoc ErrLoc; 3523 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3524 if (Src0.isReg()) { 3525 auto Reg = mc2PseudoReg(Src0.getReg()); 3526 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3527 if (!isSGPR(Reg, TRI)) 3528 return true; 3529 ErrLoc = getRegLoc(Reg, Operands); 3530 } else { 3531 ErrLoc = getConstLoc(Operands); 3532 } 3533 3534 Error(ErrLoc, "source operand must be a VGPR"); 3535 return false; 3536 } 3537 3538 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3539 const OperandVector &Operands) { 3540 3541 const unsigned Opc = Inst.getOpcode(); 3542 3543 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3544 return true; 3545 3546 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3547 assert(Src0Idx != -1); 3548 3549 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3550 if (!Src0.isReg()) 3551 return true; 3552 3553 auto Reg = mc2PseudoReg(Src0.getReg()); 3554 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3555 if (isSGPR(Reg, TRI)) { 3556 Error(getRegLoc(Reg, Operands), 3557 "source operand must be either a VGPR or an inline constant"); 3558 return false; 3559 } 3560 3561 return true; 3562 } 3563 3564 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3565 switch (Inst.getOpcode()) { 3566 default: 3567 return true; 3568 case V_DIV_SCALE_F32_gfx6_gfx7: 3569 case V_DIV_SCALE_F32_vi: 3570 case V_DIV_SCALE_F32_gfx10: 3571 case V_DIV_SCALE_F64_gfx6_gfx7: 3572 case V_DIV_SCALE_F64_vi: 3573 case V_DIV_SCALE_F64_gfx10: 3574 break; 3575 } 3576 3577 // TODO: Check that src0 = src1 or src2. 
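  // The ABS (|x|) source modifier is not accepted here: reject the instruction
  // if any source-modifier operand has SISrcMods::ABS set (loop below).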
3578
3579 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3580 AMDGPU::OpName::src1_modifiers,
3581 AMDGPU::OpName::src2_modifiers}) {
3582 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3583 .getImm() &
3584 SISrcMods::ABS) {
3585 return false;
3586 }
3587 }
3588
3589 return true;
3590 }
3591
3592 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3593
3594 const unsigned Opc = Inst.getOpcode();
3595 const MCInstrDesc &Desc = MII.get(Opc);
3596
3597 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3598 return true;
3599
3600 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3601 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3602 if (isCI() || isSI())
3603 return false;
3604 }
3605
3606 return true;
3607 }
3608
3609 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3610 const unsigned Opc = Inst.getOpcode();
3611 const MCInstrDesc &Desc = MII.get(Opc);
3612
3613 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3614 return true;
3615
3616 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3617 if (DimIdx < 0)
3618 return true;
3619
3620 long Imm = Inst.getOperand(DimIdx).getImm();
3621 if (Imm < 0 || Imm >= 8)
3622 return false;
3623
3624 return true;
3625 }
3626
3627 static bool IsRevOpcode(const unsigned Opcode)
3628 {
3629 switch (Opcode) {
3630 case AMDGPU::V_SUBREV_F32_e32:
3631 case AMDGPU::V_SUBREV_F32_e64:
3632 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3633 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3634 case AMDGPU::V_SUBREV_F32_e32_vi:
3635 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3636 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3637 case AMDGPU::V_SUBREV_F32_e64_vi:
3638
3639 case AMDGPU::V_SUBREV_CO_U32_e32:
3640 case AMDGPU::V_SUBREV_CO_U32_e64:
3641 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3642 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3643
3644 case AMDGPU::V_SUBBREV_U32_e32:
3645 case AMDGPU::V_SUBBREV_U32_e64:
3646 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3647 case AMDGPU::V_SUBBREV_U32_e32_vi:
3648 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3649 case AMDGPU::V_SUBBREV_U32_e64_vi:
3650
3651 case AMDGPU::V_SUBREV_U32_e32:
3652 case AMDGPU::V_SUBREV_U32_e64:
3653 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3654 case AMDGPU::V_SUBREV_U32_e32_vi:
3655 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3656 case AMDGPU::V_SUBREV_U32_e64_vi:
3657
3658 case AMDGPU::V_SUBREV_F16_e32:
3659 case AMDGPU::V_SUBREV_F16_e64:
3660 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3661 case AMDGPU::V_SUBREV_F16_e32_vi:
3662 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3663 case AMDGPU::V_SUBREV_F16_e64_vi:
3664
3665 case AMDGPU::V_SUBREV_U16_e32:
3666 case AMDGPU::V_SUBREV_U16_e64:
3667 case AMDGPU::V_SUBREV_U16_e32_vi:
3668 case AMDGPU::V_SUBREV_U16_e64_vi:
3669
3670 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3671 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3672 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3673
3674 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3675 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3676
3677 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3678 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3679
3680 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3681 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3682
3683 case AMDGPU::V_LSHRREV_B32_e32:
3684 case AMDGPU::V_LSHRREV_B32_e64:
3685 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3686 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3687 case AMDGPU::V_LSHRREV_B32_e32_vi:
3688 case AMDGPU::V_LSHRREV_B32_e64_vi:
3689 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3690 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3691
3692 case AMDGPU::V_ASHRREV_I32_e32:
3693 case
AMDGPU::V_ASHRREV_I32_e64: 3694 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3695 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3696 case AMDGPU::V_ASHRREV_I32_e32_vi: 3697 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3698 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3699 case AMDGPU::V_ASHRREV_I32_e64_vi: 3700 3701 case AMDGPU::V_LSHLREV_B32_e32: 3702 case AMDGPU::V_LSHLREV_B32_e64: 3703 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3704 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3705 case AMDGPU::V_LSHLREV_B32_e32_vi: 3706 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3707 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3708 case AMDGPU::V_LSHLREV_B32_e64_vi: 3709 3710 case AMDGPU::V_LSHLREV_B16_e32: 3711 case AMDGPU::V_LSHLREV_B16_e64: 3712 case AMDGPU::V_LSHLREV_B16_e32_vi: 3713 case AMDGPU::V_LSHLREV_B16_e64_vi: 3714 case AMDGPU::V_LSHLREV_B16_gfx10: 3715 3716 case AMDGPU::V_LSHRREV_B16_e32: 3717 case AMDGPU::V_LSHRREV_B16_e64: 3718 case AMDGPU::V_LSHRREV_B16_e32_vi: 3719 case AMDGPU::V_LSHRREV_B16_e64_vi: 3720 case AMDGPU::V_LSHRREV_B16_gfx10: 3721 3722 case AMDGPU::V_ASHRREV_I16_e32: 3723 case AMDGPU::V_ASHRREV_I16_e64: 3724 case AMDGPU::V_ASHRREV_I16_e32_vi: 3725 case AMDGPU::V_ASHRREV_I16_e64_vi: 3726 case AMDGPU::V_ASHRREV_I16_gfx10: 3727 3728 case AMDGPU::V_LSHLREV_B64_e64: 3729 case AMDGPU::V_LSHLREV_B64_gfx10: 3730 case AMDGPU::V_LSHLREV_B64_vi: 3731 3732 case AMDGPU::V_LSHRREV_B64_e64: 3733 case AMDGPU::V_LSHRREV_B64_gfx10: 3734 case AMDGPU::V_LSHRREV_B64_vi: 3735 3736 case AMDGPU::V_ASHRREV_I64_e64: 3737 case AMDGPU::V_ASHRREV_I64_gfx10: 3738 case AMDGPU::V_ASHRREV_I64_vi: 3739 3740 case AMDGPU::V_PK_LSHLREV_B16: 3741 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3742 case AMDGPU::V_PK_LSHLREV_B16_vi: 3743 3744 case AMDGPU::V_PK_LSHRREV_B16: 3745 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3746 case AMDGPU::V_PK_LSHRREV_B16_vi: 3747 case AMDGPU::V_PK_ASHRREV_I16: 3748 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3749 case AMDGPU::V_PK_ASHRREV_I16_vi: 3750 return true; 3751 default: 3752 return false; 3753 } 3754 } 3755 3756 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3757 3758 using namespace SIInstrFlags; 3759 const unsigned Opcode = Inst.getOpcode(); 3760 const MCInstrDesc &Desc = MII.get(Opcode); 3761 3762 // lds_direct register is defined so that it can be used 3763 // with 9-bit operands only. Ignore encodings which do not accept these. 3764 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3765 return true; 3766 3767 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3768 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3769 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3770 3771 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3772 3773 // lds_direct cannot be specified as either src1 or src2. 3774 for (int SrcIdx : SrcIndices) { 3775 if (SrcIdx == -1) break; 3776 const MCOperand &Src = Inst.getOperand(SrcIdx); 3777 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3778 return false; 3779 } 3780 } 3781 3782 if (Src0Idx == -1) 3783 return true; 3784 3785 const MCOperand &Src = Inst.getOperand(Src0Idx); 3786 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3787 return true; 3788 3789 // lds_direct is specified as src0. Check additional limitations. 
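  // It is also rejected for SDWA encodings and for the *rev opcodes listed in
  // IsRevOpcode above.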
3790 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3791 } 3792 3793 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3794 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3795 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3796 if (Op.isFlatOffset()) 3797 return Op.getStartLoc(); 3798 } 3799 return getLoc(); 3800 } 3801 3802 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3803 const OperandVector &Operands) { 3804 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3805 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3806 return true; 3807 3808 auto Opcode = Inst.getOpcode(); 3809 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3810 assert(OpNum != -1); 3811 3812 const auto &Op = Inst.getOperand(OpNum); 3813 if (!hasFlatOffsets() && Op.getImm() != 0) { 3814 Error(getFlatOffsetLoc(Operands), 3815 "flat offset modifier is not supported on this GPU"); 3816 return false; 3817 } 3818 3819 // For FLAT segment the offset must be positive; 3820 // MSB is ignored and forced to zero. 3821 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3822 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3823 if (!isIntN(OffsetSize, Op.getImm())) { 3824 Error(getFlatOffsetLoc(Operands), 3825 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3826 return false; 3827 } 3828 } else { 3829 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3830 if (!isUIntN(OffsetSize, Op.getImm())) { 3831 Error(getFlatOffsetLoc(Operands), 3832 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3833 return false; 3834 } 3835 } 3836 3837 return true; 3838 } 3839 3840 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3841 // Start with second operand because SMEM Offset cannot be dst or src0. 3842 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3843 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3844 if (Op.isSMEMOffset()) 3845 return Op.getStartLoc(); 3846 } 3847 return getLoc(); 3848 } 3849 3850 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3851 const OperandVector &Operands) { 3852 if (isCI() || isSI()) 3853 return true; 3854 3855 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3856 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3857 return true; 3858 3859 auto Opcode = Inst.getOpcode(); 3860 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3861 if (OpNum == -1) 3862 return true; 3863 3864 const auto &Op = Inst.getOperand(OpNum); 3865 if (!Op.isImm()) 3866 return true; 3867 3868 uint64_t Offset = Op.getImm(); 3869 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3870 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3871 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3872 return true; 3873 3874 Error(getSMEMOffsetLoc(Operands), 3875 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3876 "expected a 21-bit signed offset"); 3877 3878 return false; 3879 } 3880 3881 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3882 unsigned Opcode = Inst.getOpcode(); 3883 const MCInstrDesc &Desc = MII.get(Opcode); 3884 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3885 return true; 3886 3887 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3888 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3889 3890 const int OpIndices[] = { Src0Idx, Src1Idx }; 3891 3892 unsigned NumExprs = 0; 3893 unsigned NumLiterals = 0; 3894 uint32_t LiteralValue; 3895 3896 for (int OpIdx : OpIndices) { 3897 if (OpIdx == -1) break; 3898 3899 const MCOperand &MO = Inst.getOperand(OpIdx); 3900 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3901 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3902 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3903 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3904 if (NumLiterals == 0 || LiteralValue != Value) { 3905 LiteralValue = Value; 3906 ++NumLiterals; 3907 } 3908 } else if (MO.isExpr()) { 3909 ++NumExprs; 3910 } 3911 } 3912 } 3913 3914 return NumLiterals + NumExprs <= 1; 3915 } 3916 3917 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3918 const unsigned Opc = Inst.getOpcode(); 3919 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3920 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3921 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3922 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3923 3924 if (OpSel & ~3) 3925 return false; 3926 } 3927 return true; 3928 } 3929 3930 // Check if VCC register matches wavefront size 3931 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3932 auto FB = getFeatureBits(); 3933 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3934 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3935 } 3936 3937 // VOP3 literal is only allowed in GFX10+ and only one can be used 3938 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3939 const OperandVector &Operands) { 3940 unsigned Opcode = Inst.getOpcode(); 3941 const MCInstrDesc &Desc = MII.get(Opcode); 3942 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3943 return true; 3944 3945 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3946 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3947 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3948 3949 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3950 3951 unsigned NumExprs = 0; 3952 unsigned NumLiterals = 0; 3953 uint32_t LiteralValue; 3954 3955 for (int OpIdx : OpIndices) { 3956 if (OpIdx == -1) break; 3957 3958 const MCOperand &MO = Inst.getOperand(OpIdx); 3959 if (!MO.isImm() && !MO.isExpr()) 3960 continue; 3961 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3962 continue; 3963 3964 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3965 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3966 Error(getConstLoc(Operands), 3967 "inline constants are not allowed for this operand"); 3968 return false; 3969 } 3970 3971 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3972 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3973 if (NumLiterals == 0 || LiteralValue != Value) { 3974 LiteralValue = Value; 3975 ++NumLiterals; 3976 } 3977 } else if (MO.isExpr()) { 3978 ++NumExprs; 3979 } 
3980 } 3981 NumLiterals += NumExprs; 3982 3983 if (!NumLiterals) 3984 return true; 3985 3986 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3987 Error(getLitLoc(Operands), "literal operands are not supported"); 3988 return false; 3989 } 3990 3991 if (NumLiterals > 1) { 3992 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3993 return false; 3994 } 3995 3996 return true; 3997 } 3998 3999 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4000 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4001 const MCRegisterInfo *MRI) { 4002 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4003 if (OpIdx < 0) 4004 return -1; 4005 4006 const MCOperand &Op = Inst.getOperand(OpIdx); 4007 if (!Op.isReg()) 4008 return -1; 4009 4010 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4011 auto Reg = Sub ? Sub : Op.getReg(); 4012 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4013 return AGRP32.contains(Reg) ? 1 : 0; 4014 } 4015 4016 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4017 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4018 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4019 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4020 SIInstrFlags::DS)) == 0) 4021 return true; 4022 4023 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4024 : AMDGPU::OpName::vdata; 4025 4026 const MCRegisterInfo *MRI = getMRI(); 4027 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4028 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4029 4030 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4031 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4032 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4033 return false; 4034 } 4035 4036 auto FB = getFeatureBits(); 4037 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4038 if (DataAreg < 0 || DstAreg < 0) 4039 return true; 4040 return DstAreg == DataAreg; 4041 } 4042 4043 return DstAreg < 1 && DataAreg < 1; 4044 } 4045 4046 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4047 auto FB = getFeatureBits(); 4048 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4049 return true; 4050 4051 const MCRegisterInfo *MRI = getMRI(); 4052 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4053 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4054 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4055 const MCOperand &Op = Inst.getOperand(I); 4056 if (!Op.isReg()) 4057 continue; 4058 4059 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4060 if (!Sub) 4061 continue; 4062 4063 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4064 return false; 4065 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4066 return false; 4067 } 4068 4069 return true; 4070 } 4071 4072 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4073 const OperandVector &Operands, 4074 const SMLoc &IDLoc) { 4075 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4076 AMDGPU::OpName::glc1); 4077 if (GLCPos != -1) { 4078 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 4079 // in the asm string, and the default value means it is not present. 
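  // Only instructions that require glc (e.g. atomics that return data) define
  // a glc1 operand, so seeing the default value here means "glc" was omitted.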
4080 if (Inst.getOperand(GLCPos).getImm() == -1) { 4081 Error(IDLoc, "instruction must use glc"); 4082 return false; 4083 } 4084 } 4085 4086 return true; 4087 } 4088 4089 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4090 const SMLoc &IDLoc, 4091 const OperandVector &Operands) { 4092 if (!validateLdsDirect(Inst)) { 4093 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 4094 "invalid use of lds_direct"); 4095 return false; 4096 } 4097 if (!validateSOPLiteral(Inst)) { 4098 Error(getLitLoc(Operands), 4099 "only one literal operand is allowed"); 4100 return false; 4101 } 4102 if (!validateVOP3Literal(Inst, Operands)) { 4103 return false; 4104 } 4105 if (!validateConstantBusLimitations(Inst, Operands)) { 4106 return false; 4107 } 4108 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4109 return false; 4110 } 4111 if (!validateIntClampSupported(Inst)) { 4112 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4113 "integer clamping is not supported on this GPU"); 4114 return false; 4115 } 4116 if (!validateOpSel(Inst)) { 4117 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4118 "invalid op_sel operand"); 4119 return false; 4120 } 4121 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4122 if (!validateMIMGD16(Inst)) { 4123 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4124 "d16 modifier is not supported on this GPU"); 4125 return false; 4126 } 4127 if (!validateMIMGDim(Inst)) { 4128 Error(IDLoc, "dim modifier is required on this GPU"); 4129 return false; 4130 } 4131 if (!validateMIMGDataSize(Inst)) { 4132 Error(IDLoc, 4133 "image data size does not match dmask and tfe"); 4134 return false; 4135 } 4136 if (!validateMIMGAddrSize(Inst)) { 4137 Error(IDLoc, 4138 "image address size does not match dim and a16"); 4139 return false; 4140 } 4141 if (!validateMIMGAtomicDMask(Inst)) { 4142 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4143 "invalid atomic image dmask"); 4144 return false; 4145 } 4146 if (!validateMIMGGatherDMask(Inst)) { 4147 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4148 "invalid image_gather dmask: only one bit must be set"); 4149 return false; 4150 } 4151 if (!validateMovrels(Inst, Operands)) { 4152 return false; 4153 } 4154 if (!validateFlatOffset(Inst, Operands)) { 4155 return false; 4156 } 4157 if (!validateSMEMOffset(Inst, Operands)) { 4158 return false; 4159 } 4160 if (!validateMAIAccWrite(Inst, Operands)) { 4161 return false; 4162 } 4163 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4164 return false; 4165 } 4166 4167 if (!validateAGPRLdSt(Inst)) { 4168 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4169 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4170 : "invalid register class: agpr loads and stores not supported on this GPU" 4171 ); 4172 return false; 4173 } 4174 if (!validateVGPRAlign(Inst)) { 4175 Error(IDLoc, 4176 "invalid register class: vgpr tuples must be 64 bit aligned"); 4177 return false; 4178 } 4179 4180 if (!validateDivScale(Inst)) { 4181 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4182 return false; 4183 } 4184 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4185 return false; 4186 } 4187 4188 return true; 4189 } 4190 4191 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4192 const FeatureBitset &FBS, 4193 unsigned VariantID = 0); 4194 4195 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4196 const FeatureBitset &AvailableFeatures, 4197 unsigned VariantID); 4198 4199 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4200 const FeatureBitset &FBS) { 4201 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4202 } 4203 4204 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4205 const FeatureBitset &FBS, 4206 ArrayRef<unsigned> Variants) { 4207 for (auto Variant : Variants) { 4208 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4209 return true; 4210 } 4211 4212 return false; 4213 } 4214 4215 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4216 const SMLoc &IDLoc) { 4217 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4218 4219 // Check if requested instruction variant is supported. 4220 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4221 return false; 4222 4223 // This instruction is not supported. 4224 // Clear any other pending errors because they are no longer relevant. 4225 getParser().clearPendingErrors(); 4226 4227 // Requested instruction variant is not supported. 4228 // Check if any other variants are supported. 4229 StringRef VariantName = getMatchedVariantName(); 4230 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4231 return Error(IDLoc, 4232 Twine(VariantName, 4233 " variant of this instruction is not supported")); 4234 } 4235 4236 // Finally check if this instruction is supported on any other GPU. 4237 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4238 return Error(IDLoc, "instruction not supported on this GPU"); 4239 } 4240 4241 // Instruction not supported on any GPU. Probably a typo. 4242 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4243 return Error(IDLoc, "invalid instruction" + Suggestion); 4244 } 4245 4246 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4247 OperandVector &Operands, 4248 MCStreamer &Out, 4249 uint64_t &ErrorInfo, 4250 bool MatchingInlineAsm) { 4251 MCInst Inst; 4252 unsigned Result = Match_Success; 4253 for (auto Variant : getMatchedVariants()) { 4254 uint64_t EI; 4255 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4256 Variant); 4257 // We order match statuses from least to most specific. 
We use most specific 4258 // status as resulting 4259 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4260 if ((R == Match_Success) || 4261 (R == Match_PreferE32) || 4262 (R == Match_MissingFeature && Result != Match_PreferE32) || 4263 (R == Match_InvalidOperand && Result != Match_MissingFeature 4264 && Result != Match_PreferE32) || 4265 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4266 && Result != Match_MissingFeature 4267 && Result != Match_PreferE32)) { 4268 Result = R; 4269 ErrorInfo = EI; 4270 } 4271 if (R == Match_Success) 4272 break; 4273 } 4274 4275 if (Result == Match_Success) { 4276 if (!validateInstruction(Inst, IDLoc, Operands)) { 4277 return true; 4278 } 4279 Inst.setLoc(IDLoc); 4280 Out.emitInstruction(Inst, getSTI()); 4281 return false; 4282 } 4283 4284 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4285 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4286 return true; 4287 } 4288 4289 switch (Result) { 4290 default: break; 4291 case Match_MissingFeature: 4292 // It has been verified that the specified instruction 4293 // mnemonic is valid. A match was found but it requires 4294 // features which are not supported on this GPU. 4295 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4296 4297 case Match_InvalidOperand: { 4298 SMLoc ErrorLoc = IDLoc; 4299 if (ErrorInfo != ~0ULL) { 4300 if (ErrorInfo >= Operands.size()) { 4301 return Error(IDLoc, "too few operands for instruction"); 4302 } 4303 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4304 if (ErrorLoc == SMLoc()) 4305 ErrorLoc = IDLoc; 4306 } 4307 return Error(ErrorLoc, "invalid operand for instruction"); 4308 } 4309 4310 case Match_PreferE32: 4311 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4312 "should be encoded as e32"); 4313 case Match_MnemonicFail: 4314 llvm_unreachable("Invalid instructions should have been handled already"); 4315 } 4316 llvm_unreachable("Implement any new match types added!"); 4317 } 4318 4319 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4320 int64_t Tmp = -1; 4321 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4322 return true; 4323 } 4324 if (getParser().parseAbsoluteExpression(Tmp)) { 4325 return true; 4326 } 4327 Ret = static_cast<uint32_t>(Tmp); 4328 return false; 4329 } 4330 4331 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4332 uint32_t &Minor) { 4333 if (ParseAsAbsoluteExpression(Major)) 4334 return TokError("invalid major version"); 4335 4336 if (!trySkipToken(AsmToken::Comma)) 4337 return TokError("minor version number required, comma expected"); 4338 4339 if (ParseAsAbsoluteExpression(Minor)) 4340 return TokError("invalid minor version"); 4341 4342 return false; 4343 } 4344 4345 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4346 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4347 return TokError("directive only supported for amdgcn architecture"); 4348 4349 std::string Target; 4350 4351 SMLoc TargetStart = getLoc(); 4352 if (getParser().parseEscapedString(Target)) 4353 return true; 4354 SMRange TargetRange = SMRange(TargetStart, getLoc()); 4355 4356 std::string ExpectedTarget; 4357 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4358 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4359 4360 if (Target != ExpectedTargetOS.str()) 4361 return Error(TargetRange.Start, "target must match options", TargetRange); 4362 4363 
getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4364 return false; 4365 } 4366 4367 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4368 return Error(Range.Start, "value out of range", Range); 4369 } 4370 4371 bool AMDGPUAsmParser::calculateGPRBlocks( 4372 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4373 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4374 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4375 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4376 // TODO(scott.linder): These calculations are duplicated from 4377 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4378 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4379 4380 unsigned NumVGPRs = NextFreeVGPR; 4381 unsigned NumSGPRs = NextFreeSGPR; 4382 4383 if (Version.Major >= 10) 4384 NumSGPRs = 0; 4385 else { 4386 unsigned MaxAddressableNumSGPRs = 4387 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4388 4389 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4390 NumSGPRs > MaxAddressableNumSGPRs) 4391 return OutOfRangeError(SGPRRange); 4392 4393 NumSGPRs += 4394 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4395 4396 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4397 NumSGPRs > MaxAddressableNumSGPRs) 4398 return OutOfRangeError(SGPRRange); 4399 4400 if (Features.test(FeatureSGPRInitBug)) 4401 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4402 } 4403 4404 VGPRBlocks = 4405 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4406 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4407 4408 return false; 4409 } 4410 4411 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4412 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4413 return TokError("directive only supported for amdgcn architecture"); 4414 4415 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4416 return TokError("directive only supported for amdhsa OS"); 4417 4418 StringRef KernelName; 4419 if (getParser().parseIdentifier(KernelName)) 4420 return true; 4421 4422 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4423 4424 StringSet<> Seen; 4425 4426 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4427 4428 SMRange VGPRRange; 4429 uint64_t NextFreeVGPR = 0; 4430 uint64_t AccumOffset = 0; 4431 SMRange SGPRRange; 4432 uint64_t NextFreeSGPR = 0; 4433 unsigned UserSGPRCount = 0; 4434 bool ReserveVCC = true; 4435 bool ReserveFlatScr = true; 4436 bool ReserveXNACK = hasXNACK(); 4437 Optional<bool> EnableWavefrontSize32; 4438 4439 while (true) { 4440 while (trySkipToken(AsmToken::EndOfStatement)); 4441 4442 StringRef ID; 4443 SMRange IDRange = getTok().getLocRange(); 4444 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4445 return true; 4446 4447 if (ID == ".end_amdhsa_kernel") 4448 break; 4449 4450 if (Seen.find(ID) != Seen.end()) 4451 return TokError(".amdhsa_ directives cannot be repeated"); 4452 Seen.insert(ID); 4453 4454 SMLoc ValStart = getLoc(); 4455 int64_t IVal; 4456 if (getParser().parseAbsoluteExpression(IVal)) 4457 return true; 4458 SMLoc ValEnd = getLoc(); 4459 SMRange ValRange = SMRange(ValStart, ValEnd); 4460 4461 if (IVal < 0) 4462 return OutOfRangeError(ValRange); 4463 4464 uint64_t Val = IVal; 4465 4466 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4467 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4468 return OutOfRangeError(RANGE); \ 4469 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4470 4471 if (ID == 
".amdhsa_group_segment_fixed_size") { 4472 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4473 return OutOfRangeError(ValRange); 4474 KD.group_segment_fixed_size = Val; 4475 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4476 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4477 return OutOfRangeError(ValRange); 4478 KD.private_segment_fixed_size = Val; 4479 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4480 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4481 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4482 Val, ValRange); 4483 if (Val) 4484 UserSGPRCount += 4; 4485 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4486 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4487 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4488 ValRange); 4489 if (Val) 4490 UserSGPRCount += 2; 4491 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4492 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4493 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4494 ValRange); 4495 if (Val) 4496 UserSGPRCount += 2; 4497 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4498 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4499 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4500 Val, ValRange); 4501 if (Val) 4502 UserSGPRCount += 2; 4503 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4504 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4505 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4506 ValRange); 4507 if (Val) 4508 UserSGPRCount += 2; 4509 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4510 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4511 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4512 ValRange); 4513 if (Val) 4514 UserSGPRCount += 2; 4515 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4516 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4517 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4518 Val, ValRange); 4519 if (Val) 4520 UserSGPRCount += 1; 4521 } else if (ID == ".amdhsa_wavefront_size32") { 4522 if (IVersion.Major < 10) 4523 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4524 EnableWavefrontSize32 = Val; 4525 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4526 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4527 Val, ValRange); 4528 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4529 PARSE_BITS_ENTRY( 4530 KD.compute_pgm_rsrc2, 4531 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4532 ValRange); 4533 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4534 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4535 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4536 ValRange); 4537 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4538 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4539 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4540 ValRange); 4541 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4542 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4543 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4544 ValRange); 4545 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4546 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4547 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4548 ValRange); 4549 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4550 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4551 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4552 ValRange); 4553 } else if (ID == ".amdhsa_next_free_vgpr") { 4554 VGPRRange = ValRange; 4555 NextFreeVGPR = Val; 4556 } else if (ID == ".amdhsa_next_free_sgpr") { 4557 SGPRRange = 
ValRange; 4558 NextFreeSGPR = Val; 4559 } else if (ID == ".amdhsa_accum_offset") { 4560 if (!isGFX90A()) 4561 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4562 AccumOffset = Val; 4563 } else if (ID == ".amdhsa_reserve_vcc") { 4564 if (!isUInt<1>(Val)) 4565 return OutOfRangeError(ValRange); 4566 ReserveVCC = Val; 4567 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4568 if (IVersion.Major < 7) 4569 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4570 if (!isUInt<1>(Val)) 4571 return OutOfRangeError(ValRange); 4572 ReserveFlatScr = Val; 4573 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4574 if (IVersion.Major < 8) 4575 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4576 if (!isUInt<1>(Val)) 4577 return OutOfRangeError(ValRange); 4578 ReserveXNACK = Val; 4579 } else if (ID == ".amdhsa_float_round_mode_32") { 4580 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4581 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4582 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4583 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4584 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4585 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4586 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4587 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4588 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4589 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4590 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4591 ValRange); 4592 } else if (ID == ".amdhsa_dx10_clamp") { 4593 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4594 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4595 } else if (ID == ".amdhsa_ieee_mode") { 4596 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4597 Val, ValRange); 4598 } else if (ID == ".amdhsa_fp16_overflow") { 4599 if (IVersion.Major < 9) 4600 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4601 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4602 ValRange); 4603 } else if (ID == ".amdhsa_tg_split") { 4604 if (!isGFX90A()) 4605 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4606 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4607 ValRange); 4608 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4609 if (IVersion.Major < 10) 4610 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4611 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4612 ValRange); 4613 } else if (ID == ".amdhsa_memory_ordered") { 4614 if (IVersion.Major < 10) 4615 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4616 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4617 ValRange); 4618 } else if (ID == ".amdhsa_forward_progress") { 4619 if (IVersion.Major < 10) 4620 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4621 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4622 ValRange); 4623 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4624 PARSE_BITS_ENTRY( 4625 KD.compute_pgm_rsrc2, 4626 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4627 ValRange); 4628 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4629 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4630 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4631 Val, ValRange); 4632 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4633 PARSE_BITS_ENTRY( 4634 KD.compute_pgm_rsrc2, 4635 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4636 ValRange); 4637 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4638 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4639 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4640 Val, ValRange); 4641 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4642 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4643 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4644 Val, ValRange); 4645 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4646 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4647 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4648 Val, ValRange); 4649 } else if (ID == ".amdhsa_exception_int_div_zero") { 4650 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4651 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4652 Val, ValRange); 4653 } else { 4654 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4655 } 4656 4657 #undef PARSE_BITS_ENTRY 4658 } 4659 4660 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4661 return TokError(".amdhsa_next_free_vgpr directive is required"); 4662 4663 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4664 return TokError(".amdhsa_next_free_sgpr directive is required"); 4665 4666 unsigned VGPRBlocks; 4667 unsigned SGPRBlocks; 4668 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4669 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4670 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4671 SGPRBlocks)) 4672 return true; 4673 4674 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4675 VGPRBlocks)) 4676 return OutOfRangeError(VGPRRange); 4677 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4678 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4679 4680 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4681 SGPRBlocks)) 4682 return OutOfRangeError(SGPRRange); 4683 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4684 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4685 SGPRBlocks); 4686 4687 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4688 return TokError("too many user SGPRs enabled"); 4689 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4690 UserSGPRCount); 4691 4692 if (isGFX90A()) { 4693 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4694 return TokError(".amdhsa_accum_offset directive is required"); 4695 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4696 return TokError("accum_offset should be in range [4..256] in " 4697 "increments of 4"); 4698 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4699 return TokError("accum_offset exceeds total VGPR allocation"); 4700 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4701 (AccumOffset / 4 - 1)); 4702 } 4703 4704 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4705 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4706 ReserveFlatScr, ReserveXNACK); 4707 return false; 4708 } 4709 4710 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4711 uint32_t Major; 4712 uint32_t Minor; 4713 4714 if (ParseDirectiveMajorMinor(Major, Minor)) 4715 return true; 4716 4717 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4718 return false; 4719 } 4720 4721 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4722 uint32_t Major; 4723 uint32_t Minor; 4724 uint32_t Stepping; 4725 StringRef VendorName; 4726 StringRef ArchName; 4727 4728 // If this directive has no arguments, then use 
the ISA version for the 4729 // targeted GPU. 4730 if (isToken(AsmToken::EndOfStatement)) { 4731 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4732 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4733 ISA.Stepping, 4734 "AMD", "AMDGPU"); 4735 return false; 4736 } 4737 4738 if (ParseDirectiveMajorMinor(Major, Minor)) 4739 return true; 4740 4741 if (!trySkipToken(AsmToken::Comma)) 4742 return TokError("stepping version number required, comma expected"); 4743 4744 if (ParseAsAbsoluteExpression(Stepping)) 4745 return TokError("invalid stepping version"); 4746 4747 if (!trySkipToken(AsmToken::Comma)) 4748 return TokError("vendor name required, comma expected"); 4749 4750 if (!parseString(VendorName, "invalid vendor name")) 4751 return true; 4752 4753 if (!trySkipToken(AsmToken::Comma)) 4754 return TokError("arch name required, comma expected"); 4755 4756 if (!parseString(ArchName, "invalid arch name")) 4757 return true; 4758 4759 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4760 VendorName, ArchName); 4761 return false; 4762 } 4763 4764 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4765 amd_kernel_code_t &Header) { 4766 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4767 // assembly for backwards compatibility. 4768 if (ID == "max_scratch_backing_memory_byte_size") { 4769 Parser.eatToEndOfStatement(); 4770 return false; 4771 } 4772 4773 SmallString<40> ErrStr; 4774 raw_svector_ostream Err(ErrStr); 4775 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4776 return TokError(Err.str()); 4777 } 4778 Lex(); 4779 4780 if (ID == "enable_wavefront_size32") { 4781 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4782 if (!isGFX10Plus()) 4783 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4784 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4785 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4786 } else { 4787 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4788 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4789 } 4790 } 4791 4792 if (ID == "wavefront_size") { 4793 if (Header.wavefront_size == 5) { 4794 if (!isGFX10Plus()) 4795 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4796 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4797 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4798 } else if (Header.wavefront_size == 6) { 4799 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4800 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4801 } 4802 } 4803 4804 if (ID == "enable_wgp_mode") { 4805 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4806 !isGFX10Plus()) 4807 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4808 } 4809 4810 if (ID == "enable_mem_ordered") { 4811 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4812 !isGFX10Plus()) 4813 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4814 } 4815 4816 if (ID == "enable_fwd_progress") { 4817 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4818 !isGFX10Plus()) 4819 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4820 } 4821 4822 return false; 4823 } 4824 4825 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4826 amd_kernel_code_t Header; 4827 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4828 4829 while (true) { 4830 // Lex 
EndOfStatement. This is in a while loop, because lexing a comment 4831 // will set the current token to EndOfStatement. 4832 while(trySkipToken(AsmToken::EndOfStatement)); 4833 4834 StringRef ID; 4835 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4836 return true; 4837 4838 if (ID == ".end_amd_kernel_code_t") 4839 break; 4840 4841 if (ParseAMDKernelCodeTValue(ID, Header)) 4842 return true; 4843 } 4844 4845 getTargetStreamer().EmitAMDKernelCodeT(Header); 4846 4847 return false; 4848 } 4849 4850 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4851 StringRef KernelName; 4852 if (!parseId(KernelName, "expected symbol name")) 4853 return true; 4854 4855 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4856 ELF::STT_AMDGPU_HSA_KERNEL); 4857 4858 KernelScope.initialize(getContext()); 4859 return false; 4860 } 4861 4862 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4863 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4864 return Error(getLoc(), 4865 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4866 "architectures"); 4867 } 4868 4869 auto ISAVersionStringFromASM = getToken().getStringContents(); 4870 4871 std::string ISAVersionStringFromSTI; 4872 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4873 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4874 4875 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4876 return Error(getLoc(), 4877 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4878 "arguments specified through the command line"); 4879 } 4880 4881 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4882 Lex(); 4883 4884 return false; 4885 } 4886 4887 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4888 const char *AssemblerDirectiveBegin; 4889 const char *AssemblerDirectiveEnd; 4890 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4891 isHsaAbiVersion3(&getSTI()) 4892 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4893 HSAMD::V3::AssemblerDirectiveEnd) 4894 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4895 HSAMD::AssemblerDirectiveEnd); 4896 4897 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4898 return Error(getLoc(), 4899 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4900 "not available on non-amdhsa OSes")).str()); 4901 } 4902 4903 std::string HSAMetadataString; 4904 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4905 HSAMetadataString)) 4906 return true; 4907 4908 if (isHsaAbiVersion3(&getSTI())) { 4909 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4910 return Error(getLoc(), "invalid HSA metadata"); 4911 } else { 4912 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4913 return Error(getLoc(), "invalid HSA metadata"); 4914 } 4915 4916 return false; 4917 } 4918 4919 /// Common code to parse out a block of text (typically YAML) between start and 4920 /// end directives. 
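/// Leading whitespace is preserved and the target's statement separator is
/// inserted between statements so the collected string can be re-parsed later.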
4921 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4922 const char *AssemblerDirectiveEnd, 4923 std::string &CollectString) { 4924 4925 raw_string_ostream CollectStream(CollectString); 4926 4927 getLexer().setSkipSpace(false); 4928 4929 bool FoundEnd = false; 4930 while (!isToken(AsmToken::Eof)) { 4931 while (isToken(AsmToken::Space)) { 4932 CollectStream << getTokenStr(); 4933 Lex(); 4934 } 4935 4936 if (trySkipId(AssemblerDirectiveEnd)) { 4937 FoundEnd = true; 4938 break; 4939 } 4940 4941 CollectStream << Parser.parseStringToEndOfStatement() 4942 << getContext().getAsmInfo()->getSeparatorString(); 4943 4944 Parser.eatToEndOfStatement(); 4945 } 4946 4947 getLexer().setSkipSpace(true); 4948 4949 if (isToken(AsmToken::Eof) && !FoundEnd) { 4950 return TokError(Twine("expected directive ") + 4951 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4952 } 4953 4954 CollectStream.flush(); 4955 return false; 4956 } 4957 4958 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4959 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4960 std::string String; 4961 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4962 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4963 return true; 4964 4965 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4966 if (!PALMetadata->setFromString(String)) 4967 return Error(getLoc(), "invalid PAL metadata"); 4968 return false; 4969 } 4970 4971 /// Parse the assembler directive for old linear-format PAL metadata. 4972 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4973 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4974 return Error(getLoc(), 4975 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4976 "not available on non-amdpal OSes")).str()); 4977 } 4978 4979 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4980 PALMetadata->setLegacy(); 4981 for (;;) { 4982 uint32_t Key, Value; 4983 if (ParseAsAbsoluteExpression(Key)) { 4984 return TokError(Twine("invalid value in ") + 4985 Twine(PALMD::AssemblerDirective)); 4986 } 4987 if (!trySkipToken(AsmToken::Comma)) { 4988 return TokError(Twine("expected an even number of values in ") + 4989 Twine(PALMD::AssemblerDirective)); 4990 } 4991 if (ParseAsAbsoluteExpression(Value)) { 4992 return TokError(Twine("invalid value in ") + 4993 Twine(PALMD::AssemblerDirective)); 4994 } 4995 PALMetadata->setRegister(Key, Value); 4996 if (!trySkipToken(AsmToken::Comma)) 4997 break; 4998 } 4999 return false; 5000 } 5001 5002 /// ParseDirectiveAMDGPULDS 5003 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5004 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5005 if (getParser().checkForValidSection()) 5006 return true; 5007 5008 StringRef Name; 5009 SMLoc NameLoc = getLoc(); 5010 if (getParser().parseIdentifier(Name)) 5011 return TokError("expected identifier in directive"); 5012 5013 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5014 if (parseToken(AsmToken::Comma, "expected ','")) 5015 return true; 5016 5017 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5018 5019 int64_t Size; 5020 SMLoc SizeLoc = getLoc(); 5021 if (getParser().parseAbsoluteExpression(Size)) 5022 return true; 5023 if (Size < 0) 5024 return Error(SizeLoc, "size must be non-negative"); 5025 if (Size > LocalMemorySize) 5026 return Error(SizeLoc, "size is too large"); 5027 5028 int64_t Alignment = 4; 5029 if (trySkipToken(AsmToken::Comma)) { 5030 SMLoc AlignLoc = getLoc(); 5031 if 
(getParser().parseAbsoluteExpression(Alignment)) 5032 return true; 5033 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5034 return Error(AlignLoc, "alignment must be a power of two"); 5035 5036 // Alignment larger than the size of LDS is possible in theory, as long 5037 // as the linker manages to place to symbol at address 0, but we do want 5038 // to make sure the alignment fits nicely into a 32-bit integer. 5039 if (Alignment >= 1u << 31) 5040 return Error(AlignLoc, "alignment is too large"); 5041 } 5042 5043 if (parseToken(AsmToken::EndOfStatement, 5044 "unexpected token in '.amdgpu_lds' directive")) 5045 return true; 5046 5047 Symbol->redefineIfPossible(); 5048 if (!Symbol->isUndefined()) 5049 return Error(NameLoc, "invalid symbol redefinition"); 5050 5051 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5052 return false; 5053 } 5054 5055 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5056 StringRef IDVal = DirectiveID.getString(); 5057 5058 if (isHsaAbiVersion3(&getSTI())) { 5059 if (IDVal == ".amdgcn_target") 5060 return ParseDirectiveAMDGCNTarget(); 5061 5062 if (IDVal == ".amdhsa_kernel") 5063 return ParseDirectiveAMDHSAKernel(); 5064 5065 // TODO: Restructure/combine with PAL metadata directive. 5066 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5067 return ParseDirectiveHSAMetadata(); 5068 } else { 5069 if (IDVal == ".hsa_code_object_version") 5070 return ParseDirectiveHSACodeObjectVersion(); 5071 5072 if (IDVal == ".hsa_code_object_isa") 5073 return ParseDirectiveHSACodeObjectISA(); 5074 5075 if (IDVal == ".amd_kernel_code_t") 5076 return ParseDirectiveAMDKernelCodeT(); 5077 5078 if (IDVal == ".amdgpu_hsa_kernel") 5079 return ParseDirectiveAMDGPUHsaKernel(); 5080 5081 if (IDVal == ".amd_amdgpu_isa") 5082 return ParseDirectiveISAVersion(); 5083 5084 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5085 return ParseDirectiveHSAMetadata(); 5086 } 5087 5088 if (IDVal == ".amdgpu_lds") 5089 return ParseDirectiveAMDGPULDS(); 5090 5091 if (IDVal == PALMD::AssemblerDirectiveBegin) 5092 return ParseDirectivePALMetadataBegin(); 5093 5094 if (IDVal == PALMD::AssemblerDirective) 5095 return ParseDirectivePALMetadata(); 5096 5097 return true; 5098 } 5099 5100 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5101 unsigned RegNo) const { 5102 5103 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5104 R.isValid(); ++R) { 5105 if (*R == RegNo) 5106 return isGFX9Plus(); 5107 } 5108 5109 // GFX10 has 2 more SGPRs 104 and 105. 5110 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5111 R.isValid(); ++R) { 5112 if (*R == RegNo) 5113 return hasSGPR104_SGPR105(); 5114 } 5115 5116 switch (RegNo) { 5117 case AMDGPU::SRC_SHARED_BASE: 5118 case AMDGPU::SRC_SHARED_LIMIT: 5119 case AMDGPU::SRC_PRIVATE_BASE: 5120 case AMDGPU::SRC_PRIVATE_LIMIT: 5121 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5122 return isGFX9Plus(); 5123 case AMDGPU::TBA: 5124 case AMDGPU::TBA_LO: 5125 case AMDGPU::TBA_HI: 5126 case AMDGPU::TMA: 5127 case AMDGPU::TMA_LO: 5128 case AMDGPU::TMA_HI: 5129 return !isGFX9Plus(); 5130 case AMDGPU::XNACK_MASK: 5131 case AMDGPU::XNACK_MASK_LO: 5132 case AMDGPU::XNACK_MASK_HI: 5133 return (isVI() || isGFX9()) && hasXNACK(); 5134 case AMDGPU::SGPR_NULL: 5135 return isGFX10Plus(); 5136 default: 5137 break; 5138 } 5139 5140 if (isCI()) 5141 return true; 5142 5143 if (isSI() || isGFX10Plus()) { 5144 // No flat_scr on SI. 
5145 // On GFX10 flat scratch is not a valid register operand and can only be
5146 // accessed with s_setreg/s_getreg.
5147 switch (RegNo) {
5148 case AMDGPU::FLAT_SCR:
5149 case AMDGPU::FLAT_SCR_LO:
5150 case AMDGPU::FLAT_SCR_HI:
5151 return false;
5152 default:
5153 return true;
5154 }
5155 }
5156
5157 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5158 // SI/CI have.
5159 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5160 R.isValid(); ++R) {
5161 if (*R == RegNo)
5162 return hasSGPR102_SGPR103();
5163 }
5164
5165 return true;
5166 }
5167
5168 OperandMatchResultTy
5169 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5170 OperandMode Mode) {
5171 // Try to parse with a custom parser
5172 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5173
5174 // If we successfully parsed the operand or if there was an error parsing,
5175 // we are done.
5176 //
5177 // If we are parsing after we reach EndOfStatement then this means we
5178 // are appending default values to the Operands list. This is only done
5179 // by a custom parser, so we shouldn't continue on to the generic parsing.
5180 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5181 isToken(AsmToken::EndOfStatement))
5182 return ResTy;
5183
5184 SMLoc RBraceLoc;
5185 SMLoc LBraceLoc = getLoc();
5186 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5187 unsigned Prefix = Operands.size();
5188
5189 for (;;) {
5190 auto Loc = getLoc();
5191 ResTy = parseReg(Operands);
5192 if (ResTy == MatchOperand_NoMatch)
5193 Error(Loc, "expected a register");
5194 if (ResTy != MatchOperand_Success)
5195 return MatchOperand_ParseFail;
5196
5197 RBraceLoc = getLoc();
5198 if (trySkipToken(AsmToken::RBrac))
5199 break;
5200
5201 if (!skipToken(AsmToken::Comma,
5202 "expected a comma or a closing square bracket")) {
5203 return MatchOperand_ParseFail;
5204 }
5205 }
5206
5207 if (Operands.size() - Prefix > 1) {
5208 Operands.insert(Operands.begin() + Prefix,
5209 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5210 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5211 }
5212
5213 return MatchOperand_Success;
5214 }
5215
5216 return parseRegOrImm(Operands);
5217 }
5218
5219 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5220 // Clear any forced encodings from the previous instruction.
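  // A recognized suffix (_e64, _e32, _dpp or _sdwa) forces the corresponding
  // encoding and is stripped from the returned mnemonic.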
5221 setForcedEncodingSize(0); 5222 setForcedDPP(false); 5223 setForcedSDWA(false); 5224 5225 if (Name.endswith("_e64")) { 5226 setForcedEncodingSize(64); 5227 return Name.substr(0, Name.size() - 4); 5228 } else if (Name.endswith("_e32")) { 5229 setForcedEncodingSize(32); 5230 return Name.substr(0, Name.size() - 4); 5231 } else if (Name.endswith("_dpp")) { 5232 setForcedDPP(true); 5233 return Name.substr(0, Name.size() - 4); 5234 } else if (Name.endswith("_sdwa")) { 5235 setForcedSDWA(true); 5236 return Name.substr(0, Name.size() - 5); 5237 } 5238 return Name; 5239 } 5240 5241 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5242 StringRef Name, 5243 SMLoc NameLoc, OperandVector &Operands) { 5244 // Add the instruction mnemonic 5245 Name = parseMnemonicSuffix(Name); 5246 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5247 5248 bool IsMIMG = Name.startswith("image_"); 5249 5250 while (!trySkipToken(AsmToken::EndOfStatement)) { 5251 OperandMode Mode = OperandMode_Default; 5252 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5253 Mode = OperandMode_NSA; 5254 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5255 5256 if (Res != MatchOperand_Success) { 5257 checkUnsupportedInstruction(Name, NameLoc); 5258 if (!Parser.hasPendingError()) { 5259 // FIXME: use real operand location rather than the current location. 5260 StringRef Msg = 5261 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5262 "not a valid operand."; 5263 Error(getLoc(), Msg); 5264 } 5265 while (!trySkipToken(AsmToken::EndOfStatement)) { 5266 lex(); 5267 } 5268 return true; 5269 } 5270 5271 // Eat the comma or space if there is one. 5272 trySkipToken(AsmToken::Comma); 5273 } 5274 5275 return false; 5276 } 5277 5278 //===----------------------------------------------------------------------===// 5279 // Utility functions 5280 //===----------------------------------------------------------------------===// 5281 5282 OperandMatchResultTy 5283 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5284 5285 if (!trySkipId(Prefix, AsmToken::Colon)) 5286 return MatchOperand_NoMatch; 5287 5288 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5289 } 5290 5291 OperandMatchResultTy 5292 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5293 AMDGPUOperand::ImmTy ImmTy, 5294 bool (*ConvertResult)(int64_t&)) { 5295 SMLoc S = getLoc(); 5296 int64_t Value = 0; 5297 5298 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5299 if (Res != MatchOperand_Success) 5300 return Res; 5301 5302 if (ConvertResult && !ConvertResult(Value)) { 5303 Error(S, "invalid " + StringRef(Prefix) + " value."); 5304 } 5305 5306 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5307 return MatchOperand_Success; 5308 } 5309 5310 OperandMatchResultTy 5311 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5312 OperandVector &Operands, 5313 AMDGPUOperand::ImmTy ImmTy, 5314 bool (*ConvertResult)(int64_t&)) { 5315 SMLoc S = getLoc(); 5316 if (!trySkipId(Prefix, AsmToken::Colon)) 5317 return MatchOperand_NoMatch; 5318 5319 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5320 return MatchOperand_ParseFail; 5321 5322 unsigned Val = 0; 5323 const unsigned MaxSize = 4; 5324 5325 // FIXME: How to verify the number of elements matches the number of src 5326 // operands? 
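  // Parse up to MaxSize values, each 0 or 1 (e.g. op_sel:[0,1,1,0]), packing
  // element I into bit I of Val.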
5327 for (int I = 0; ; ++I) { 5328 int64_t Op; 5329 SMLoc Loc = getLoc(); 5330 if (!parseExpr(Op)) 5331 return MatchOperand_ParseFail; 5332 5333 if (Op != 0 && Op != 1) { 5334 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5335 return MatchOperand_ParseFail; 5336 } 5337 5338 Val |= (Op << I); 5339 5340 if (trySkipToken(AsmToken::RBrac)) 5341 break; 5342 5343 if (I + 1 == MaxSize) { 5344 Error(getLoc(), "expected a closing square bracket"); 5345 return MatchOperand_ParseFail; 5346 } 5347 5348 if (!skipToken(AsmToken::Comma, "expected a comma")) 5349 return MatchOperand_ParseFail; 5350 } 5351 5352 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5353 return MatchOperand_Success; 5354 } 5355 5356 OperandMatchResultTy 5357 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5358 AMDGPUOperand::ImmTy ImmTy) { 5359 int64_t Bit; 5360 SMLoc S = getLoc(); 5361 5362 if (trySkipId(Name)) { 5363 Bit = 1; 5364 } else if (trySkipId("no", Name)) { 5365 Bit = 0; 5366 } else { 5367 return MatchOperand_NoMatch; 5368 } 5369 5370 if (Name == "r128" && !hasMIMG_R128()) { 5371 Error(S, "r128 modifier is not supported on this GPU"); 5372 return MatchOperand_ParseFail; 5373 } 5374 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5375 Error(S, "a16 modifier is not supported on this GPU"); 5376 return MatchOperand_ParseFail; 5377 } 5378 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) { 5379 Error(S, "dlc modifier is not supported on this GPU"); 5380 return MatchOperand_ParseFail; 5381 } 5382 if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB) 5383 return MatchOperand_ParseFail; 5384 5385 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5386 ImmTy = AMDGPUOperand::ImmTyR128A16; 5387 5388 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5389 return MatchOperand_Success; 5390 } 5391 5392 static void addOptionalImmOperand( 5393 MCInst& Inst, const OperandVector& Operands, 5394 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5395 AMDGPUOperand::ImmTy ImmT, 5396 int64_t Default = 0) { 5397 auto i = OptionalIdx.find(ImmT); 5398 if (i != OptionalIdx.end()) { 5399 unsigned Idx = i->second; 5400 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5401 } else { 5402 Inst.addOperand(MCOperand::createImm(Default)); 5403 } 5404 } 5405 5406 OperandMatchResultTy 5407 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5408 StringRef &Value, 5409 SMLoc &StringLoc) { 5410 if (!trySkipId(Prefix, AsmToken::Colon)) 5411 return MatchOperand_NoMatch; 5412 5413 StringLoc = getLoc(); 5414 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5415 : MatchOperand_ParseFail; 5416 } 5417 5418 //===----------------------------------------------------------------------===// 5419 // MTBUF format 5420 //===----------------------------------------------------------------------===// 5421 5422 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5423 int64_t MaxVal, 5424 int64_t &Fmt) { 5425 int64_t Val; 5426 SMLoc Loc = getLoc(); 5427 5428 auto Res = parseIntWithPrefix(Pref, Val); 5429 if (Res == MatchOperand_ParseFail) 5430 return false; 5431 if (Res == MatchOperand_NoMatch) 5432 return true; 5433 5434 if (Val < 0 || Val > MaxVal) { 5435 Error(Loc, Twine("out of range ", StringRef(Pref))); 5436 return false; 5437 } 5438 5439 Fmt = Val; 5440 return true; 5441 } 5442 5443 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5444 // values to live in a joint format operand in the MCInst encoding. 
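// Sketch of the forms handled below (field values are illustrative):
// "dfmt:1, nfmt:2", "nfmt:2, dfmt:1" (the separating comma is optional), or
// either field alone; an omitted field falls back to its default before the
// pair is packed by encodeDfmtNfmt().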
5445 OperandMatchResultTy 5446 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5447 using namespace llvm::AMDGPU::MTBUFFormat; 5448 5449 int64_t Dfmt = DFMT_UNDEF; 5450 int64_t Nfmt = NFMT_UNDEF; 5451 5452 // dfmt and nfmt can appear in either order, and each is optional. 5453 for (int I = 0; I < 2; ++I) { 5454 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5455 return MatchOperand_ParseFail; 5456 5457 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5458 return MatchOperand_ParseFail; 5459 } 5460 // Skip optional comma between dfmt/nfmt 5461 // but guard against 2 commas following each other. 5462 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5463 !peekToken().is(AsmToken::Comma)) { 5464 trySkipToken(AsmToken::Comma); 5465 } 5466 } 5467 5468 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5469 return MatchOperand_NoMatch; 5470 5471 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5472 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5473 5474 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5475 return MatchOperand_Success; 5476 } 5477 5478 OperandMatchResultTy 5479 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5480 using namespace llvm::AMDGPU::MTBUFFormat; 5481 5482 int64_t Fmt = UFMT_UNDEF; 5483 5484 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5485 return MatchOperand_ParseFail; 5486 5487 if (Fmt == UFMT_UNDEF) 5488 return MatchOperand_NoMatch; 5489 5490 Format = Fmt; 5491 return MatchOperand_Success; 5492 } 5493 5494 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5495 int64_t &Nfmt, 5496 StringRef FormatStr, 5497 SMLoc Loc) { 5498 using namespace llvm::AMDGPU::MTBUFFormat; 5499 int64_t Format; 5500 5501 Format = getDfmt(FormatStr); 5502 if (Format != DFMT_UNDEF) { 5503 Dfmt = Format; 5504 return true; 5505 } 5506 5507 Format = getNfmt(FormatStr, getSTI()); 5508 if (Format != NFMT_UNDEF) { 5509 Nfmt = Format; 5510 return true; 5511 } 5512 5513 Error(Loc, "unsupported format"); 5514 return false; 5515 } 5516 5517 OperandMatchResultTy 5518 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5519 SMLoc FormatLoc, 5520 int64_t &Format) { 5521 using namespace llvm::AMDGPU::MTBUFFormat; 5522 5523 int64_t Dfmt = DFMT_UNDEF; 5524 int64_t Nfmt = NFMT_UNDEF; 5525 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5526 return MatchOperand_ParseFail; 5527 5528 if (trySkipToken(AsmToken::Comma)) { 5529 StringRef Str; 5530 SMLoc Loc = getLoc(); 5531 if (!parseId(Str, "expected a format string") || 5532 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5533 return MatchOperand_ParseFail; 5534 } 5535 if (Dfmt == DFMT_UNDEF) { 5536 Error(Loc, "duplicate numeric format"); 5537 return MatchOperand_ParseFail; 5538 } else if (Nfmt == NFMT_UNDEF) { 5539 Error(Loc, "duplicate data format"); 5540 return MatchOperand_ParseFail; 5541 } 5542 } 5543 5544 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5545 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5546 5547 if (isGFX10Plus()) { 5548 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5549 if (Ufmt == UFMT_UNDEF) { 5550 Error(FormatLoc, "unsupported format"); 5551 return MatchOperand_ParseFail; 5552 } 5553 Format = Ufmt; 5554 } else { 5555 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5556 } 5557 5558 return MatchOperand_Success; 5559 } 5560 5561 OperandMatchResultTy 5562 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5563 SMLoc Loc, 5564 int64_t &Format) { 5565 using namespace llvm::AMDGPU::MTBUFFormat; 5566 5567 auto Id = getUnifiedFormat(FormatStr); 5568 if (Id == UFMT_UNDEF) 5569 return MatchOperand_NoMatch; 5570 5571 if (!isGFX10Plus()) { 5572 Error(Loc, "unified format is not supported on this GPU"); 5573 return MatchOperand_ParseFail; 5574 } 5575 5576 Format = Id; 5577 return MatchOperand_Success; 5578 } 5579 5580 OperandMatchResultTy 5581 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5582 using namespace llvm::AMDGPU::MTBUFFormat; 5583 SMLoc Loc = getLoc(); 5584 5585 if (!parseExpr(Format)) 5586 return MatchOperand_ParseFail; 5587 if (!isValidFormatEncoding(Format, getSTI())) { 5588 Error(Loc, "out of range format"); 5589 return MatchOperand_ParseFail; 5590 } 5591 5592 return MatchOperand_Success; 5593 } 5594 5595 OperandMatchResultTy 5596 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5597 using namespace llvm::AMDGPU::MTBUFFormat; 5598 5599 if (!trySkipId("format", AsmToken::Colon)) 5600 return MatchOperand_NoMatch; 5601 5602 if (trySkipToken(AsmToken::LBrac)) { 5603 StringRef FormatStr; 5604 SMLoc Loc = getLoc(); 5605 if (!parseId(FormatStr, "expected a format string")) 5606 return MatchOperand_ParseFail; 5607 5608 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5609 if (Res == MatchOperand_NoMatch) 5610 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5611 if (Res != MatchOperand_Success) 5612 return Res; 5613 5614 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5615 return MatchOperand_ParseFail; 5616 5617 return MatchOperand_Success; 5618 } 5619 5620 return parseNumericFormat(Format); 5621 } 5622 5623 OperandMatchResultTy 5624 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5625 using namespace llvm::AMDGPU::MTBUFFormat; 5626 5627 int64_t Format = getDefaultFormatEncoding(getSTI()); 5628 OperandMatchResultTy Res; 5629 SMLoc Loc = getLoc(); 5630 5631 // Parse legacy format syntax. 5632 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5633 if (Res == MatchOperand_ParseFail) 5634 return Res; 5635 5636 bool FormatFound = (Res == MatchOperand_Success); 5637 5638 Operands.push_back( 5639 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5640 5641 if (FormatFound) 5642 trySkipToken(AsmToken::Comma); 5643 5644 if (isToken(AsmToken::EndOfStatement)) { 5645 // We are expecting an soffset operand, 5646 // but let matcher handle the error. 5647 return MatchOperand_Success; 5648 } 5649 5650 // Parse soffset. 
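  // The format specifier may also follow soffset, e.g. "..., s4 format:77" or
  // "..., s4 format:[BUF_FMT_32_FLOAT]" (register and values illustrative);
  // that case is handled below by patching the format operand pushed above.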
5651 Res = parseRegOrImm(Operands); 5652 if (Res != MatchOperand_Success) 5653 return Res; 5654 5655 trySkipToken(AsmToken::Comma); 5656 5657 if (!FormatFound) { 5658 Res = parseSymbolicOrNumericFormat(Format); 5659 if (Res == MatchOperand_ParseFail) 5660 return Res; 5661 if (Res == MatchOperand_Success) { 5662 auto Size = Operands.size(); 5663 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5664 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5665 Op.setImm(Format); 5666 } 5667 return MatchOperand_Success; 5668 } 5669 5670 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5671 Error(getLoc(), "duplicate format"); 5672 return MatchOperand_ParseFail; 5673 } 5674 return MatchOperand_Success; 5675 } 5676 5677 //===----------------------------------------------------------------------===// 5678 // ds 5679 //===----------------------------------------------------------------------===// 5680 5681 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5682 const OperandVector &Operands) { 5683 OptionalImmIndexMap OptionalIdx; 5684 5685 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5686 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5687 5688 // Add the register arguments 5689 if (Op.isReg()) { 5690 Op.addRegOperands(Inst, 1); 5691 continue; 5692 } 5693 5694 // Handle optional arguments 5695 OptionalIdx[Op.getImmTy()] = i; 5696 } 5697 5698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5701 5702 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5703 } 5704 5705 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5706 bool IsGdsHardcoded) { 5707 OptionalImmIndexMap OptionalIdx; 5708 5709 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5710 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5711 5712 // Add the register arguments 5713 if (Op.isReg()) { 5714 Op.addRegOperands(Inst, 1); 5715 continue; 5716 } 5717 5718 if (Op.isToken() && Op.getToken() == "gds") { 5719 IsGdsHardcoded = true; 5720 continue; 5721 } 5722 5723 // Handle optional arguments 5724 OptionalIdx[Op.getImmTy()] = i; 5725 } 5726 5727 AMDGPUOperand::ImmTy OffsetType = 5728 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5729 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5730 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5731 AMDGPUOperand::ImmTyOffset; 5732 5733 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5734 5735 if (!IsGdsHardcoded) { 5736 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5737 } 5738 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5739 } 5740 5741 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5742 OptionalImmIndexMap OptionalIdx; 5743 5744 unsigned OperandIdx[4]; 5745 unsigned EnMask = 0; 5746 int SrcIdx = 0; 5747 5748 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5749 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5750 5751 // Add the register arguments 5752 if (Op.isReg()) { 5753 assert(SrcIdx < 4); 5754 OperandIdx[SrcIdx] = Inst.size(); 5755 Op.addRegOperands(Inst, 1); 5756 ++SrcIdx; 5757 continue; 5758 } 5759 5760 if (Op.isOff()) { 5761 assert(SrcIdx < 4); 5762 OperandIdx[SrcIdx] = Inst.size(); 5763 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5764 ++SrcIdx; 5765 continue; 5766 } 5767 5768 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5769 Op.addImmOperands(Inst, 1); 5770 continue; 5771 } 5772 5773 if (Op.isToken() && Op.getToken() == "done") 5774 continue; 5775 5776 // Handle optional arguments 5777 OptionalIdx[Op.getImmTy()] = i; 5778 } 5779 5780 assert(SrcIdx == 4); 5781 5782 bool Compr = false; 5783 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5784 Compr = true; 5785 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5786 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5787 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5788 } 5789 5790 for (auto i = 0; i < SrcIdx; ++i) { 5791 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5792 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5793 } 5794 } 5795 5796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5797 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5798 5799 Inst.addOperand(MCOperand::createImm(EnMask)); 5800 } 5801 5802 //===----------------------------------------------------------------------===// 5803 // s_waitcnt 5804 //===----------------------------------------------------------------------===// 5805 5806 static bool 5807 encodeCnt( 5808 const AMDGPU::IsaVersion ISA, 5809 int64_t &IntVal, 5810 int64_t CntVal, 5811 bool Saturate, 5812 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5813 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5814 { 5815 bool Failed = false; 5816 5817 IntVal = encode(ISA, IntVal, CntVal); 5818 if (CntVal != decode(ISA, IntVal)) { 5819 if (Saturate) { 5820 IntVal = encode(ISA, IntVal, -1); 5821 } else { 5822 Failed = true; 5823 } 5824 } 5825 return Failed; 5826 } 5827 5828 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5829 5830 SMLoc CntLoc = getLoc(); 5831 StringRef CntName = getTokenStr(); 5832 5833 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5834 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5835 return false; 5836 5837 int64_t CntVal; 5838 SMLoc ValLoc = getLoc(); 5839 if (!parseExpr(CntVal)) 5840 return false; 5841 5842 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5843 5844 bool Failed = true; 5845 bool Sat = CntName.endswith("_sat"); 5846 5847 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5848 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5849 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5850 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5851 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5852 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5853 } else { 5854 Error(CntLoc, "invalid counter name " + CntName); 5855 return false; 5856 } 5857 5858 if (Failed) { 5859 Error(ValLoc, "too large value for " + CntName); 5860 return false; 5861 } 5862 5863 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5864 return false; 5865 5866 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5867 if (isToken(AsmToken::EndOfStatement)) { 5868 Error(getLoc(), "expected a counter name"); 5869 return false; 5870 } 5871 } 5872 5873 return true; 5874 } 5875 5876 OperandMatchResultTy 5877 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5878 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5879 int64_t Waitcnt = getWaitcntBitMask(ISA); 5880 SMLoc S = getLoc(); 5881 5882 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5883 while (!isToken(AsmToken::EndOfStatement)) { 5884 if (!parseCnt(Waitcnt)) 5885 return MatchOperand_ParseFail; 5886 } 5887 } else { 5888 if (!parseExpr(Waitcnt)) 5889 return MatchOperand_ParseFail; 5890 } 5891 5892 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5893 return MatchOperand_Success; 5894 } 5895 5896 bool 5897 AMDGPUOperand::isSWaitCnt() const { 5898 return isImm(); 5899 } 5900 5901 //===----------------------------------------------------------------------===// 5902 // hwreg 5903 //===----------------------------------------------------------------------===// 5904 5905 bool 5906 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5907 OperandInfoTy &Offset, 5908 
OperandInfoTy &Width) { 5909 using namespace llvm::AMDGPU::Hwreg; 5910 5911 // The register may be specified by name or using a numeric code 5912 HwReg.Loc = getLoc(); 5913 if (isToken(AsmToken::Identifier) && 5914 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5915 HwReg.IsSymbolic = true; 5916 lex(); // skip register name 5917 } else if (!parseExpr(HwReg.Id, "a register name")) { 5918 return false; 5919 } 5920 5921 if (trySkipToken(AsmToken::RParen)) 5922 return true; 5923 5924 // parse optional params 5925 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5926 return false; 5927 5928 Offset.Loc = getLoc(); 5929 if (!parseExpr(Offset.Id)) 5930 return false; 5931 5932 if (!skipToken(AsmToken::Comma, "expected a comma")) 5933 return false; 5934 5935 Width.Loc = getLoc(); 5936 return parseExpr(Width.Id) && 5937 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5938 } 5939 5940 bool 5941 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5942 const OperandInfoTy &Offset, 5943 const OperandInfoTy &Width) { 5944 5945 using namespace llvm::AMDGPU::Hwreg; 5946 5947 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5948 Error(HwReg.Loc, 5949 "specified hardware register is not supported on this GPU"); 5950 return false; 5951 } 5952 if (!isValidHwreg(HwReg.Id)) { 5953 Error(HwReg.Loc, 5954 "invalid code of hardware register: only 6-bit values are legal"); 5955 return false; 5956 } 5957 if (!isValidHwregOffset(Offset.Id)) { 5958 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5959 return false; 5960 } 5961 if (!isValidHwregWidth(Width.Id)) { 5962 Error(Width.Loc, 5963 "invalid bitfield width: only values from 1 to 32 are legal"); 5964 return false; 5965 } 5966 return true; 5967 } 5968 5969 OperandMatchResultTy 5970 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5971 using namespace llvm::AMDGPU::Hwreg; 5972 5973 int64_t ImmVal = 0; 5974 SMLoc Loc = getLoc(); 5975 5976 if (trySkipId("hwreg", AsmToken::LParen)) { 5977 OperandInfoTy HwReg(ID_UNKNOWN_); 5978 OperandInfoTy Offset(OFFSET_DEFAULT_); 5979 OperandInfoTy Width(WIDTH_DEFAULT_); 5980 if (parseHwregBody(HwReg, Offset, Width) && 5981 validateHwreg(HwReg, Offset, Width)) { 5982 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 5983 } else { 5984 return MatchOperand_ParseFail; 5985 } 5986 } else if (parseExpr(ImmVal, "a hwreg macro")) { 5987 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5988 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5989 return MatchOperand_ParseFail; 5990 } 5991 } else { 5992 return MatchOperand_ParseFail; 5993 } 5994 5995 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5996 return MatchOperand_Success; 5997 } 5998 5999 bool AMDGPUOperand::isHwreg() const { 6000 return isImmTy(ImmTyHwreg); 6001 } 6002 6003 //===----------------------------------------------------------------------===// 6004 // sendmsg 6005 //===----------------------------------------------------------------------===// 6006 6007 bool 6008 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6009 OperandInfoTy &Op, 6010 OperandInfoTy &Stream) { 6011 using namespace llvm::AMDGPU::SendMsg; 6012 6013 Msg.Loc = getLoc(); 6014 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6015 Msg.IsSymbolic = true; 6016 lex(); // skip message name 6017 } else if (!parseExpr(Msg.Id, "a message name")) { 6018 return false; 6019 } 6020 6021 if (trySkipToken(AsmToken::Comma)) { 6022 Op.IsDefined = true; 
6023 Op.Loc = getLoc(); 6024 if (isToken(AsmToken::Identifier) && 6025 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6026 lex(); // skip operation name 6027 } else if (!parseExpr(Op.Id, "an operation name")) { 6028 return false; 6029 } 6030 6031 if (trySkipToken(AsmToken::Comma)) { 6032 Stream.IsDefined = true; 6033 Stream.Loc = getLoc(); 6034 if (!parseExpr(Stream.Id)) 6035 return false; 6036 } 6037 } 6038 6039 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6040 } 6041 6042 bool 6043 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6044 const OperandInfoTy &Op, 6045 const OperandInfoTy &Stream) { 6046 using namespace llvm::AMDGPU::SendMsg; 6047 6048 // Validation strictness depends on whether message is specified 6049 // in a symbolc or in a numeric form. In the latter case 6050 // only encoding possibility is checked. 6051 bool Strict = Msg.IsSymbolic; 6052 6053 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6054 Error(Msg.Loc, "invalid message id"); 6055 return false; 6056 } 6057 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6058 if (Op.IsDefined) { 6059 Error(Op.Loc, "message does not support operations"); 6060 } else { 6061 Error(Msg.Loc, "missing message operation"); 6062 } 6063 return false; 6064 } 6065 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6066 Error(Op.Loc, "invalid operation id"); 6067 return false; 6068 } 6069 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6070 Error(Stream.Loc, "message operation does not support streams"); 6071 return false; 6072 } 6073 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6074 Error(Stream.Loc, "invalid message stream id"); 6075 return false; 6076 } 6077 return true; 6078 } 6079 6080 OperandMatchResultTy 6081 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6082 using namespace llvm::AMDGPU::SendMsg; 6083 6084 int64_t ImmVal = 0; 6085 SMLoc Loc = getLoc(); 6086 6087 if (trySkipId("sendmsg", AsmToken::LParen)) { 6088 OperandInfoTy Msg(ID_UNKNOWN_); 6089 OperandInfoTy Op(OP_NONE_); 6090 OperandInfoTy Stream(STREAM_ID_NONE_); 6091 if (parseSendMsgBody(Msg, Op, Stream) && 6092 validateSendMsg(Msg, Op, Stream)) { 6093 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6094 } else { 6095 return MatchOperand_ParseFail; 6096 } 6097 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6098 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6099 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6100 return MatchOperand_ParseFail; 6101 } 6102 } else { 6103 return MatchOperand_ParseFail; 6104 } 6105 6106 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6107 return MatchOperand_Success; 6108 } 6109 6110 bool AMDGPUOperand::isSendMsg() const { 6111 return isImmTy(ImmTySendMsg); 6112 } 6113 6114 //===----------------------------------------------------------------------===// 6115 // v_interp 6116 //===----------------------------------------------------------------------===// 6117 6118 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6119 StringRef Str; 6120 SMLoc S = getLoc(); 6121 6122 if (!parseId(Str)) 6123 return MatchOperand_NoMatch; 6124 6125 int Slot = StringSwitch<int>(Str) 6126 .Case("p10", 0) 6127 .Case("p20", 1) 6128 .Case("p0", 2) 6129 .Default(-1); 6130 6131 if (Slot == -1) { 6132 Error(S, "invalid interpolation slot"); 6133 return MatchOperand_ParseFail; 6134 } 6135 6136 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6137 
AMDGPUOperand::ImmTyInterpSlot)); 6138 return MatchOperand_Success; 6139 } 6140 6141 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6142 StringRef Str; 6143 SMLoc S = getLoc(); 6144 6145 if (!parseId(Str)) 6146 return MatchOperand_NoMatch; 6147 6148 if (!Str.startswith("attr")) { 6149 Error(S, "invalid interpolation attribute"); 6150 return MatchOperand_ParseFail; 6151 } 6152 6153 StringRef Chan = Str.take_back(2); 6154 int AttrChan = StringSwitch<int>(Chan) 6155 .Case(".x", 0) 6156 .Case(".y", 1) 6157 .Case(".z", 2) 6158 .Case(".w", 3) 6159 .Default(-1); 6160 if (AttrChan == -1) { 6161 Error(S, "invalid or missing interpolation attribute channel"); 6162 return MatchOperand_ParseFail; 6163 } 6164 6165 Str = Str.drop_back(2).drop_front(4); 6166 6167 uint8_t Attr; 6168 if (Str.getAsInteger(10, Attr)) { 6169 Error(S, "invalid or missing interpolation attribute number"); 6170 return MatchOperand_ParseFail; 6171 } 6172 6173 if (Attr > 63) { 6174 Error(S, "out of bounds interpolation attribute number"); 6175 return MatchOperand_ParseFail; 6176 } 6177 6178 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6179 6180 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6181 AMDGPUOperand::ImmTyInterpAttr)); 6182 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6183 AMDGPUOperand::ImmTyAttrChan)); 6184 return MatchOperand_Success; 6185 } 6186 6187 //===----------------------------------------------------------------------===// 6188 // exp 6189 //===----------------------------------------------------------------------===// 6190 6191 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6192 using namespace llvm::AMDGPU::Exp; 6193 6194 StringRef Str; 6195 SMLoc S = getLoc(); 6196 6197 if (!parseId(Str)) 6198 return MatchOperand_NoMatch; 6199 6200 unsigned Id = getTgtId(Str); 6201 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6202 Error(S, (Id == ET_INVALID) ? 
6203 "invalid exp target" : 6204 "exp target is not supported on this GPU"); 6205 return MatchOperand_ParseFail; 6206 } 6207 6208 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6209 AMDGPUOperand::ImmTyExpTgt)); 6210 return MatchOperand_Success; 6211 } 6212 6213 //===----------------------------------------------------------------------===// 6214 // parser helpers 6215 //===----------------------------------------------------------------------===// 6216 6217 bool 6218 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6219 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6220 } 6221 6222 bool 6223 AMDGPUAsmParser::isId(const StringRef Id) const { 6224 return isId(getToken(), Id); 6225 } 6226 6227 bool 6228 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6229 return getTokenKind() == Kind; 6230 } 6231 6232 bool 6233 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6234 if (isId(Id)) { 6235 lex(); 6236 return true; 6237 } 6238 return false; 6239 } 6240 6241 bool 6242 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6243 if (isToken(AsmToken::Identifier)) { 6244 StringRef Tok = getTokenStr(); 6245 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6246 lex(); 6247 return true; 6248 } 6249 } 6250 return false; 6251 } 6252 6253 bool 6254 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6255 if (isId(Id) && peekToken().is(Kind)) { 6256 lex(); 6257 lex(); 6258 return true; 6259 } 6260 return false; 6261 } 6262 6263 bool 6264 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6265 if (isToken(Kind)) { 6266 lex(); 6267 return true; 6268 } 6269 return false; 6270 } 6271 6272 bool 6273 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6274 const StringRef ErrMsg) { 6275 if (!trySkipToken(Kind)) { 6276 Error(getLoc(), ErrMsg); 6277 return false; 6278 } 6279 return true; 6280 } 6281 6282 bool 6283 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6284 SMLoc S = getLoc(); 6285 6286 const MCExpr *Expr; 6287 if (Parser.parseExpression(Expr)) 6288 return false; 6289 6290 if (Expr->evaluateAsAbsolute(Imm)) 6291 return true; 6292 6293 if (Expected.empty()) { 6294 Error(S, "expected absolute expression"); 6295 } else { 6296 Error(S, Twine("expected ", Expected) + 6297 Twine(" or an absolute expression")); 6298 } 6299 return false; 6300 } 6301 6302 bool 6303 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6304 SMLoc S = getLoc(); 6305 6306 const MCExpr *Expr; 6307 if (Parser.parseExpression(Expr)) 6308 return false; 6309 6310 int64_t IntVal; 6311 if (Expr->evaluateAsAbsolute(IntVal)) { 6312 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6313 } else { 6314 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6315 } 6316 return true; 6317 } 6318 6319 bool 6320 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6321 if (isToken(AsmToken::String)) { 6322 Val = getToken().getStringContents(); 6323 lex(); 6324 return true; 6325 } else { 6326 Error(getLoc(), ErrMsg); 6327 return false; 6328 } 6329 } 6330 6331 bool 6332 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6333 if (isToken(AsmToken::Identifier)) { 6334 Val = getTokenStr(); 6335 lex(); 6336 return true; 6337 } else { 6338 if (!ErrMsg.empty()) 6339 Error(getLoc(), ErrMsg); 6340 return false; 6341 } 6342 } 6343 6344 AsmToken 6345 AMDGPUAsmParser::getToken() const { 6346 return Parser.getTok(); 6347 } 6348 6349 AsmToken 6350 
AMDGPUAsmParser::peekToken() { 6351 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6352 } 6353 6354 void 6355 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6356 auto TokCount = getLexer().peekTokens(Tokens); 6357 6358 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6359 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6360 } 6361 6362 AsmToken::TokenKind 6363 AMDGPUAsmParser::getTokenKind() const { 6364 return getLexer().getKind(); 6365 } 6366 6367 SMLoc 6368 AMDGPUAsmParser::getLoc() const { 6369 return getToken().getLoc(); 6370 } 6371 6372 StringRef 6373 AMDGPUAsmParser::getTokenStr() const { 6374 return getToken().getString(); 6375 } 6376 6377 void 6378 AMDGPUAsmParser::lex() { 6379 Parser.Lex(); 6380 } 6381 6382 SMLoc 6383 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6384 const OperandVector &Operands) const { 6385 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6386 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6387 if (Test(Op)) 6388 return Op.getStartLoc(); 6389 } 6390 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6391 } 6392 6393 SMLoc 6394 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6395 const OperandVector &Operands) const { 6396 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6397 return getOperandLoc(Test, Operands); 6398 } 6399 6400 SMLoc 6401 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6402 const OperandVector &Operands) const { 6403 auto Test = [=](const AMDGPUOperand& Op) { 6404 return Op.isRegKind() && Op.getReg() == Reg; 6405 }; 6406 return getOperandLoc(Test, Operands); 6407 } 6408 6409 SMLoc 6410 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6411 auto Test = [](const AMDGPUOperand& Op) { 6412 return Op.IsImmKindLiteral() || Op.isExpr(); 6413 }; 6414 return getOperandLoc(Test, Operands); 6415 } 6416 6417 SMLoc 6418 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6419 auto Test = [](const AMDGPUOperand& Op) { 6420 return Op.isImmKindConst(); 6421 }; 6422 return getOperandLoc(Test, Operands); 6423 } 6424 6425 //===----------------------------------------------------------------------===// 6426 // swizzle 6427 //===----------------------------------------------------------------------===// 6428 6429 LLVM_READNONE 6430 static unsigned 6431 encodeBitmaskPerm(const unsigned AndMask, 6432 const unsigned OrMask, 6433 const unsigned XorMask) { 6434 using namespace llvm::AMDGPU::Swizzle; 6435 6436 return BITMASK_PERM_ENC | 6437 (AndMask << BITMASK_AND_SHIFT) | 6438 (OrMask << BITMASK_OR_SHIFT) | 6439 (XorMask << BITMASK_XOR_SHIFT); 6440 } 6441 6442 bool 6443 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6444 const unsigned MinVal, 6445 const unsigned MaxVal, 6446 const StringRef ErrMsg, 6447 SMLoc &Loc) { 6448 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6449 return false; 6450 } 6451 Loc = getLoc(); 6452 if (!parseExpr(Op)) { 6453 return false; 6454 } 6455 if (Op < MinVal || Op > MaxVal) { 6456 Error(Loc, ErrMsg); 6457 return false; 6458 } 6459 6460 return true; 6461 } 6462 6463 bool 6464 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6465 const unsigned MinVal, 6466 const unsigned MaxVal, 6467 const StringRef ErrMsg) { 6468 SMLoc Loc; 6469 for (unsigned i = 0; i < OpNum; ++i) { 6470 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6471 return false; 6472 } 6473 6474 return true; 6475 } 6476 6477 bool 6478 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6479 using namespace llvm::AMDGPU::Swizzle; 6480 6481 int64_t Lane[LANE_NUM]; 6482 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6483 "expected a 2-bit lane id")) { 6484 Imm = QUAD_PERM_ENC; 6485 for (unsigned I = 0; I < LANE_NUM; ++I) { 6486 Imm |= Lane[I] << (LANE_SHIFT * I); 6487 } 6488 return true; 6489 } 6490 return false; 6491 } 6492 6493 bool 6494 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6495 using namespace llvm::AMDGPU::Swizzle; 6496 6497 SMLoc Loc; 6498 int64_t GroupSize; 6499 int64_t LaneIdx; 6500 6501 if (!parseSwizzleOperand(GroupSize, 6502 2, 32, 6503 "group size must be in the interval [2,32]", 6504 Loc)) { 6505 return false; 6506 } 6507 if (!isPowerOf2_64(GroupSize)) { 6508 Error(Loc, "group size must be a power of two"); 6509 return false; 6510 } 6511 if (parseSwizzleOperand(LaneIdx, 6512 0, GroupSize - 1, 6513 "lane id must be in the interval [0,group size - 1]", 6514 Loc)) { 6515 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6516 return true; 6517 } 6518 return false; 6519 } 6520 6521 bool 6522 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6523 using namespace llvm::AMDGPU::Swizzle; 6524 6525 SMLoc Loc; 6526 int64_t GroupSize; 6527 6528 if (!parseSwizzleOperand(GroupSize, 6529 2, 32, 6530 "group size must be in the interval [2,32]", 6531 Loc)) { 6532 return false; 6533 } 6534 if (!isPowerOf2_64(GroupSize)) { 6535 Error(Loc, "group size must be a power of two"); 6536 return false; 6537 } 6538 6539 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6540 return true; 6541 } 6542 6543 bool 6544 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6545 using namespace llvm::AMDGPU::Swizzle; 6546 6547 SMLoc Loc; 6548 int64_t GroupSize; 6549 6550 if (!parseSwizzleOperand(GroupSize, 6551 1, 16, 6552 "group size must be in the interval [1,16]", 6553 Loc)) { 6554 return false; 6555 } 6556 if (!isPowerOf2_64(GroupSize)) { 6557 Error(Loc, "group size must be a power of two"); 6558 return false; 6559 } 6560 6561 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6562 return true; 6563 } 6564 6565 bool 6566 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6567 using namespace llvm::AMDGPU::Swizzle; 6568 6569 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6570 return false; 6571 } 6572 6573 StringRef Ctl; 6574 SMLoc StrLoc = getLoc(); 6575 if (!parseString(Ctl)) { 6576 return false; 6577 } 6578 if (Ctl.size() != BITMASK_WIDTH) { 6579 Error(StrLoc, "expected a 5-character mask"); 6580 return false; 6581 } 6582 6583 unsigned AndMask = 0; 6584 unsigned OrMask = 0; 6585 unsigned XorMask = 0; 6586 6587 for (size_t i = 0; i < Ctl.size(); ++i) { 6588 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6589 switch(Ctl[i]) { 6590 default: 6591 Error(StrLoc, "invalid mask"); 6592 return false; 6593 case '0': 6594 break; 6595 case '1': 6596 OrMask |= Mask; 6597 break; 6598 case 'p': 6599 AndMask |= Mask; 6600 break; 6601 case 'i': 6602 AndMask |= Mask; 6603 XorMask |= Mask; 6604 break; 6605 } 6606 } 6607 6608 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6609 return true; 6610 } 6611 6612 bool 6613 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6614 6615 SMLoc OffsetLoc = getLoc(); 6616 6617 if (!parseExpr(Imm, "a swizzle macro")) { 6618 return false; 6619 } 6620 if (!isUInt<16>(Imm)) { 6621 Error(OffsetLoc, "expected a 16-bit offset"); 6622 return false; 6623 } 6624 return true; 6625 } 6626 6627 bool 6628 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6629 using namespace llvm::AMDGPU::Swizzle; 6630 6631 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6632 6633 SMLoc ModeLoc = getLoc(); 6634 bool Ok = false; 6635 6636 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6637 Ok = parseSwizzleQuadPerm(Imm); 6638 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6639 Ok = parseSwizzleBitmaskPerm(Imm); 6640 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6641 Ok = parseSwizzleBroadcast(Imm); 6642 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6643 Ok = parseSwizzleSwap(Imm); 6644 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6645 Ok = parseSwizzleReverse(Imm); 6646 } else { 6647 Error(ModeLoc, "expected a swizzle mode"); 6648 } 6649 6650 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6651 } 6652 6653 return false; 6654 } 6655 6656 OperandMatchResultTy 6657 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6658 SMLoc S = getLoc(); 6659 int64_t Imm = 0; 6660 6661 if (trySkipId("offset")) { 6662 6663 bool Ok = false; 6664 if (skipToken(AsmToken::Colon, "expected a colon")) { 6665 if (trySkipId("swizzle")) { 6666 Ok = parseSwizzleMacro(Imm); 6667 } else { 6668 Ok = parseSwizzleOffset(Imm); 6669 } 6670 } 6671 6672 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6673 6674 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6675 } else { 6676 // Swizzle "offset" operand is optional. 6677 // If it is omitted, try parsing other optional operands. 6678 return parseOptionalOpr(Operands); 6679 } 6680 } 6681 6682 bool 6683 AMDGPUOperand::isSwizzle() const { 6684 return isImmTy(ImmTySwizzle); 6685 } 6686 6687 //===----------------------------------------------------------------------===// 6688 // VGPR Index Mode 6689 //===----------------------------------------------------------------------===// 6690 6691 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6692 6693 using namespace llvm::AMDGPU::VGPRIndexMode; 6694 6695 if (trySkipToken(AsmToken::RParen)) { 6696 return OFF; 6697 } 6698 6699 int64_t Imm = 0; 6700 6701 while (true) { 6702 unsigned Mode = 0; 6703 SMLoc S = getLoc(); 6704 6705 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6706 if (trySkipId(IdSymbolic[ModeId])) { 6707 Mode = 1 << ModeId; 6708 break; 6709 } 6710 } 6711 6712 if (Mode == 0) { 6713 Error(S, (Imm == 0)? 
6714 "expected a VGPR index mode or a closing parenthesis" : 6715 "expected a VGPR index mode"); 6716 return UNDEF; 6717 } 6718 6719 if (Imm & Mode) { 6720 Error(S, "duplicate VGPR index mode"); 6721 return UNDEF; 6722 } 6723 Imm |= Mode; 6724 6725 if (trySkipToken(AsmToken::RParen)) 6726 break; 6727 if (!skipToken(AsmToken::Comma, 6728 "expected a comma or a closing parenthesis")) 6729 return UNDEF; 6730 } 6731 6732 return Imm; 6733 } 6734 6735 OperandMatchResultTy 6736 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6737 6738 using namespace llvm::AMDGPU::VGPRIndexMode; 6739 6740 int64_t Imm = 0; 6741 SMLoc S = getLoc(); 6742 6743 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6744 Imm = parseGPRIdxMacro(); 6745 if (Imm == UNDEF) 6746 return MatchOperand_ParseFail; 6747 } else { 6748 if (getParser().parseAbsoluteExpression(Imm)) 6749 return MatchOperand_ParseFail; 6750 if (Imm < 0 || !isUInt<4>(Imm)) { 6751 Error(S, "invalid immediate: only 4-bit values are legal"); 6752 return MatchOperand_ParseFail; 6753 } 6754 } 6755 6756 Operands.push_back( 6757 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6758 return MatchOperand_Success; 6759 } 6760 6761 bool AMDGPUOperand::isGPRIdxMode() const { 6762 return isImmTy(ImmTyGprIdxMode); 6763 } 6764 6765 //===----------------------------------------------------------------------===// 6766 // sopp branch targets 6767 //===----------------------------------------------------------------------===// 6768 6769 OperandMatchResultTy 6770 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6771 6772 // Make sure we are not parsing something 6773 // that looks like a label or an expression but is not. 6774 // This will improve error messages. 6775 if (isRegister() || isModifier()) 6776 return MatchOperand_NoMatch; 6777 6778 if (!parseExpr(Operands)) 6779 return MatchOperand_ParseFail; 6780 6781 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6782 assert(Opr.isImm() || Opr.isExpr()); 6783 SMLoc Loc = Opr.getStartLoc(); 6784 6785 // Currently we do not support arbitrary expressions as branch targets. 6786 // Only labels and absolute expressions are accepted. 
6787 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6788 Error(Loc, "expected an absolute expression or a label"); 6789 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6790 Error(Loc, "expected a 16-bit signed jump offset"); 6791 } 6792 6793 return MatchOperand_Success; 6794 } 6795 6796 //===----------------------------------------------------------------------===// 6797 // Boolean holding registers 6798 //===----------------------------------------------------------------------===// 6799 6800 OperandMatchResultTy 6801 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6802 return parseReg(Operands); 6803 } 6804 6805 //===----------------------------------------------------------------------===// 6806 // mubuf 6807 //===----------------------------------------------------------------------===// 6808 6809 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6810 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6811 } 6812 6813 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const { 6814 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB); 6815 } 6816 6817 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6818 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6819 } 6820 6821 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6822 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6823 } 6824 6825 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6826 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6827 } 6828 6829 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6830 const OperandVector &Operands, 6831 bool IsAtomic, 6832 bool IsAtomicReturn, 6833 bool IsLds) { 6834 bool IsLdsOpcode = IsLds; 6835 bool HasLdsModifier = false; 6836 OptionalImmIndexMap OptionalIdx; 6837 assert(IsAtomicReturn ? IsAtomic : true); 6838 unsigned FirstOperandIdx = 1; 6839 6840 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6841 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6842 6843 // Add the register arguments 6844 if (Op.isReg()) { 6845 Op.addRegOperands(Inst, 1); 6846 // Insert a tied src for atomic return dst. 6847 // This cannot be postponed as subsequent calls to 6848 // addImmOperands rely on correct number of MC operands. 6849 if (IsAtomicReturn && i == FirstOperandIdx) 6850 Op.addRegOperands(Inst, 1); 6851 continue; 6852 } 6853 6854 // Handle the case where soffset is an immediate 6855 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6856 Op.addImmOperands(Inst, 1); 6857 continue; 6858 } 6859 6860 HasLdsModifier |= Op.isLDS(); 6861 6862 // Handle tokens like 'offen' which are sometimes hard-coded into the 6863 // asm string. There are no MCInst operands for these. 6864 if (Op.isToken()) { 6865 continue; 6866 } 6867 assert(Op.isImm()); 6868 6869 // Handle optional arguments 6870 OptionalIdx[Op.getImmTy()] = i; 6871 } 6872 6873 // This is a workaround for an llvm quirk which may result in an 6874 // incorrect instruction selection. Lds and non-lds versions of 6875 // MUBUF instructions are identical except that lds versions 6876 // have mandatory 'lds' modifier. However this modifier follows 6877 // optional modifiers and llvm asm matcher regards this 'lds' 6878 // modifier as an optional one. As a result, an lds version 6879 // of opcode may be selected even if it has no 'lds' modifier. 
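  // For instance (operands illustrative), "buffer_load_dword v1, off, s[4:7], s1"
  // written without an "lds" token must not keep an *_LDS_* opcode; if the
  // matcher picked one anyway, it is switched back to the non-lds form below.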
6880 if (IsLdsOpcode && !HasLdsModifier) { 6881 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6882 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6883 Inst.setOpcode(NoLdsOpcode); 6884 IsLdsOpcode = false; 6885 } 6886 } 6887 6888 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6889 if (!IsAtomic || IsAtomicReturn) { 6890 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, 6891 IsAtomicReturn ? -1 : 0); 6892 } 6893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6894 6895 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6897 } 6898 6899 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6900 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6901 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB); 6902 } 6903 6904 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6905 OptionalImmIndexMap OptionalIdx; 6906 6907 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6908 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6909 6910 // Add the register arguments 6911 if (Op.isReg()) { 6912 Op.addRegOperands(Inst, 1); 6913 continue; 6914 } 6915 6916 // Handle the case where soffset is an immediate 6917 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6918 Op.addImmOperands(Inst, 1); 6919 continue; 6920 } 6921 6922 // Handle tokens like 'offen' which are sometimes hard-coded into the 6923 // asm string. There are no MCInst operands for these. 6924 if (Op.isToken()) { 6925 continue; 6926 } 6927 assert(Op.isImm()); 6928 6929 // Handle optional arguments 6930 OptionalIdx[Op.getImmTy()] = i; 6931 } 6932 6933 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6934 AMDGPUOperand::ImmTyOffset); 6935 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6936 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB); 6942 } 6943 6944 //===----------------------------------------------------------------------===// 6945 // mimg 6946 //===----------------------------------------------------------------------===// 6947 6948 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6949 bool IsAtomic) { 6950 unsigned I = 1; 6951 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6952 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6953 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6954 } 6955 6956 if (IsAtomic) { 6957 // Add src, same as dst 6958 assert(Desc.getNumDefs() == 1); 6959 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6960 } 6961 6962 OptionalImmIndexMap OptionalIdx; 6963 6964 for (unsigned E = Operands.size(); I != E; ++I) { 6965 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6966 6967 // Add the register arguments 6968 if (Op.isReg()) { 6969 Op.addRegOperands(Inst, 1); 6970 } else if (Op.isImmModifier()) { 6971 OptionalIdx[Op.getImmTy()] = I; 6972 } else if (!Op.isToken()) { 
6973 llvm_unreachable("unexpected operand type"); 6974 } 6975 } 6976 6977 bool IsGFX10Plus = isGFX10Plus(); 6978 6979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6980 if (IsGFX10Plus) 6981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6983 6984 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1) 6985 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6986 AMDGPUOperand::ImmTySCCB); 6987 6988 if (IsGFX10Plus) 6989 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6990 6991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6992 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6994 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 6995 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6996 if (IsGFX10Plus) 6997 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6998 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6999 if (!IsGFX10Plus) 7000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7002 } 7003 7004 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7005 cvtMIMG(Inst, Operands, true); 7006 } 7007 7008 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7009 const OperandVector &Operands) { 7010 for (unsigned I = 1; I < Operands.size(); ++I) { 7011 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7012 if (Operand.isReg()) 7013 Operand.addRegOperands(Inst, 1); 7014 } 7015 7016 Inst.addOperand(MCOperand::createImm(1)); // a16 7017 } 7018 7019 //===----------------------------------------------------------------------===// 7020 // smrd 7021 //===----------------------------------------------------------------------===// 7022 7023 bool AMDGPUOperand::isSMRDOffset8() const { 7024 return isImm() && isUInt<8>(getImm()); 7025 } 7026 7027 bool AMDGPUOperand::isSMEMOffset() const { 7028 return isImm(); // Offset range is checked later by validator. 7029 } 7030 7031 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7032 // 32-bit literals are only supported on CI and we only want to use them 7033 // when the offset is > 8-bits. 
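  // That is, the predicate below accepts immediates in [0x100, 0xffffffff]:
  // values that do not fit in 8 bits but do fit in an unsigned 32-bit literal.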
7034 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7035 } 7036 7037 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7038 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7039 } 7040 7041 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7042 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7043 } 7044 7045 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7046 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7047 } 7048 7049 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7050 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7051 } 7052 7053 //===----------------------------------------------------------------------===// 7054 // vop3 7055 //===----------------------------------------------------------------------===// 7056 7057 static bool ConvertOmodMul(int64_t &Mul) { 7058 if (Mul != 1 && Mul != 2 && Mul != 4) 7059 return false; 7060 7061 Mul >>= 1; 7062 return true; 7063 } 7064 7065 static bool ConvertOmodDiv(int64_t &Div) { 7066 if (Div == 1) { 7067 Div = 0; 7068 return true; 7069 } 7070 7071 if (Div == 2) { 7072 Div = 3; 7073 return true; 7074 } 7075 7076 return false; 7077 } 7078 7079 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7080 // This is intentional and ensures compatibility with sp3. 7081 // See bug 35397 for details. 7082 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7083 if (BoundCtrl == 0 || BoundCtrl == 1) { 7084 BoundCtrl = 1; 7085 return true; 7086 } 7087 return false; 7088 } 7089 7090 // Note: the order in this table matches the order of operands in AsmString. 7091 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7092 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7093 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7094 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7095 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7096 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7097 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7098 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7099 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7100 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7101 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 7102 {"scc", AMDGPUOperand::ImmTySCCB, true, nullptr}, 7103 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 7104 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 7105 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7106 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7107 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7108 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7109 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7110 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7111 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7112 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7113 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7114 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7115 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7116 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7117 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7118 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7119 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7120 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7121 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, 
ConvertBoundCtrl}, 7122 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7123 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7124 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7125 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7126 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7127 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7128 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7129 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7130 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7131 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7132 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7133 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7134 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7135 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7136 }; 7137 7138 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7139 7140 OperandMatchResultTy res = parseOptionalOpr(Operands); 7141 7142 // This is a hack to enable hardcoded mandatory operands which follow 7143 // optional operands. 7144 // 7145 // Current design assumes that all operands after the first optional operand 7146 // are also optional. However implementation of some instructions violates 7147 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7148 // 7149 // To alleviate this problem, we have to (implicitly) parse extra operands 7150 // to make sure autogenerated parser of custom operands never hit hardcoded 7151 // mandatory operands. 7152 7153 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7154 if (res != MatchOperand_Success || 7155 isToken(AsmToken::EndOfStatement)) 7156 break; 7157 7158 trySkipToken(AsmToken::Comma); 7159 res = parseOptionalOpr(Operands); 7160 } 7161 7162 return res; 7163 } 7164 7165 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7166 OperandMatchResultTy res; 7167 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7168 // try to parse any optional operand here 7169 if (Op.IsBit) { 7170 res = parseNamedBit(Op.Name, Operands, Op.Type); 7171 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7172 res = parseOModOperand(Operands); 7173 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7174 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7175 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7176 res = parseSDWASel(Operands, Op.Name, Op.Type); 7177 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7178 res = parseSDWADstUnused(Operands); 7179 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7180 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7181 Op.Type == AMDGPUOperand::ImmTyNegLo || 7182 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7183 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7184 Op.ConvertResult); 7185 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7186 res = parseDim(Operands); 7187 } else { 7188 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7189 } 7190 if (res != MatchOperand_NoMatch) { 7191 return res; 7192 } 7193 } 7194 return MatchOperand_NoMatch; 7195 } 7196 7197 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7198 StringRef Name = getTokenStr(); 7199 if (Name == "mul") { 7200 return parseIntWithPrefix("mul", Operands, 7201 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7202 } 7203 7204 if (Name == "div") { 7205 return parseIntWithPrefix("div", Operands, 7206 
AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7207 } 7208 7209 return MatchOperand_NoMatch; 7210 } 7211 7212 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7213 cvtVOP3P(Inst, Operands); 7214 7215 int Opc = Inst.getOpcode(); 7216 7217 int SrcNum; 7218 const int Ops[] = { AMDGPU::OpName::src0, 7219 AMDGPU::OpName::src1, 7220 AMDGPU::OpName::src2 }; 7221 for (SrcNum = 0; 7222 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7223 ++SrcNum); 7224 assert(SrcNum > 0); 7225 7226 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7227 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7228 7229 if ((OpSel & (1 << SrcNum)) != 0) { 7230 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7231 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7232 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7233 } 7234 } 7235 7236 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7237 // 1. This operand is input modifiers 7238 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7239 // 2. This is not last operand 7240 && Desc.NumOperands > (OpNum + 1) 7241 // 3. Next operand is register class 7242 && Desc.OpInfo[OpNum + 1].RegClass != -1 7243 // 4. Next register is not tied to any other operand 7244 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7245 } 7246 7247 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7248 { 7249 OptionalImmIndexMap OptionalIdx; 7250 unsigned Opc = Inst.getOpcode(); 7251 7252 unsigned I = 1; 7253 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7254 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7255 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7256 } 7257 7258 for (unsigned E = Operands.size(); I != E; ++I) { 7259 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7260 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7261 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7262 } else if (Op.isInterpSlot() || 7263 Op.isInterpAttr() || 7264 Op.isAttrChan()) { 7265 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7266 } else if (Op.isImmModifier()) { 7267 OptionalIdx[Op.getImmTy()] = I; 7268 } else { 7269 llvm_unreachable("unhandled operand type"); 7270 } 7271 } 7272 7273 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7274 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7275 } 7276 7277 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7278 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7279 } 7280 7281 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7283 } 7284 } 7285 7286 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7287 OptionalImmIndexMap &OptionalIdx) { 7288 unsigned Opc = Inst.getOpcode(); 7289 7290 unsigned I = 1; 7291 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7292 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7293 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7294 } 7295 7296 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7297 // This instruction has src modifiers 7298 for (unsigned E = Operands.size(); I != E; ++I) { 7299 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7300 if (isRegOrImmWithInputMods(Desc, 
Inst.getNumOperands())) { 7301 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7302 } else if (Op.isImmModifier()) { 7303 OptionalIdx[Op.getImmTy()] = I; 7304 } else if (Op.isRegOrImm()) { 7305 Op.addRegOrImmOperands(Inst, 1); 7306 } else { 7307 llvm_unreachable("unhandled operand type"); 7308 } 7309 } 7310 } else { 7311 // No src modifiers 7312 for (unsigned E = Operands.size(); I != E; ++I) { 7313 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7314 if (Op.isMod()) { 7315 OptionalIdx[Op.getImmTy()] = I; 7316 } else { 7317 Op.addRegOrImmOperands(Inst, 1); 7318 } 7319 } 7320 } 7321 7322 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7324 } 7325 7326 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7327 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7328 } 7329 7330 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7331 // it has src2 register operand that is tied to dst operand 7332 // we don't allow modifiers for this operand in assembler so src2_modifiers 7333 // should be 0. 7334 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7335 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7336 Opc == AMDGPU::V_MAC_F32_e64_vi || 7337 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7338 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7339 Opc == AMDGPU::V_MAC_F16_e64_vi || 7340 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7341 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7342 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7343 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7344 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7345 auto it = Inst.begin(); 7346 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7347 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7348 ++it; 7349 // Copy the operand to ensure it's not invalidated when Inst grows. 7350 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7351 } 7352 } 7353 7354 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7355 OptionalImmIndexMap OptionalIdx; 7356 cvtVOP3(Inst, Operands, OptionalIdx); 7357 } 7358 7359 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7360 const OperandVector &Operands) { 7361 OptionalImmIndexMap OptIdx; 7362 const int Opc = Inst.getOpcode(); 7363 const MCInstrDesc &Desc = MII.get(Opc); 7364 7365 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7366 7367 cvtVOP3(Inst, Operands, OptIdx); 7368 7369 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7370 assert(!IsPacked); 7371 Inst.addOperand(Inst.getOperand(0)); 7372 } 7373 7374 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7375 // instruction, and then figure out where to actually put the modifiers 7376 7377 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7378 7379 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7380 if (OpSelHiIdx != -1) { 7381 int DefaultVal = IsPacked ? 
-1 : 0; 7382 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7383 DefaultVal); 7384 } 7385 7386 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7387 if (NegLoIdx != -1) { 7388 assert(IsPacked); 7389 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7390 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7391 } 7392 7393 const int Ops[] = { AMDGPU::OpName::src0, 7394 AMDGPU::OpName::src1, 7395 AMDGPU::OpName::src2 }; 7396 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7397 AMDGPU::OpName::src1_modifiers, 7398 AMDGPU::OpName::src2_modifiers }; 7399 7400 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7401 7402 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7403 unsigned OpSelHi = 0; 7404 unsigned NegLo = 0; 7405 unsigned NegHi = 0; 7406 7407 if (OpSelHiIdx != -1) { 7408 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7409 } 7410 7411 if (NegLoIdx != -1) { 7412 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7413 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7414 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7415 } 7416 7417 for (int J = 0; J < 3; ++J) { 7418 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7419 if (OpIdx == -1) 7420 break; 7421 7422 uint32_t ModVal = 0; 7423 7424 if ((OpSel & (1 << J)) != 0) 7425 ModVal |= SISrcMods::OP_SEL_0; 7426 7427 if ((OpSelHi & (1 << J)) != 0) 7428 ModVal |= SISrcMods::OP_SEL_1; 7429 7430 if ((NegLo & (1 << J)) != 0) 7431 ModVal |= SISrcMods::NEG; 7432 7433 if ((NegHi & (1 << J)) != 0) 7434 ModVal |= SISrcMods::NEG_HI; 7435 7436 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7437 7438 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7439 } 7440 } 7441 7442 //===----------------------------------------------------------------------===// 7443 // dpp 7444 //===----------------------------------------------------------------------===// 7445 7446 bool AMDGPUOperand::isDPP8() const { 7447 return isImmTy(ImmTyDPP8); 7448 } 7449 7450 bool AMDGPUOperand::isDPPCtrl() const { 7451 using namespace AMDGPU::DPP; 7452 7453 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7454 if (result) { 7455 int64_t Imm = getImm(); 7456 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7457 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7458 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7459 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7460 (Imm == DppCtrl::WAVE_SHL1) || 7461 (Imm == DppCtrl::WAVE_ROL1) || 7462 (Imm == DppCtrl::WAVE_SHR1) || 7463 (Imm == DppCtrl::WAVE_ROR1) || 7464 (Imm == DppCtrl::ROW_MIRROR) || 7465 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7466 (Imm == DppCtrl::BCAST15) || 7467 (Imm == DppCtrl::BCAST31) || 7468 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7469 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7470 } 7471 return false; 7472 } 7473 7474 //===----------------------------------------------------------------------===// 7475 // mAI 7476 //===----------------------------------------------------------------------===// 7477 7478 bool AMDGPUOperand::isBLGP() const { 7479 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7480 } 7481 7482 bool AMDGPUOperand::isCBSZ() const { 7483 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7484 } 7485 7486 bool AMDGPUOperand::isABID() 
const { 7487 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7488 } 7489 7490 bool AMDGPUOperand::isS16Imm() const { 7491 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7492 } 7493 7494 bool AMDGPUOperand::isU16Imm() const { 7495 return isImm() && isUInt<16>(getImm()); 7496 } 7497 7498 //===----------------------------------------------------------------------===// 7499 // dim 7500 //===----------------------------------------------------------------------===// 7501 7502 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7503 // We want to allow "dim:1D" etc., 7504 // but the initial 1 is tokenized as an integer. 7505 std::string Token; 7506 if (isToken(AsmToken::Integer)) { 7507 SMLoc Loc = getToken().getEndLoc(); 7508 Token = std::string(getTokenStr()); 7509 lex(); 7510 if (getLoc() != Loc) 7511 return false; 7512 } 7513 7514 StringRef Suffix; 7515 if (!parseId(Suffix)) 7516 return false; 7517 Token += Suffix; 7518 7519 StringRef DimId = Token; 7520 if (DimId.startswith("SQ_RSRC_IMG_")) 7521 DimId = DimId.drop_front(12); 7522 7523 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7524 if (!DimInfo) 7525 return false; 7526 7527 Encoding = DimInfo->Encoding; 7528 return true; 7529 } 7530 7531 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7532 if (!isGFX10Plus()) 7533 return MatchOperand_NoMatch; 7534 7535 SMLoc S = getLoc(); 7536 7537 if (!trySkipId("dim", AsmToken::Colon)) 7538 return MatchOperand_NoMatch; 7539 7540 unsigned Encoding; 7541 SMLoc Loc = getLoc(); 7542 if (!parseDimId(Encoding)) { 7543 Error(Loc, "invalid dim value"); 7544 return MatchOperand_ParseFail; 7545 } 7546 7547 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7548 AMDGPUOperand::ImmTyDim)); 7549 return MatchOperand_Success; 7550 } 7551 7552 //===----------------------------------------------------------------------===// 7553 // dpp 7554 //===----------------------------------------------------------------------===// 7555 7556 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7557 SMLoc S = getLoc(); 7558 7559 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7560 return MatchOperand_NoMatch; 7561 7562 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7563 7564 int64_t Sels[8]; 7565 7566 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7567 return MatchOperand_ParseFail; 7568 7569 for (size_t i = 0; i < 8; ++i) { 7570 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7571 return MatchOperand_ParseFail; 7572 7573 SMLoc Loc = getLoc(); 7574 if (getParser().parseAbsoluteExpression(Sels[i])) 7575 return MatchOperand_ParseFail; 7576 if (0 > Sels[i] || 7 < Sels[i]) { 7577 Error(Loc, "expected a 3-bit value"); 7578 return MatchOperand_ParseFail; 7579 } 7580 } 7581 7582 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7583 return MatchOperand_ParseFail; 7584 7585 unsigned DPP8 = 0; 7586 for (size_t i = 0; i < 8; ++i) 7587 DPP8 |= (Sels[i] << (i * 3)); 7588 7589 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7590 return MatchOperand_Success; 7591 } 7592 7593 bool 7594 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7595 const OperandVector &Operands) { 7596 if (Ctrl == "row_newbcast") 7597 return isGFX90A(); 7598 7599 // DPP64 is supported for row_newbcast only. 
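// For example, a 64-bit operand written as a VGPR pair (v[0:1]) exposes an
// AMDGPU::sub1 sub-register, so the sub-register probe below treats it as a
// wide operand and rejects every DPP control except the row_newbcast case
// handled above.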
7600 const MCRegisterInfo *MRI = getMRI(); 7601 if (Operands.size() > 2 && Operands[1]->isReg() && 7602 MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1)) 7603 return false; 7604 7605 if (Ctrl == "row_share" || 7606 Ctrl == "row_xmask") 7607 return isGFX10Plus(); 7608 7609 if (Ctrl == "wave_shl" || 7610 Ctrl == "wave_shr" || 7611 Ctrl == "wave_rol" || 7612 Ctrl == "wave_ror" || 7613 Ctrl == "row_bcast") 7614 return isVI() || isGFX9(); 7615 7616 return Ctrl == "row_mirror" || 7617 Ctrl == "row_half_mirror" || 7618 Ctrl == "quad_perm" || 7619 Ctrl == "row_shl" || 7620 Ctrl == "row_shr" || 7621 Ctrl == "row_ror"; 7622 } 7623 7624 int64_t 7625 AMDGPUAsmParser::parseDPPCtrlPerm() { 7626 // quad_perm:[%d,%d,%d,%d] 7627 7628 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7629 return -1; 7630 7631 int64_t Val = 0; 7632 for (int i = 0; i < 4; ++i) { 7633 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7634 return -1; 7635 7636 int64_t Temp; 7637 SMLoc Loc = getLoc(); 7638 if (getParser().parseAbsoluteExpression(Temp)) 7639 return -1; 7640 if (Temp < 0 || Temp > 3) { 7641 Error(Loc, "expected a 2-bit value"); 7642 return -1; 7643 } 7644 7645 Val += (Temp << i * 2); 7646 } 7647 7648 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7649 return -1; 7650 7651 return Val; 7652 } 7653 7654 int64_t 7655 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7656 using namespace AMDGPU::DPP; 7657 7658 // sel:%d 7659 7660 int64_t Val; 7661 SMLoc Loc = getLoc(); 7662 7663 if (getParser().parseAbsoluteExpression(Val)) 7664 return -1; 7665 7666 struct DppCtrlCheck { 7667 int64_t Ctrl; 7668 int Lo; 7669 int Hi; 7670 }; 7671 7672 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7673 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7674 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7675 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7676 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7677 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7678 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7679 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7680 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7681 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7682 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7683 .Default({-1, 0, 0}); 7684 7685 bool Valid; 7686 if (Check.Ctrl == -1) { 7687 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7688 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7689 } else { 7690 Valid = Check.Lo <= Val && Val <= Check.Hi; 7691 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7692 } 7693 7694 if (!Valid) { 7695 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7696 return -1; 7697 } 7698 7699 return Val; 7700 } 7701 7702 OperandMatchResultTy 7703 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7704 using namespace AMDGPU::DPP; 7705 7706 if (!isToken(AsmToken::Identifier) || 7707 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7708 return MatchOperand_NoMatch; 7709 7710 SMLoc S = getLoc(); 7711 int64_t Val = -1; 7712 StringRef Ctrl; 7713 7714 parseId(Ctrl); 7715 7716 if (Ctrl == "row_mirror") { 7717 Val = DppCtrl::ROW_MIRROR; 7718 } else if (Ctrl == "row_half_mirror") { 7719 Val = DppCtrl::ROW_HALF_MIRROR; 7720 } else { 7721 if (skipToken(AsmToken::Colon, "expected a colon")) { 7722 if (Ctrl == "quad_perm") { 7723 Val = parseDPPCtrlPerm(); 7724 } else { 7725 Val = parseDPPCtrlSel(Ctrl); 7726 } 7727 } 7728 } 7729 7730 if (Val == -1) 7731 return MatchOperand_ParseFail; 7732 7733 Operands.push_back( 7734 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7735 return MatchOperand_Success; 7736 } 7737 7738 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7739 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7740 } 7741 7742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7743 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7744 } 7745 7746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7747 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7748 } 7749 7750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7751 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7752 } 7753 7754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7755 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7756 } 7757 7758 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7759 OptionalImmIndexMap OptionalIdx; 7760 7761 unsigned I = 1; 7762 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7763 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7764 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7765 } 7766 7767 int Fi = 0; 7768 for (unsigned E = Operands.size(); I != E; ++I) { 7769 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7770 MCOI::TIED_TO); 7771 if (TiedTo != -1) { 7772 assert((unsigned)TiedTo < Inst.getNumOperands()); 7773 // handle tied old or src2 for MAC instructions 7774 Inst.addOperand(Inst.getOperand(TiedTo)); 7775 } 7776 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7777 // Add the register arguments 7778 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7779 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7780 // Skip it. 
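// For illustration, with an assumed VI spelling such as
//   v_add_u32_dpp v1, vcc, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
// the "vcc" operand is accepted by the parser but has no explicit field in the
// DPP encoding, so it is dropped here rather than added to the MCInst.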
7781 continue; 7782 } 7783 7784 if (IsDPP8) { 7785 if (Op.isDPP8()) { 7786 Op.addImmOperands(Inst, 1); 7787 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7788 Op.addRegWithFPInputModsOperands(Inst, 2); 7789 } else if (Op.isFI()) { 7790 Fi = Op.getImm(); 7791 } else if (Op.isReg()) { 7792 Op.addRegOperands(Inst, 1); 7793 } else { 7794 llvm_unreachable("Invalid operand type"); 7795 } 7796 } else { 7797 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7798 Op.addRegWithFPInputModsOperands(Inst, 2); 7799 } else if (Op.isDPPCtrl()) { 7800 Op.addImmOperands(Inst, 1); 7801 } else if (Op.isImm()) { 7802 // Handle optional arguments 7803 OptionalIdx[Op.getImmTy()] = I; 7804 } else { 7805 llvm_unreachable("Invalid operand type"); 7806 } 7807 } 7808 } 7809 7810 if (IsDPP8) { 7811 using namespace llvm::AMDGPU::DPP; 7812 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7813 } else { 7814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7817 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7818 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7819 } 7820 } 7821 } 7822 7823 //===----------------------------------------------------------------------===// 7824 // sdwa 7825 //===----------------------------------------------------------------------===// 7826 7827 OperandMatchResultTy 7828 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7829 AMDGPUOperand::ImmTy Type) { 7830 using namespace llvm::AMDGPU::SDWA; 7831 7832 SMLoc S = getLoc(); 7833 StringRef Value; 7834 OperandMatchResultTy res; 7835 7836 SMLoc StringLoc; 7837 res = parseStringWithPrefix(Prefix, Value, StringLoc); 7838 if (res != MatchOperand_Success) { 7839 return res; 7840 } 7841 7842 int64_t Int; 7843 Int = StringSwitch<int64_t>(Value) 7844 .Case("BYTE_0", SdwaSel::BYTE_0) 7845 .Case("BYTE_1", SdwaSel::BYTE_1) 7846 .Case("BYTE_2", SdwaSel::BYTE_2) 7847 .Case("BYTE_3", SdwaSel::BYTE_3) 7848 .Case("WORD_0", SdwaSel::WORD_0) 7849 .Case("WORD_1", SdwaSel::WORD_1) 7850 .Case("DWORD", SdwaSel::DWORD) 7851 .Default(0xffffffff); 7852 7853 if (Int == 0xffffffff) { 7854 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 7855 return MatchOperand_ParseFail; 7856 } 7857 7858 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7859 return MatchOperand_Success; 7860 } 7861 7862 OperandMatchResultTy 7863 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 7864 using namespace llvm::AMDGPU::SDWA; 7865 7866 SMLoc S = getLoc(); 7867 StringRef Value; 7868 OperandMatchResultTy res; 7869 7870 SMLoc StringLoc; 7871 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 7872 if (res != MatchOperand_Success) { 7873 return res; 7874 } 7875 7876 int64_t Int; 7877 Int = StringSwitch<int64_t>(Value) 7878 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 7879 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 7880 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 7881 .Default(0xffffffff); 7882 7883 if (Int == 0xffffffff) { 7884 Error(StringLoc, "invalid dst_unused value"); 7885 return MatchOperand_ParseFail; 7886 } 7887 7888 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 7889 return MatchOperand_Success; 7890 } 7891 7892 void 
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 7893 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 7894 } 7895 7896 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 7897 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 7898 } 7899 7900 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 7901 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 7902 } 7903 7904 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 7905 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 7906 } 7907 7908 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 7909 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 7910 } 7911 7912 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 7913 uint64_t BasicInstType, 7914 bool SkipDstVcc, 7915 bool SkipSrcVcc) { 7916 using namespace llvm::AMDGPU::SDWA; 7917 7918 OptionalImmIndexMap OptionalIdx; 7919 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 7920 bool SkippedVcc = false; 7921 7922 unsigned I = 1; 7923 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7924 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7925 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7926 } 7927 7928 for (unsigned E = Operands.size(); I != E; ++I) { 7929 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7930 if (SkipVcc && !SkippedVcc && Op.isReg() && 7931 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 7932 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 7933 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 7934 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 7935 // Skip VCC only if we didn't skip it on previous iteration. 7936 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
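// Working through the counts used below: once the vdst register has been
// added, Inst.getNumOperands() == 1, which is where a dst "vcc" (the 2nd asm
// operand) appears. After vdst plus src0 and src1 with their modifier
// operands (1 + 2 + 2), Inst.getNumOperands() == 5, which is where a trailing
// src "vcc" (the 5th asm operand, as in v_addc_u32_sdwa above) appears.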
7937 if (BasicInstType == SIInstrFlags::VOP2 && 7938 ((SkipDstVcc && Inst.getNumOperands() == 1) || 7939 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 7940 SkippedVcc = true; 7941 continue; 7942 } else if (BasicInstType == SIInstrFlags::VOPC && 7943 Inst.getNumOperands() == 0) { 7944 SkippedVcc = true; 7945 continue; 7946 } 7947 } 7948 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7949 Op.addRegOrImmWithInputModsOperands(Inst, 2); 7950 } else if (Op.isImm()) { 7951 // Handle optional arguments 7952 OptionalIdx[Op.getImmTy()] = I; 7953 } else { 7954 llvm_unreachable("Invalid operand type"); 7955 } 7956 SkippedVcc = false; 7957 } 7958 7959 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 7960 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 7961 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 7962 // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments 7963 switch (BasicInstType) { 7964 case SIInstrFlags::VOP1: 7965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7966 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7967 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7968 } 7969 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7970 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7971 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7972 break; 7973 7974 case SIInstrFlags::VOP2: 7975 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7976 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7977 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7978 } 7979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7980 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7983 break; 7984 7985 case SIInstrFlags::VOPC: 7986 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7987 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7988 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7989 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7990 break; 7991 7992 default: 7993 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7994 } 7995 } 7996 7997 // Special case v_mac_{f16, f32}: 7998 // it has src2 register operand that is tied to dst operand 7999 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8000 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8001 auto it = Inst.begin(); 8002 std::advance( 8003 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8004 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8005 } 8006 } 8007 8008 //===----------------------------------------------------------------------===// 8009 // mAI 8010 //===----------------------------------------------------------------------===// 8011 8012 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8013 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8014 } 8015 8016 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8017 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8018 } 8019 8020 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8021 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8022 } 8023 8024 /// Force static initialization. 8025 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8026 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8027 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8028 } 8029 8030 #define GET_REGISTER_MATCHER 8031 #define GET_MATCHER_IMPLEMENTATION 8032 #define GET_MNEMONIC_SPELL_CHECKER 8033 #define GET_MNEMONIC_CHECKER 8034 #include "AMDGPUGenAsmMatcher.inc" 8035 8036 // This function should be defined after the auto-generated include so that we have 8037 // the MatchClassKind enum defined 8038 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8039 unsigned Kind) { 8040 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8041 // But MatchInstructionImpl() expects to meet a token and fails to validate the 8042 // operand. This method checks if we are given an immediate operand but expect to 8043 // get the corresponding token. 8044 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8045 switch (Kind) { 8046 case MCK_addr64: 8047 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8048 case MCK_gds: 8049 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8050 case MCK_lds: 8051 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8052 case MCK_glc: 8053 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 8054 case MCK_idxen: 8055 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8056 case MCK_offen: 8057 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8058 case MCK_SSrcB32: 8059 // When operands have expression values, they will return true for isToken, 8060 // because it is not possible to distinguish between a token and an 8061 // expression at parse time. MatchInstructionImpl() will always try to 8062 // match an operand as a token, when isToken returns true, and when the 8063 // name of the expression is not a valid token, the match will fail, 8064 // so we need to handle it here. 8065 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8066 case MCK_SSrcF32: 8067 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8068 case MCK_SoppBrTarget: 8069 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8070 case MCK_VReg32OrOff: 8071 return Operand.isVReg32OrOff() ?
Match_Success : Match_InvalidOperand; 8072 case MCK_InterpSlot: 8073 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 8074 case MCK_Attr: 8075 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8076 case MCK_AttrChan: 8077 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8078 case MCK_ImmSMEMOffset: 8079 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8080 case MCK_SReg_64: 8081 case MCK_SReg_64_XEXEC: 8082 // Null is defined as a 32-bit register but 8083 // it should also be enabled with 64-bit operands. 8084 // The following code enables it for SReg_64 operands 8085 // used as source and destination. Remaining source 8086 // operands are handled in isInlinableImm. 8087 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8088 default: 8089 return Match_InvalidOperand; 8090 } 8091 } 8092 8093 //===----------------------------------------------------------------------===// 8094 // endpgm 8095 //===----------------------------------------------------------------------===// 8096 8097 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8098 SMLoc S = getLoc(); 8099 int64_t Imm = 0; 8100 8101 if (!parseExpr(Imm)) { 8102 // The operand is optional, if not present default to 0 8103 Imm = 0; 8104 } 8105 8106 if (!isUInt<16>(Imm)) { 8107 Error(S, "expected a 16-bit value"); 8108 return MatchOperand_ParseFail; 8109 } 8110 8111 Operands.push_back( 8112 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8113 return MatchOperand_Success; 8114 } 8115 8116 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8117