1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u; 108 return Operand; 109 } 110 111 int64_t getIntModifiersOperand() const { 112 int64_t Operand = 0; 113 Operand |= Sext ? SISrcMods::SEXT : 0u; 114 return Operand; 115 } 116 117 int64_t getModifiersOperand() const { 118 assert(!(hasFPModifiers() && hasIntModifiers()) 119 && "fp and int modifiers should not be used simultaneously"); 120 if (hasFPModifiers()) { 121 return getFPModifiersOperand(); 122 } else if (hasIntModifiers()) { 123 return getIntModifiersOperand(); 124 } else { 125 return 0; 126 } 127 } 128 129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 130 }; 131 132 enum ImmTy { 133 ImmTyNone, 134 ImmTyGDS, 135 ImmTyLDS, 136 ImmTyOffen, 137 ImmTyIdxen, 138 ImmTyAddr64, 139 ImmTyOffset, 140 ImmTyInstOffset, 141 ImmTyOffset0, 142 ImmTyOffset1, 143 ImmTyDLC, 144 ImmTyGLC, 145 ImmTySLC, 146 ImmTySWZ, 147 ImmTyTFE, 148 ImmTyD16, 149 ImmTyClampSI, 150 ImmTyOModSI, 151 ImmTyDPP8, 152 ImmTyDppCtrl, 153 ImmTyDppRowMask, 154 ImmTyDppBankMask, 155 ImmTyDppBoundCtrl, 156 ImmTyDppFi, 157 ImmTySdwaDstSel, 158 ImmTySdwaSrc0Sel, 159 ImmTySdwaSrc1Sel, 160 ImmTySdwaDstUnused, 161 ImmTyDMask, 162 ImmTyDim, 163 ImmTyUNorm, 164 ImmTyDA, 165 ImmTyR128A16, 166 ImmTyA16, 167 ImmTyLWE, 168 ImmTyExpTgt, 169 ImmTyExpCompr, 170 ImmTyExpVM, 171 ImmTyFORMAT, 172 ImmTyHwreg, 173 ImmTyOff, 174 ImmTySendMsg, 175 ImmTyInterpSlot, 176 ImmTyInterpAttr, 177 ImmTyAttrChan, 178 ImmTyOpSel, 179 ImmTyOpSelHi, 180 ImmTyNegLo, 181 ImmTyNegHi, 182 ImmTySwizzle, 183 ImmTyGprIdxMode, 184 ImmTyHigh, 185 ImmTyBLGP, 186 ImmTyCBSZ, 187 ImmTyABID, 188 ImmTyEndpgm, 189 }; 190 191 private: 192 struct TokOp { 193 const char *Data; 194 unsigned Length; 195 }; 196 197 struct ImmOp { 198 int64_t Val; 199 ImmTy Type; 200 bool IsFPImm; 201 Modifiers Mods; 202 }; 203 204 struct RegOp { 205 unsigned RegNo; 206 Modifiers Mods; 207 }; 208 209 union { 210 TokOp Tok; 211 ImmOp Imm; 212 RegOp Reg; 213 const MCExpr *Expr; 214 }; 215 216 public: 217 bool isToken() const override { 218 if (Kind == Token) 219 return true; 220 221 // When parsing operands, we can't always tell if something was meant to be 222 // a token, like 'gds', or an expression that references a global variable. 223 // In this case, we assume the string is an expression, and if we need to 224 // interpret is a token, then we treat the symbol name as the token. 
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_256RegClassID) || 282 isRegClass(AMDGPU::VReg_512RegClassID) || 283 isRegClass(AMDGPU::VReg_1024RegClassID); 284 } 285 286 bool isVReg32() const { 287 return isRegClass(AMDGPU::VGPR_32RegClassID); 288 } 289 290 bool isVReg32OrOff() const { 291 return isOff() || isVReg32(); 292 } 293 294 bool isNull() const { 295 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 296 } 297 298 bool isSDWAOperand(MVT type) const; 299 bool isSDWAFP16Operand() const; 300 bool isSDWAFP32Operand() const; 301 bool isSDWAInt16Operand() const; 302 bool isSDWAInt32Operand() const; 303 304 bool isImmTy(ImmTy ImmT) const { 305 return isImm() && Imm.Type == ImmT; 306 } 307 308 bool isImmModifier() const { 309 return isImm() && Imm.Type != ImmTyNone; 310 } 311 312 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 313 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 314 bool isDMask() const { return isImmTy(ImmTyDMask); } 315 bool isDim() const { return isImmTy(ImmTyDim); } 316 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 317 bool isDA() const { return isImmTy(ImmTyDA); } 318 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 319 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 320 bool isLWE() const { return isImmTy(ImmTyLWE); } 321 bool isOff() const { return isImmTy(ImmTyOff); } 322 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 323 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 324 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 325 bool isOffen() const { return isImmTy(ImmTyOffen); } 326 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 327 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 328 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 329 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 330 bool isOffset1() const { return isImmTy(ImmTyOffset1) && 
isUInt<8>(getImm()); } 331 332 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 333 bool isGDS() const { return isImmTy(ImmTyGDS); } 334 bool isLDS() const { return isImmTy(ImmTyLDS); } 335 bool isDLC() const { return isImmTy(ImmTyDLC); } 336 bool isGLC() const { return isImmTy(ImmTyGLC); } 337 bool isSLC() const { return isImmTy(ImmTySLC); } 338 bool isSWZ() const { return isImmTy(ImmTySWZ); } 339 bool isTFE() const { return isImmTy(ImmTyTFE); } 340 bool isD16() const { return isImmTy(ImmTyD16); } 341 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 342 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 343 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 344 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 345 bool isFI() const { return isImmTy(ImmTyDppFi); } 346 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 347 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 348 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 349 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 350 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 351 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 352 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 353 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 354 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 355 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 356 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 357 bool isHigh() const { return isImmTy(ImmTyHigh); } 358 359 bool isMod() const { 360 return isClampSI() || isOModSI(); 361 } 362 363 bool isRegOrImm() const { 364 return isReg() || isImm(); 365 } 366 367 bool isRegClass(unsigned RCID) const; 368 369 bool isInlineValue() const; 370 371 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 372 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 373 } 374 375 bool isSCSrcB16() const { 376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 377 } 378 379 bool isSCSrcV2B16() const { 380 return isSCSrcB16(); 381 } 382 383 bool isSCSrcB32() const { 384 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 385 } 386 387 bool isSCSrcB64() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 389 } 390 391 bool isBoolReg() const; 392 393 bool isSCSrcF16() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 395 } 396 397 bool isSCSrcV2F16() const { 398 return isSCSrcF16(); 399 } 400 401 bool isSCSrcF32() const { 402 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 403 } 404 405 bool isSCSrcF64() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 407 } 408 409 bool isSSrcB32() const { 410 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 411 } 412 413 bool isSSrcB16() const { 414 return isSCSrcB16() || isLiteralImm(MVT::i16); 415 } 416 417 bool isSSrcV2B16() const { 418 llvm_unreachable("cannot happen"); 419 return isSSrcB16(); 420 } 421 422 bool isSSrcB64() const { 423 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 424 // See isVSrc64(). 
425 return isSCSrcB64() || isLiteralImm(MVT::i64); 426 } 427 428 bool isSSrcF32() const { 429 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 430 } 431 432 bool isSSrcF64() const { 433 return isSCSrcB64() || isLiteralImm(MVT::f64); 434 } 435 436 bool isSSrcF16() const { 437 return isSCSrcB16() || isLiteralImm(MVT::f16); 438 } 439 440 bool isSSrcV2F16() const { 441 llvm_unreachable("cannot happen"); 442 return isSSrcF16(); 443 } 444 445 bool isSSrcOrLdsB32() const { 446 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 447 isLiteralImm(MVT::i32) || isExpr(); 448 } 449 450 bool isVCSrcB32() const { 451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 452 } 453 454 bool isVCSrcB64() const { 455 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 456 } 457 458 bool isVCSrcB16() const { 459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 460 } 461 462 bool isVCSrcV2B16() const { 463 return isVCSrcB16(); 464 } 465 466 bool isVCSrcF32() const { 467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 468 } 469 470 bool isVCSrcF64() const { 471 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 472 } 473 474 bool isVCSrcF16() const { 475 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 476 } 477 478 bool isVCSrcV2F16() const { 479 return isVCSrcF16(); 480 } 481 482 bool isVSrcB32() const { 483 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 484 } 485 486 bool isVSrcB64() const { 487 return isVCSrcF64() || isLiteralImm(MVT::i64); 488 } 489 490 bool isVSrcB16() const { 491 return isVCSrcF16() || isLiteralImm(MVT::i16); 492 } 493 494 bool isVSrcV2B16() const { 495 return isVSrcB16() || isLiteralImm(MVT::v2i16); 496 } 497 498 bool isVSrcF32() const { 499 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 500 } 501 502 bool isVSrcF64() const { 503 return isVCSrcF64() || isLiteralImm(MVT::f64); 504 } 505 506 bool isVSrcF16() const { 507 return isVCSrcF16() || isLiteralImm(MVT::f16); 508 } 509 510 bool isVSrcV2F16() const { 511 return isVSrcF16() || isLiteralImm(MVT::v2f16); 512 } 513 514 bool isVISrcB32() const { 515 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 516 } 517 518 bool isVISrcB16() const { 519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 520 } 521 522 bool isVISrcV2B16() const { 523 return isVISrcB16(); 524 } 525 526 bool isVISrcF32() const { 527 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 528 } 529 530 bool isVISrcF16() const { 531 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 532 } 533 534 bool isVISrcV2F16() const { 535 return isVISrcF16() || isVISrcB32(); 536 } 537 538 bool isAISrcB32() const { 539 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 540 } 541 542 bool isAISrcB16() const { 543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 544 } 545 546 bool isAISrcV2B16() const { 547 return isAISrcB16(); 548 } 549 550 bool isAISrcF32() const { 551 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 552 } 553 554 bool isAISrcF16() const { 555 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 556 } 557 558 bool isAISrcV2F16() const { 559 return isAISrcF16() || isAISrcB32(); 560 } 561 562 bool isAISrc_128B32() const { 563 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 564 } 565 566 bool isAISrc_128B16() const { 567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 568 } 569 570 bool 
isAISrc_128V2B16() const { 571 return isAISrc_128B16(); 572 } 573 574 bool isAISrc_128F32() const { 575 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 576 } 577 578 bool isAISrc_128F16() const { 579 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 580 } 581 582 bool isAISrc_128V2F16() const { 583 return isAISrc_128F16() || isAISrc_128B32(); 584 } 585 586 bool isAISrc_512B32() const { 587 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 588 } 589 590 bool isAISrc_512B16() const { 591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 592 } 593 594 bool isAISrc_512V2B16() const { 595 return isAISrc_512B16(); 596 } 597 598 bool isAISrc_512F32() const { 599 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 600 } 601 602 bool isAISrc_512F16() const { 603 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 604 } 605 606 bool isAISrc_512V2F16() const { 607 return isAISrc_512F16() || isAISrc_512B32(); 608 } 609 610 bool isAISrc_1024B32() const { 611 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 612 } 613 614 bool isAISrc_1024B16() const { 615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 616 } 617 618 bool isAISrc_1024V2B16() const { 619 return isAISrc_1024B16(); 620 } 621 622 bool isAISrc_1024F32() const { 623 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 624 } 625 626 bool isAISrc_1024F16() const { 627 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 628 } 629 630 bool isAISrc_1024V2F16() const { 631 return isAISrc_1024F16() || isAISrc_1024B32(); 632 } 633 634 bool isKImmFP32() const { 635 return isLiteralImm(MVT::f32); 636 } 637 638 bool isKImmFP16() const { 639 return isLiteralImm(MVT::f16); 640 } 641 642 bool isMem() const override { 643 return false; 644 } 645 646 bool isExpr() const { 647 return Kind == Expression; 648 } 649 650 bool isSoppBrTarget() const { 651 return isExpr() || isImm(); 652 } 653 654 bool isSWaitCnt() const; 655 bool isHwreg() const; 656 bool isSendMsg() const; 657 bool isSwizzle() const; 658 bool isSMRDOffset8() const; 659 bool isSMRDOffset20() const; 660 bool isSMRDLiteralOffset() const; 661 bool isDPP8() const; 662 bool isDPPCtrl() const; 663 bool isBLGP() const; 664 bool isCBSZ() const; 665 bool isABID() const; 666 bool isGPRIdxMode() const; 667 bool isS16Imm() const; 668 bool isU16Imm() const; 669 bool isEndpgm() const; 670 671 StringRef getExpressionAsToken() const { 672 assert(isExpr()); 673 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 674 return S->getSymbol().getName(); 675 } 676 677 StringRef getToken() const { 678 assert(isToken()); 679 680 if (Kind == Expression) 681 return getExpressionAsToken(); 682 683 return StringRef(Tok.Data, Tok.Length); 684 } 685 686 int64_t getImm() const { 687 assert(isImm()); 688 return Imm.Val; 689 } 690 691 ImmTy getImmTy() const { 692 assert(isImm()); 693 return Imm.Type; 694 } 695 696 unsigned getReg() const override { 697 assert(isRegKind()); 698 return Reg.RegNo; 699 } 700 701 SMLoc getStartLoc() const override { 702 return StartLoc; 703 } 704 705 SMLoc getEndLoc() const override { 706 return EndLoc; 707 } 708 709 SMRange getLocRange() const { 710 return SMRange(StartLoc, EndLoc); 711 } 712 713 Modifiers getModifiers() const { 714 assert(isRegKind() || isImmTy(ImmTyNone)); 715 return isRegKind() ? 
Reg.Mods : Imm.Mods; 716 } 717 718 void setModifiers(Modifiers Mods) { 719 assert(isRegKind() || isImmTy(ImmTyNone)); 720 if (isRegKind()) 721 Reg.Mods = Mods; 722 else 723 Imm.Mods = Mods; 724 } 725 726 bool hasModifiers() const { 727 return getModifiers().hasModifiers(); 728 } 729 730 bool hasFPModifiers() const { 731 return getModifiers().hasFPModifiers(); 732 } 733 734 bool hasIntModifiers() const { 735 return getModifiers().hasIntModifiers(); 736 } 737 738 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 739 740 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 741 742 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 743 744 template <unsigned Bitwidth> 745 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 746 747 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 748 addKImmFPOperands<16>(Inst, N); 749 } 750 751 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 752 addKImmFPOperands<32>(Inst, N); 753 } 754 755 void addRegOperands(MCInst &Inst, unsigned N) const; 756 757 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 758 addRegOperands(Inst, N); 759 } 760 761 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 762 if (isRegKind()) 763 addRegOperands(Inst, N); 764 else if (isExpr()) 765 Inst.addOperand(MCOperand::createExpr(Expr)); 766 else 767 addImmOperands(Inst, N); 768 } 769 770 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 771 Modifiers Mods = getModifiers(); 772 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 773 if (isRegKind()) { 774 addRegOperands(Inst, N); 775 } else { 776 addImmOperands(Inst, N, false); 777 } 778 } 779 780 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 781 assert(!hasIntModifiers()); 782 addRegOrImmWithInputModsOperands(Inst, N); 783 } 784 785 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 786 assert(!hasFPModifiers()); 787 addRegOrImmWithInputModsOperands(Inst, N); 788 } 789 790 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 791 Modifiers Mods = getModifiers(); 792 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 793 assert(isRegKind()); 794 addRegOperands(Inst, N); 795 } 796 797 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 798 assert(!hasIntModifiers()); 799 addRegWithInputModsOperands(Inst, N); 800 } 801 802 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 803 assert(!hasFPModifiers()); 804 addRegWithInputModsOperands(Inst, N); 805 } 806 807 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 808 if (isImm()) 809 addImmOperands(Inst, N); 810 else { 811 assert(isExpr()); 812 Inst.addOperand(MCOperand::createExpr(Expr)); 813 } 814 } 815 816 static void printImmTy(raw_ostream& OS, ImmTy Type) { 817 switch (Type) { 818 case ImmTyNone: OS << "None"; break; 819 case ImmTyGDS: OS << "GDS"; break; 820 case ImmTyLDS: OS << "LDS"; break; 821 case ImmTyOffen: OS << "Offen"; break; 822 case ImmTyIdxen: OS << "Idxen"; break; 823 case ImmTyAddr64: OS << "Addr64"; break; 824 case ImmTyOffset: OS << "Offset"; break; 825 case ImmTyInstOffset: OS << "InstOffset"; break; 826 case ImmTyOffset0: OS << "Offset0"; break; 827 case ImmTyOffset1: OS << "Offset1"; break; 828 case ImmTyDLC: OS << "DLC"; break; 829 case ImmTyGLC: OS << "GLC"; break; 830 case ImmTySLC: OS << "SLC"; break; 831 case ImmTySWZ: OS << "SWZ"; break; 832 case ImmTyTFE: OS << "TFE"; break; 833 case 
ImmTyD16: OS << "D16"; break; 834 case ImmTyFORMAT: OS << "FORMAT"; break; 835 case ImmTyClampSI: OS << "ClampSI"; break; 836 case ImmTyOModSI: OS << "OModSI"; break; 837 case ImmTyDPP8: OS << "DPP8"; break; 838 case ImmTyDppCtrl: OS << "DppCtrl"; break; 839 case ImmTyDppRowMask: OS << "DppRowMask"; break; 840 case ImmTyDppBankMask: OS << "DppBankMask"; break; 841 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 842 case ImmTyDppFi: OS << "FI"; break; 843 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 844 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 845 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 846 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 847 case ImmTyDMask: OS << "DMask"; break; 848 case ImmTyDim: OS << "Dim"; break; 849 case ImmTyUNorm: OS << "UNorm"; break; 850 case ImmTyDA: OS << "DA"; break; 851 case ImmTyR128A16: OS << "R128A16"; break; 852 case ImmTyA16: OS << "A16"; break; 853 case ImmTyLWE: OS << "LWE"; break; 854 case ImmTyOff: OS << "Off"; break; 855 case ImmTyExpTgt: OS << "ExpTgt"; break; 856 case ImmTyExpCompr: OS << "ExpCompr"; break; 857 case ImmTyExpVM: OS << "ExpVM"; break; 858 case ImmTyHwreg: OS << "Hwreg"; break; 859 case ImmTySendMsg: OS << "SendMsg"; break; 860 case ImmTyInterpSlot: OS << "InterpSlot"; break; 861 case ImmTyInterpAttr: OS << "InterpAttr"; break; 862 case ImmTyAttrChan: OS << "AttrChan"; break; 863 case ImmTyOpSel: OS << "OpSel"; break; 864 case ImmTyOpSelHi: OS << "OpSelHi"; break; 865 case ImmTyNegLo: OS << "NegLo"; break; 866 case ImmTyNegHi: OS << "NegHi"; break; 867 case ImmTySwizzle: OS << "Swizzle"; break; 868 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 869 case ImmTyHigh: OS << "High"; break; 870 case ImmTyBLGP: OS << "BLGP"; break; 871 case ImmTyCBSZ: OS << "CBSZ"; break; 872 case ImmTyABID: OS << "ABID"; break; 873 case ImmTyEndpgm: OS << "Endpgm"; break; 874 } 875 } 876 877 void print(raw_ostream &OS) const override { 878 switch (Kind) { 879 case Register: 880 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 881 break; 882 case Immediate: 883 OS << '<' << getImm(); 884 if (getImmTy() != ImmTyNone) { 885 OS << " type: "; printImmTy(OS, getImmTy()); 886 } 887 OS << " mods: " << Imm.Mods << '>'; 888 break; 889 case Token: 890 OS << '\'' << getToken() << '\''; 891 break; 892 case Expression: 893 OS << "<expr " << *Expr << '>'; 894 break; 895 } 896 } 897 898 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 899 int64_t Val, SMLoc Loc, 900 ImmTy Type = ImmTyNone, 901 bool IsFPImm = false) { 902 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 903 Op->Imm.Val = Val; 904 Op->Imm.IsFPImm = IsFPImm; 905 Op->Imm.Type = Type; 906 Op->Imm.Mods = Modifiers(); 907 Op->StartLoc = Loc; 908 Op->EndLoc = Loc; 909 return Op; 910 } 911 912 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 913 StringRef Str, SMLoc Loc, 914 bool HasExplicitEncodingSize = true) { 915 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 916 Res->Tok.Data = Str.data(); 917 Res->Tok.Length = Str.size(); 918 Res->StartLoc = Loc; 919 Res->EndLoc = Loc; 920 return Res; 921 } 922 923 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 924 unsigned RegNo, SMLoc S, 925 SMLoc E) { 926 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 927 Op->Reg.RegNo = RegNo; 928 Op->Reg.Mods = Modifiers(); 929 Op->StartLoc = S; 930 Op->EndLoc = E; 931 return Op; 932 } 933 934 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 935 
const class MCExpr *Expr, SMLoc S) { 936 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 937 Op->Expr = Expr; 938 Op->StartLoc = S; 939 Op->EndLoc = S; 940 return Op; 941 } 942 }; 943 944 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 945 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 946 return OS; 947 } 948 949 //===----------------------------------------------------------------------===// 950 // AsmParser 951 //===----------------------------------------------------------------------===// 952 953 // Holds info related to the current kernel, e.g. count of SGPRs used. 954 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 955 // .amdgpu_hsa_kernel or at EOF. 956 class KernelScopeInfo { 957 int SgprIndexUnusedMin = -1; 958 int VgprIndexUnusedMin = -1; 959 MCContext *Ctx = nullptr; 960 961 void usesSgprAt(int i) { 962 if (i >= SgprIndexUnusedMin) { 963 SgprIndexUnusedMin = ++i; 964 if (Ctx) { 965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 966 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 967 } 968 } 969 } 970 971 void usesVgprAt(int i) { 972 if (i >= VgprIndexUnusedMin) { 973 VgprIndexUnusedMin = ++i; 974 if (Ctx) { 975 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 976 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 977 } 978 } 979 } 980 981 public: 982 KernelScopeInfo() = default; 983 984 void initialize(MCContext &Context) { 985 Ctx = &Context; 986 usesSgprAt(SgprIndexUnusedMin = -1); 987 usesVgprAt(VgprIndexUnusedMin = -1); 988 } 989 990 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 991 switch (RegKind) { 992 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 993 case IS_AGPR: // fall through 994 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 995 default: break; 996 } 997 } 998 }; 999 1000 class AMDGPUAsmParser : public MCTargetAsmParser { 1001 MCAsmParser &Parser; 1002 1003 // Number of extra operands parsed after the first optional operand. 1004 // This may be necessary to skip hardcoded mandatory operands. 1005 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1006 1007 unsigned ForcedEncodingSize = 0; 1008 bool ForcedDPP = false; 1009 bool ForcedSDWA = false; 1010 KernelScopeInfo KernelScope; 1011 1012 /// @name Auto-generated Match Functions 1013 /// { 1014 1015 #define GET_ASSEMBLER_HEADER 1016 #include "AMDGPUGenAsmMatcher.inc" 1017 1018 /// } 1019 1020 private: 1021 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1022 bool OutOfRangeError(SMRange Range); 1023 /// Calculate VGPR/SGPR blocks required for given target, reserved 1024 /// registers, and user-specified NextFreeXGPR values. 1025 /// 1026 /// \param Features [in] Target features, used for bug corrections. 1027 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1028 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1029 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1030 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1031 /// descriptor field, if valid. 1032 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1033 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1034 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1035 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1036 /// \param VGPRBlocks [out] Result VGPR block count. 1037 /// \param SGPRBlocks [out] Result SGPR block count. 1038 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1039 bool FlatScrUsed, bool XNACKUsed, 1040 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1041 SMRange VGPRRange, unsigned NextFreeSGPR, 1042 SMRange SGPRRange, unsigned &VGPRBlocks, 1043 unsigned &SGPRBlocks); 1044 bool ParseDirectiveAMDGCNTarget(); 1045 bool ParseDirectiveAMDHSAKernel(); 1046 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1047 bool ParseDirectiveHSACodeObjectVersion(); 1048 bool ParseDirectiveHSACodeObjectISA(); 1049 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1050 bool ParseDirectiveAMDKernelCodeT(); 1051 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1052 bool ParseDirectiveAMDGPUHsaKernel(); 1053 1054 bool ParseDirectiveISAVersion(); 1055 bool ParseDirectiveHSAMetadata(); 1056 bool ParseDirectivePALMetadataBegin(); 1057 bool ParseDirectivePALMetadata(); 1058 bool ParseDirectiveAMDGPULDS(); 1059 1060 /// Common code to parse out a block of text (typically YAML) between start and 1061 /// end directives. 1062 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1063 const char *AssemblerDirectiveEnd, 1064 std::string &CollectString); 1065 1066 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1067 RegisterKind RegKind, unsigned Reg1); 1068 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1069 unsigned &RegNum, unsigned &RegWidth, 1070 bool RestoreOnFailure = false); 1071 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1072 unsigned &RegNum, unsigned &RegWidth, 1073 SmallVectorImpl<AsmToken> &Tokens); 1074 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1075 unsigned &RegWidth, 1076 SmallVectorImpl<AsmToken> &Tokens); 1077 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1078 unsigned &RegWidth, 1079 SmallVectorImpl<AsmToken> &Tokens); 1080 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1081 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1082 bool ParseRegRange(unsigned& Num, unsigned& Width); 1083 unsigned getRegularReg(RegisterKind RegKind, 1084 unsigned RegNum, 1085 unsigned RegWidth); 1086 1087 bool isRegister(); 1088 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1089 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1090 void initializeGprCountSymbol(RegisterKind RegKind); 1091 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1092 unsigned RegWidth); 1093 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1094 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1095 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1096 bool IsGdsHardcoded); 1097 1098 public: 1099 enum AMDGPUMatchResultTy { 1100 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1101 }; 1102 enum OperandMode { 1103 OperandMode_Default, 1104 OperandMode_NSA, 1105 }; 1106 1107 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1108 1109 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1110 const MCInstrInfo &MII, 1111 const MCTargetOptions &Options) 1112 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1113 MCAsmParserExtension::Initialize(Parser); 1114 1115 if (getFeatureBits().none()) { 1116 // Set default features. 
1117 copySTI().ToggleFeature("southern-islands"); 1118 } 1119 1120 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1121 1122 { 1123 // TODO: make those pre-defined variables read-only. 1124 // Currently there is none suitable machinery in the core llvm-mc for this. 1125 // MCSymbol::isRedefinable is intended for another purpose, and 1126 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1127 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1128 MCContext &Ctx = getContext(); 1129 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1130 MCSymbol *Sym = 1131 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1132 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1133 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1134 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1135 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1136 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1137 } else { 1138 MCSymbol *Sym = 1139 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1140 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1141 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1142 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1143 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1144 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1145 } 1146 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1147 initializeGprCountSymbol(IS_VGPR); 1148 initializeGprCountSymbol(IS_SGPR); 1149 } else 1150 KernelScope.initialize(getContext()); 1151 } 1152 } 1153 1154 bool hasXNACK() const { 1155 return AMDGPU::hasXNACK(getSTI()); 1156 } 1157 1158 bool hasMIMG_R128() const { 1159 return AMDGPU::hasMIMG_R128(getSTI()); 1160 } 1161 1162 bool hasPackedD16() const { 1163 return AMDGPU::hasPackedD16(getSTI()); 1164 } 1165 1166 bool hasGFX10A16() const { 1167 return AMDGPU::hasGFX10A16(getSTI()); 1168 } 1169 1170 bool isSI() const { 1171 return AMDGPU::isSI(getSTI()); 1172 } 1173 1174 bool isCI() const { 1175 return AMDGPU::isCI(getSTI()); 1176 } 1177 1178 bool isVI() const { 1179 return AMDGPU::isVI(getSTI()); 1180 } 1181 1182 bool isGFX9() const { 1183 return AMDGPU::isGFX9(getSTI()); 1184 } 1185 1186 bool isGFX10() const { 1187 return AMDGPU::isGFX10(getSTI()); 1188 } 1189 1190 bool hasInv2PiInlineImm() const { 1191 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1192 } 1193 1194 bool hasFlatOffsets() const { 1195 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1196 } 1197 1198 bool hasSGPR102_SGPR103() const { 1199 return !isVI() && !isGFX9(); 1200 } 1201 1202 bool hasSGPR104_SGPR105() const { 1203 return isGFX10(); 1204 } 1205 1206 bool hasIntClamp() const { 1207 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1208 } 1209 1210 AMDGPUTargetStreamer &getTargetStreamer() { 1211 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1212 return static_cast<AMDGPUTargetStreamer &>(TS); 1213 } 1214 1215 const MCRegisterInfo *getMRI() const { 1216 // We need this const_cast because for some reason getContext() is not const 1217 // in MCAsmParser. 
1218 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1219 } 1220 1221 const MCInstrInfo *getMII() const { 1222 return &MII; 1223 } 1224 1225 const FeatureBitset &getFeatureBits() const { 1226 return getSTI().getFeatureBits(); 1227 } 1228 1229 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1230 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1231 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1232 1233 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1234 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1235 bool isForcedDPP() const { return ForcedDPP; } 1236 bool isForcedSDWA() const { return ForcedSDWA; } 1237 ArrayRef<unsigned> getMatchedVariants() const; 1238 1239 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1240 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1241 bool RestoreOnFailure); 1242 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1243 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1244 SMLoc &EndLoc) override; 1245 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1246 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1247 unsigned Kind) override; 1248 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1249 OperandVector &Operands, MCStreamer &Out, 1250 uint64_t &ErrorInfo, 1251 bool MatchingInlineAsm) override; 1252 bool ParseDirective(AsmToken DirectiveID) override; 1253 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1254 OperandMode Mode = OperandMode_Default); 1255 StringRef parseMnemonicSuffix(StringRef Name); 1256 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1257 SMLoc NameLoc, OperandVector &Operands) override; 1258 //bool ProcessInstruction(MCInst &Inst); 1259 1260 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1261 1262 OperandMatchResultTy 1263 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1264 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1265 bool (*ConvertResult)(int64_t &) = nullptr); 1266 1267 OperandMatchResultTy 1268 parseOperandArrayWithPrefix(const char *Prefix, 1269 OperandVector &Operands, 1270 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1271 bool (*ConvertResult)(int64_t&) = nullptr); 1272 1273 OperandMatchResultTy 1274 parseNamedBit(const char *Name, OperandVector &Operands, 1275 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1276 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1277 StringRef &Value); 1278 1279 bool isModifier(); 1280 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1281 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1282 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1283 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1284 bool parseSP3NegModifier(); 1285 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1286 OperandMatchResultTy parseReg(OperandVector &Operands); 1287 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1288 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1289 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1290 
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1291 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1292 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1293 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1294 1295 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1296 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1297 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1298 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1299 1300 bool parseCnt(int64_t &IntVal); 1301 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1302 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1303 1304 private: 1305 struct OperandInfoTy { 1306 int64_t Id; 1307 bool IsSymbolic = false; 1308 bool IsDefined = false; 1309 1310 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1311 }; 1312 1313 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1314 bool validateSendMsg(const OperandInfoTy &Msg, 1315 const OperandInfoTy &Op, 1316 const OperandInfoTy &Stream, 1317 const SMLoc Loc); 1318 1319 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1320 bool validateHwreg(const OperandInfoTy &HwReg, 1321 const int64_t Offset, 1322 const int64_t Width, 1323 const SMLoc Loc); 1324 1325 void errorExpTgt(); 1326 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1327 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1328 1329 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1330 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1331 bool validateSOPLiteral(const MCInst &Inst) const; 1332 bool validateConstantBusLimitations(const MCInst &Inst); 1333 bool validateEarlyClobberLimitations(const MCInst &Inst); 1334 bool validateIntClampSupported(const MCInst &Inst); 1335 bool validateMIMGAtomicDMask(const MCInst &Inst); 1336 bool validateMIMGGatherDMask(const MCInst &Inst); 1337 bool validateMovrels(const MCInst &Inst); 1338 bool validateMIMGDataSize(const MCInst &Inst); 1339 bool validateMIMGAddrSize(const MCInst &Inst); 1340 bool validateMIMGD16(const MCInst &Inst); 1341 bool validateMIMGDim(const MCInst &Inst); 1342 bool validateLdsDirect(const MCInst &Inst); 1343 bool validateOpSel(const MCInst &Inst); 1344 bool validateVccOperand(unsigned Reg) const; 1345 bool validateVOP3Literal(const MCInst &Inst) const; 1346 unsigned getConstantBusLimit(unsigned Opcode) const; 1347 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1348 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1349 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1350 1351 bool isId(const StringRef Id) const; 1352 bool isId(const AsmToken &Token, const StringRef Id) const; 1353 bool isToken(const AsmToken::TokenKind Kind) const; 1354 bool trySkipId(const StringRef Id); 1355 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1356 bool trySkipToken(const AsmToken::TokenKind Kind); 1357 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1358 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1359 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1360 AsmToken::TokenKind getTokenKind() const; 1361 bool parseExpr(int64_t &Imm); 1362 bool parseExpr(OperandVector &Operands); 1363 StringRef getTokenStr() const; 
1364 AsmToken peekToken(); 1365 AsmToken getToken() const; 1366 SMLoc getLoc() const; 1367 void lex(); 1368 1369 public: 1370 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1371 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1372 1373 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1374 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1375 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1376 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1377 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1378 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1379 1380 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1381 const unsigned MinVal, 1382 const unsigned MaxVal, 1383 const StringRef ErrMsg); 1384 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1385 bool parseSwizzleOffset(int64_t &Imm); 1386 bool parseSwizzleMacro(int64_t &Imm); 1387 bool parseSwizzleQuadPerm(int64_t &Imm); 1388 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1389 bool parseSwizzleBroadcast(int64_t &Imm); 1390 bool parseSwizzleSwap(int64_t &Imm); 1391 bool parseSwizzleReverse(int64_t &Imm); 1392 1393 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1394 int64_t parseGPRIdxMacro(); 1395 1396 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1397 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1398 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1399 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1400 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1401 1402 AMDGPUOperand::Ptr defaultDLC() const; 1403 AMDGPUOperand::Ptr defaultGLC() const; 1404 AMDGPUOperand::Ptr defaultSLC() const; 1405 1406 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1407 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1408 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1409 AMDGPUOperand::Ptr defaultFlatOffset() const; 1410 1411 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1412 1413 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1414 OptionalImmIndexMap &OptionalIdx); 1415 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1416 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1417 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1418 1419 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1420 1421 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1422 bool IsAtomic = false); 1423 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1424 1425 OperandMatchResultTy parseDim(OperandVector &Operands); 1426 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1427 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1428 AMDGPUOperand::Ptr defaultRowMask() const; 1429 AMDGPUOperand::Ptr defaultBankMask() const; 1430 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1431 AMDGPUOperand::Ptr defaultFI() const; 1432 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1433 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1434 1435 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1436 AMDGPUOperand::ImmTy Type); 1437 OperandMatchResultTy 
parseSDWADstUnused(OperandVector &Operands); 1438 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1439 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1440 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1441 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1442 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1443 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1444 uint64_t BasicInstType, 1445 bool SkipDstVcc = false, 1446 bool SkipSrcVcc = false); 1447 1448 AMDGPUOperand::Ptr defaultBLGP() const; 1449 AMDGPUOperand::Ptr defaultCBSZ() const; 1450 AMDGPUOperand::Ptr defaultABID() const; 1451 1452 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1453 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1454 }; 1455 1456 struct OptionalOperand { 1457 const char *Name; 1458 AMDGPUOperand::ImmTy Type; 1459 bool IsBit; 1460 bool (*ConvertResult)(int64_t&); 1461 }; 1462 1463 } // end anonymous namespace 1464 1465 // May be called with integer type with equivalent bitwidth. 1466 static const fltSemantics *getFltSemantics(unsigned Size) { 1467 switch (Size) { 1468 case 4: 1469 return &APFloat::IEEEsingle(); 1470 case 8: 1471 return &APFloat::IEEEdouble(); 1472 case 2: 1473 return &APFloat::IEEEhalf(); 1474 default: 1475 llvm_unreachable("unsupported fp type"); 1476 } 1477 } 1478 1479 static const fltSemantics *getFltSemantics(MVT VT) { 1480 return getFltSemantics(VT.getSizeInBits() / 8); 1481 } 1482 1483 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1484 switch (OperandType) { 1485 case AMDGPU::OPERAND_REG_IMM_INT32: 1486 case AMDGPU::OPERAND_REG_IMM_FP32: 1487 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1488 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1489 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1490 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1491 return &APFloat::IEEEsingle(); 1492 case AMDGPU::OPERAND_REG_IMM_INT64: 1493 case AMDGPU::OPERAND_REG_IMM_FP64: 1494 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1495 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1496 return &APFloat::IEEEdouble(); 1497 case AMDGPU::OPERAND_REG_IMM_INT16: 1498 case AMDGPU::OPERAND_REG_IMM_FP16: 1499 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1500 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1501 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1502 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1503 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1504 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1505 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1506 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1507 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1508 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1509 return &APFloat::IEEEhalf(); 1510 default: 1511 llvm_unreachable("unsupported fp type"); 1512 } 1513 } 1514 1515 //===----------------------------------------------------------------------===// 1516 // Operand 1517 //===----------------------------------------------------------------------===// 1518 1519 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1520 bool Lost; 1521 1522 // Convert literal to single precision 1523 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1524 APFloat::rmNearestTiesToEven, 1525 &Lost); 1526 // We allow precision lost but not overflow or underflow 1527 if (Status != APFloat::opOK && 1528 Lost && 1529 ((Status & APFloat::opOverflow) != 0 || 1530 (Status & APFloat::opUnderflow) != 0)) { 1531 return false; 1532 } 1533 1534 return true; 1535 } 1536 1537 static bool isSafeTruncation(int64_t Val, unsigned 
Size) { 1538 return isUIntN(Size, Val) || isIntN(Size, Val); 1539 } 1540 1541 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1542 1543 // This is a hack to enable named inline values like 1544 // shared_base with both 32-bit and 64-bit operands. 1545 // Note that these values are defined as 1546 // 32-bit operands only. 1547 if (isInlineValue()) { 1548 return true; 1549 } 1550 1551 if (!isImmTy(ImmTyNone)) { 1552 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1553 return false; 1554 } 1555 // TODO: We should avoid using host float here. It would be better to 1556 // check the float bit values which is what a few other places do. 1557 // We've had bot failures before due to weird NaN support on mips hosts. 1558 1559 APInt Literal(64, Imm.Val); 1560 1561 if (Imm.IsFPImm) { // We got fp literal token 1562 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1563 return AMDGPU::isInlinableLiteral64(Imm.Val, 1564 AsmParser->hasInv2PiInlineImm()); 1565 } 1566 1567 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1568 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1569 return false; 1570 1571 if (type.getScalarSizeInBits() == 16) { 1572 return AMDGPU::isInlinableLiteral16( 1573 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1574 AsmParser->hasInv2PiInlineImm()); 1575 } 1576 1577 // Check if single precision literal is inlinable 1578 return AMDGPU::isInlinableLiteral32( 1579 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1580 AsmParser->hasInv2PiInlineImm()); 1581 } 1582 1583 // We got int literal token. 1584 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1585 return AMDGPU::isInlinableLiteral64(Imm.Val, 1586 AsmParser->hasInv2PiInlineImm()); 1587 } 1588 1589 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1590 return false; 1591 } 1592 1593 if (type.getScalarSizeInBits() == 16) { 1594 return AMDGPU::isInlinableLiteral16( 1595 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1596 AsmParser->hasInv2PiInlineImm()); 1597 } 1598 1599 return AMDGPU::isInlinableLiteral32( 1600 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1601 AsmParser->hasInv2PiInlineImm()); 1602 } 1603 1604 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1605 // Check that this immediate can be added as literal 1606 if (!isImmTy(ImmTyNone)) { 1607 return false; 1608 } 1609 1610 if (!Imm.IsFPImm) { 1611 // We got int literal token. 1612 1613 if (type == MVT::f64 && hasFPModifiers()) { 1614 // Cannot apply fp modifiers to int literals preserving the same semantics 1615 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1616 // disable these cases. 1617 return false; 1618 } 1619 1620 unsigned Size = type.getSizeInBits(); 1621 if (Size == 64) 1622 Size = 32; 1623 1624 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1625 // types. 1626 return isSafeTruncation(Imm.Val, Size); 1627 } 1628 1629 // We got fp literal token 1630 if (type == MVT::f64) { // Expected 64-bit fp operand 1631 // We would set low 64-bits of literal to zeroes but we accept this literals 1632 return true; 1633 } 1634 1635 if (type == MVT::i64) { // Expected 64-bit int operand 1636 // We don't allow fp literals in 64-bit integer instructions. It is 1637 // unclear how we should encode them. 
1638 return false; 1639 } 1640 1641 // We allow fp literals with f16x2 operands assuming that the specified 1642 // literal goes into the lower half and the upper half is zero. We also 1643 // require that the literal may be losslesly converted to f16. 1644 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1645 (type == MVT::v2i16)? MVT::i16 : type; 1646 1647 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1648 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1649 } 1650 1651 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1652 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1653 } 1654 1655 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1656 if (AsmParser->isVI()) 1657 return isVReg32(); 1658 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1659 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1660 else 1661 return false; 1662 } 1663 1664 bool AMDGPUOperand::isSDWAFP16Operand() const { 1665 return isSDWAOperand(MVT::f16); 1666 } 1667 1668 bool AMDGPUOperand::isSDWAFP32Operand() const { 1669 return isSDWAOperand(MVT::f32); 1670 } 1671 1672 bool AMDGPUOperand::isSDWAInt16Operand() const { 1673 return isSDWAOperand(MVT::i16); 1674 } 1675 1676 bool AMDGPUOperand::isSDWAInt32Operand() const { 1677 return isSDWAOperand(MVT::i32); 1678 } 1679 1680 bool AMDGPUOperand::isBoolReg() const { 1681 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1682 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1683 } 1684 1685 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1686 { 1687 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1688 assert(Size == 2 || Size == 4 || Size == 8); 1689 1690 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1691 1692 if (Imm.Mods.Abs) { 1693 Val &= ~FpSignMask; 1694 } 1695 if (Imm.Mods.Neg) { 1696 Val ^= FpSignMask; 1697 } 1698 1699 return Val; 1700 } 1701 1702 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1703 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1704 Inst.getNumOperands())) { 1705 addLiteralImmOperand(Inst, Imm.Val, 1706 ApplyModifiers & 1707 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1708 } else { 1709 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1710 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1711 } 1712 } 1713 1714 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1715 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1716 auto OpNum = Inst.getNumOperands(); 1717 // Check that this operand accepts literals 1718 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1719 1720 if (ApplyModifiers) { 1721 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1722 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1723 Val = applyInputFPModifiers(Val, Size); 1724 } 1725 1726 APInt Literal(64, Val); 1727 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1728 1729 if (Imm.IsFPImm) { // We got fp literal token 1730 switch (OpTy) { 1731 case AMDGPU::OPERAND_REG_IMM_INT64: 1732 case AMDGPU::OPERAND_REG_IMM_FP64: 1733 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1734 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1735 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1736 AsmParser->hasInv2PiInlineImm())) { 1737 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1738 return; 1739 } 1740 1741 // Non-inlineable 1742 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1743 // For fp operands we check if low 32 bits are zeros 1744 if (Literal.getLoBits(32) != 0) { 1745 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1746 "Can't encode literal as exact 64-bit floating-point operand. " 1747 "Low 32-bits will be set to zero"); 1748 } 1749 1750 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1751 return; 1752 } 1753 1754 // We don't allow fp literals in 64-bit integer instructions. It is 1755 // unclear how we should encode them. This case should be checked earlier 1756 // in predicate methods (isLiteralImm()) 1757 llvm_unreachable("fp literal in 64-bit integer instruction."); 1758 1759 case AMDGPU::OPERAND_REG_IMM_INT32: 1760 case AMDGPU::OPERAND_REG_IMM_FP32: 1761 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1762 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1763 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1764 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1765 case AMDGPU::OPERAND_REG_IMM_INT16: 1766 case AMDGPU::OPERAND_REG_IMM_FP16: 1767 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1768 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1769 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1770 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1771 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1772 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1773 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1774 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1775 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1776 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1777 bool lost; 1778 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1779 // Convert literal to single precision 1780 FPLiteral.convert(*getOpFltSemantics(OpTy), 1781 APFloat::rmNearestTiesToEven, &lost); 1782 // We allow precision lost but not overflow or underflow. This should be 1783 // checked earlier in isLiteralImm() 1784 1785 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1786 Inst.addOperand(MCOperand::createImm(ImmVal)); 1787 return; 1788 } 1789 default: 1790 llvm_unreachable("invalid operand size"); 1791 } 1792 1793 return; 1794 } 1795 1796 // We got int literal token. 1797 // Only sign extend inline immediates. 
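  // A hedged sketch of what the switch below does with integer literal
  // tokens (illustrative operands, not an exhaustive list):
  //   v_add_f16 v0, -1, v1   // -1 is an inline constant -> kept as is
  //   v_add_f16 v0, 100, v1  // not inlinable -> emitted as a literal (0x0064)
  //   v_add_f32 v0, 100, v1  // likewise emitted as a 32-bit literal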
1798 switch (OpTy) { 1799 case AMDGPU::OPERAND_REG_IMM_INT32: 1800 case AMDGPU::OPERAND_REG_IMM_FP32: 1801 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1802 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1803 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1805 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1806 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1807 if (isSafeTruncation(Val, 32) && 1808 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1809 AsmParser->hasInv2PiInlineImm())) { 1810 Inst.addOperand(MCOperand::createImm(Val)); 1811 return; 1812 } 1813 1814 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1815 return; 1816 1817 case AMDGPU::OPERAND_REG_IMM_INT64: 1818 case AMDGPU::OPERAND_REG_IMM_FP64: 1819 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1820 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1821 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1822 Inst.addOperand(MCOperand::createImm(Val)); 1823 return; 1824 } 1825 1826 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1827 return; 1828 1829 case AMDGPU::OPERAND_REG_IMM_INT16: 1830 case AMDGPU::OPERAND_REG_IMM_FP16: 1831 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1832 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1833 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1834 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1835 if (isSafeTruncation(Val, 16) && 1836 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1837 AsmParser->hasInv2PiInlineImm())) { 1838 Inst.addOperand(MCOperand::createImm(Val)); 1839 return; 1840 } 1841 1842 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1843 return; 1844 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1847 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1849 assert(isSafeTruncation(Val, 16)); 1850 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1851 AsmParser->hasInv2PiInlineImm())); 1852 1853 Inst.addOperand(MCOperand::createImm(Val)); 1854 return; 1855 } 1856 default: 1857 llvm_unreachable("invalid operand size"); 1858 } 1859 } 1860 1861 template <unsigned Bitwidth> 1862 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1863 APInt Literal(64, Imm.Val); 1864 1865 if (!Imm.IsFPImm) { 1866 // We got int literal token. 
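    // An integer literal token is simply truncated to the k-imm width below,
    // mirroring getLoBits(Bitwidth); e.g. with Bitwidth == 16 a value of -1
    // is emitted as 0xFFFF. (Illustration only, not an extra check.)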
1867 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1868 return; 1869 } 1870 1871 bool Lost; 1872 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1873 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1874 APFloat::rmNearestTiesToEven, &Lost); 1875 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1876 } 1877 1878 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1879 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1880 } 1881 1882 static bool isInlineValue(unsigned Reg) { 1883 switch (Reg) { 1884 case AMDGPU::SRC_SHARED_BASE: 1885 case AMDGPU::SRC_SHARED_LIMIT: 1886 case AMDGPU::SRC_PRIVATE_BASE: 1887 case AMDGPU::SRC_PRIVATE_LIMIT: 1888 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1889 return true; 1890 case AMDGPU::SRC_VCCZ: 1891 case AMDGPU::SRC_EXECZ: 1892 case AMDGPU::SRC_SCC: 1893 return true; 1894 case AMDGPU::SGPR_NULL: 1895 return true; 1896 default: 1897 return false; 1898 } 1899 } 1900 1901 bool AMDGPUOperand::isInlineValue() const { 1902 return isRegKind() && ::isInlineValue(getReg()); 1903 } 1904 1905 //===----------------------------------------------------------------------===// 1906 // AsmParser 1907 //===----------------------------------------------------------------------===// 1908 1909 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1910 if (Is == IS_VGPR) { 1911 switch (RegWidth) { 1912 default: return -1; 1913 case 1: return AMDGPU::VGPR_32RegClassID; 1914 case 2: return AMDGPU::VReg_64RegClassID; 1915 case 3: return AMDGPU::VReg_96RegClassID; 1916 case 4: return AMDGPU::VReg_128RegClassID; 1917 case 5: return AMDGPU::VReg_160RegClassID; 1918 case 8: return AMDGPU::VReg_256RegClassID; 1919 case 16: return AMDGPU::VReg_512RegClassID; 1920 case 32: return AMDGPU::VReg_1024RegClassID; 1921 } 1922 } else if (Is == IS_TTMP) { 1923 switch (RegWidth) { 1924 default: return -1; 1925 case 1: return AMDGPU::TTMP_32RegClassID; 1926 case 2: return AMDGPU::TTMP_64RegClassID; 1927 case 4: return AMDGPU::TTMP_128RegClassID; 1928 case 8: return AMDGPU::TTMP_256RegClassID; 1929 case 16: return AMDGPU::TTMP_512RegClassID; 1930 } 1931 } else if (Is == IS_SGPR) { 1932 switch (RegWidth) { 1933 default: return -1; 1934 case 1: return AMDGPU::SGPR_32RegClassID; 1935 case 2: return AMDGPU::SGPR_64RegClassID; 1936 case 4: return AMDGPU::SGPR_128RegClassID; 1937 case 8: return AMDGPU::SGPR_256RegClassID; 1938 case 16: return AMDGPU::SGPR_512RegClassID; 1939 } 1940 } else if (Is == IS_AGPR) { 1941 switch (RegWidth) { 1942 default: return -1; 1943 case 1: return AMDGPU::AGPR_32RegClassID; 1944 case 2: return AMDGPU::AReg_64RegClassID; 1945 case 4: return AMDGPU::AReg_128RegClassID; 1946 case 16: return AMDGPU::AReg_512RegClassID; 1947 case 32: return AMDGPU::AReg_1024RegClassID; 1948 } 1949 } 1950 return -1; 1951 } 1952 1953 static unsigned getSpecialRegForName(StringRef RegName) { 1954 return StringSwitch<unsigned>(RegName) 1955 .Case("exec", AMDGPU::EXEC) 1956 .Case("vcc", AMDGPU::VCC) 1957 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1958 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1959 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1960 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1961 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1962 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1963 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1964 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1965 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1966 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1967 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1968 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1969 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1970 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1971 .Case("m0", AMDGPU::M0) 1972 .Case("vccz", AMDGPU::SRC_VCCZ) 1973 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1974 .Case("execz", AMDGPU::SRC_EXECZ) 1975 .Case("src_execz", AMDGPU::SRC_EXECZ) 1976 .Case("scc", AMDGPU::SRC_SCC) 1977 .Case("src_scc", AMDGPU::SRC_SCC) 1978 .Case("tba", AMDGPU::TBA) 1979 .Case("tma", AMDGPU::TMA) 1980 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1981 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1982 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1983 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1984 .Case("vcc_lo", AMDGPU::VCC_LO) 1985 .Case("vcc_hi", AMDGPU::VCC_HI) 1986 .Case("exec_lo", AMDGPU::EXEC_LO) 1987 .Case("exec_hi", AMDGPU::EXEC_HI) 1988 .Case("tma_lo", AMDGPU::TMA_LO) 1989 .Case("tma_hi", AMDGPU::TMA_HI) 1990 .Case("tba_lo", AMDGPU::TBA_LO) 1991 .Case("tba_hi", AMDGPU::TBA_HI) 1992 .Case("null", AMDGPU::SGPR_NULL) 1993 .Default(AMDGPU::NoRegister); 1994 } 1995 1996 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1997 SMLoc &EndLoc, bool RestoreOnFailure) { 1998 auto R = parseRegister(); 1999 if (!R) return true; 2000 assert(R->isReg()); 2001 RegNo = R->getReg(); 2002 StartLoc = R->getStartLoc(); 2003 EndLoc = R->getEndLoc(); 2004 return false; 2005 } 2006 2007 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2008 SMLoc &EndLoc) { 2009 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2010 } 2011 2012 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2013 SMLoc &StartLoc, 2014 SMLoc &EndLoc) { 2015 bool Result = 2016 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2017 bool PendingErrors = getParser().hasPendingError(); 2018 getParser().clearPendingErrors(); 2019 if (PendingErrors) 2020 return MatchOperand_ParseFail; 2021 if (Result) 2022 return MatchOperand_NoMatch; 2023 return MatchOperand_Success; 2024 } 2025 2026 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2027 RegisterKind RegKind, unsigned Reg1) { 2028 switch (RegKind) { 2029 case IS_SPECIAL: 2030 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2031 Reg = AMDGPU::EXEC; 2032 RegWidth = 2; 2033 return true; 2034 } 2035 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2036 Reg = AMDGPU::FLAT_SCR; 2037 RegWidth = 2; 2038 return true; 2039 } 2040 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2041 Reg = AMDGPU::XNACK_MASK; 2042 RegWidth = 2; 2043 return true; 2044 } 2045 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2046 Reg = AMDGPU::VCC; 2047 RegWidth = 2; 2048 return true; 2049 } 2050 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2051 Reg = AMDGPU::TBA; 2052 RegWidth = 2; 2053 return true; 2054 } 2055 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2056 Reg = AMDGPU::TMA; 2057 RegWidth = 2; 2058 return true; 2059 } 2060 return false; 2061 case IS_VGPR: 2062 case IS_SGPR: 2063 case IS_AGPR: 2064 case IS_TTMP: 2065 if (Reg1 != Reg + RegWidth) { 2066 return false; 2067 } 2068 RegWidth++; 2069 return true; 2070 default: 2071 llvm_unreachable("unexpected register kind"); 2072 } 2073 } 2074 2075 struct RegInfo { 2076 StringLiteral Name; 2077 RegisterKind Kind; 2078 }; 2079 2080 static constexpr RegInfo 
RegularRegisters[] = { 2081 {{"v"}, IS_VGPR}, 2082 {{"s"}, IS_SGPR}, 2083 {{"ttmp"}, IS_TTMP}, 2084 {{"acc"}, IS_AGPR}, 2085 {{"a"}, IS_AGPR}, 2086 }; 2087 2088 static bool isRegularReg(RegisterKind Kind) { 2089 return Kind == IS_VGPR || 2090 Kind == IS_SGPR || 2091 Kind == IS_TTMP || 2092 Kind == IS_AGPR; 2093 } 2094 2095 static const RegInfo* getRegularRegInfo(StringRef Str) { 2096 for (const RegInfo &Reg : RegularRegisters) 2097 if (Str.startswith(Reg.Name)) 2098 return &Reg; 2099 return nullptr; 2100 } 2101 2102 static bool getRegNum(StringRef Str, unsigned& Num) { 2103 return !Str.getAsInteger(10, Num); 2104 } 2105 2106 bool 2107 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2108 const AsmToken &NextToken) const { 2109 2110 // A list of consecutive registers: [s0,s1,s2,s3] 2111 if (Token.is(AsmToken::LBrac)) 2112 return true; 2113 2114 if (!Token.is(AsmToken::Identifier)) 2115 return false; 2116 2117 // A single register like s0 or a range of registers like s[0:1] 2118 2119 StringRef Str = Token.getString(); 2120 const RegInfo *Reg = getRegularRegInfo(Str); 2121 if (Reg) { 2122 StringRef RegName = Reg->Name; 2123 StringRef RegSuffix = Str.substr(RegName.size()); 2124 if (!RegSuffix.empty()) { 2125 unsigned Num; 2126 // A single register with an index: rXX 2127 if (getRegNum(RegSuffix, Num)) 2128 return true; 2129 } else { 2130 // A range of registers: r[XX:YY]. 2131 if (NextToken.is(AsmToken::LBrac)) 2132 return true; 2133 } 2134 } 2135 2136 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2137 } 2138 2139 bool 2140 AMDGPUAsmParser::isRegister() 2141 { 2142 return isRegister(getToken(), peekToken()); 2143 } 2144 2145 unsigned 2146 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2147 unsigned RegNum, 2148 unsigned RegWidth) { 2149 2150 assert(isRegularReg(RegKind)); 2151 2152 unsigned AlignSize = 1; 2153 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2154 // SGPR and TTMP registers must be aligned. 2155 // Max required alignment is 4 dwords. 
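    // Worked examples for the alignment rule below (illustrative only):
    //   s[2:3] -> RegNum = 2, RegWidth = 2, AlignSize = 2, 2 % 2 == 0 -> OK
    //   s[1:2] -> RegNum = 1, RegWidth = 2, AlignSize = 2, 1 % 2 != 0 -> rejected
    //   s[2:9] -> RegNum = 2, RegWidth = 8, AlignSize = 4, 2 % 4 != 0 -> rejected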
2156 AlignSize = std::min(RegWidth, 4u); 2157 } 2158 2159 if (RegNum % AlignSize != 0) 2160 return AMDGPU::NoRegister; 2161 2162 unsigned RegIdx = RegNum / AlignSize; 2163 int RCID = getRegClass(RegKind, RegWidth); 2164 if (RCID == -1) 2165 return AMDGPU::NoRegister; 2166 2167 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2168 const MCRegisterClass RC = TRI->getRegClass(RCID); 2169 if (RegIdx >= RC.getNumRegs()) 2170 return AMDGPU::NoRegister; 2171 2172 return RC.getRegister(RegIdx); 2173 } 2174 2175 bool 2176 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2177 int64_t RegLo, RegHi; 2178 if (!trySkipToken(AsmToken::LBrac)) 2179 return false; 2180 2181 if (!parseExpr(RegLo)) 2182 return false; 2183 2184 if (trySkipToken(AsmToken::Colon)) { 2185 if (!parseExpr(RegHi)) 2186 return false; 2187 } else { 2188 RegHi = RegLo; 2189 } 2190 2191 if (!trySkipToken(AsmToken::RBrac)) 2192 return false; 2193 2194 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2195 return false; 2196 2197 Num = static_cast<unsigned>(RegLo); 2198 Width = (RegHi - RegLo) + 1; 2199 return true; 2200 } 2201 2202 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2203 unsigned &RegNum, unsigned &RegWidth, 2204 SmallVectorImpl<AsmToken> &Tokens) { 2205 assert(isToken(AsmToken::Identifier)); 2206 unsigned Reg = getSpecialRegForName(getTokenStr()); 2207 if (Reg) { 2208 RegNum = 0; 2209 RegWidth = 1; 2210 RegKind = IS_SPECIAL; 2211 Tokens.push_back(getToken()); 2212 lex(); // skip register name 2213 } 2214 return Reg; 2215 } 2216 2217 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2218 unsigned &RegNum, unsigned &RegWidth, 2219 SmallVectorImpl<AsmToken> &Tokens) { 2220 assert(isToken(AsmToken::Identifier)); 2221 StringRef RegName = getTokenStr(); 2222 2223 const RegInfo *RI = getRegularRegInfo(RegName); 2224 if (!RI) 2225 return AMDGPU::NoRegister; 2226 Tokens.push_back(getToken()); 2227 lex(); // skip register name 2228 2229 RegKind = RI->Kind; 2230 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2231 if (!RegSuffix.empty()) { 2232 // Single 32-bit register: vXX. 2233 if (!getRegNum(RegSuffix, RegNum)) 2234 return AMDGPU::NoRegister; 2235 RegWidth = 1; 2236 } else { 2237 // Range of registers: v[XX:YY]. ":YY" is optional. 
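    // For example, "v[8:11]" gives RegNum = 8 and RegWidth = 4, while "v[5]"
    // (the ":YY" part omitted) gives RegNum = 5 and RegWidth = 1.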
2238 if (!ParseRegRange(RegNum, RegWidth)) 2239 return AMDGPU::NoRegister; 2240 } 2241 2242 return getRegularReg(RegKind, RegNum, RegWidth); 2243 } 2244 2245 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2246 unsigned &RegWidth, 2247 SmallVectorImpl<AsmToken> &Tokens) { 2248 unsigned Reg = AMDGPU::NoRegister; 2249 2250 if (!trySkipToken(AsmToken::LBrac)) 2251 return AMDGPU::NoRegister; 2252 2253 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2254 2255 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2256 return AMDGPU::NoRegister; 2257 if (RegWidth != 1) 2258 return AMDGPU::NoRegister; 2259 2260 for (; trySkipToken(AsmToken::Comma); ) { 2261 RegisterKind NextRegKind; 2262 unsigned NextReg, NextRegNum, NextRegWidth; 2263 2264 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2265 Tokens)) 2266 return AMDGPU::NoRegister; 2267 if (NextRegWidth != 1) 2268 return AMDGPU::NoRegister; 2269 if (NextRegKind != RegKind) 2270 return AMDGPU::NoRegister; 2271 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2272 return AMDGPU::NoRegister; 2273 } 2274 2275 if (!trySkipToken(AsmToken::RBrac)) 2276 return AMDGPU::NoRegister; 2277 2278 if (isRegularReg(RegKind)) 2279 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2280 2281 return Reg; 2282 } 2283 2284 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2285 unsigned &RegNum, unsigned &RegWidth, 2286 SmallVectorImpl<AsmToken> &Tokens) { 2287 Reg = AMDGPU::NoRegister; 2288 2289 if (isToken(AsmToken::Identifier)) { 2290 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2291 if (Reg == AMDGPU::NoRegister) 2292 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2293 } else { 2294 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2295 } 2296 2297 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2298 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2299 } 2300 2301 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2302 unsigned &RegNum, unsigned &RegWidth, 2303 bool RestoreOnFailure) { 2304 Reg = AMDGPU::NoRegister; 2305 2306 SmallVector<AsmToken, 1> Tokens; 2307 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2308 if (RestoreOnFailure) { 2309 while (!Tokens.empty()) { 2310 getLexer().UnLex(Tokens.pop_back_val()); 2311 } 2312 } 2313 return true; 2314 } 2315 return false; 2316 } 2317 2318 Optional<StringRef> 2319 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2320 switch (RegKind) { 2321 case IS_VGPR: 2322 return StringRef(".amdgcn.next_free_vgpr"); 2323 case IS_SGPR: 2324 return StringRef(".amdgcn.next_free_sgpr"); 2325 default: 2326 return None; 2327 } 2328 } 2329 2330 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2331 auto SymbolName = getGprCountSymbolName(RegKind); 2332 assert(SymbolName && "initializing invalid register kind"); 2333 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2334 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2335 } 2336 2337 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2338 unsigned DwordRegIndex, 2339 unsigned RegWidth) { 2340 // Symbols are only defined for GCN targets 2341 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2342 return true; 2343 2344 auto SymbolName = getGprCountSymbolName(RegKind); 2345 if (!SymbolName) 2346 return true; 2347 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2348 2349 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2350 int64_t OldCount; 2351 2352 if (!Sym->isVariable()) 2353 return !Error(getParser().getTok().getLoc(), 2354 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2355 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2356 return !Error( 2357 getParser().getTok().getLoc(), 2358 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2359 2360 if (OldCount <= NewMax) 2361 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2362 2363 return true; 2364 } 2365 2366 std::unique_ptr<AMDGPUOperand> 2367 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2368 const auto &Tok = Parser.getTok(); 2369 SMLoc StartLoc = Tok.getLoc(); 2370 SMLoc EndLoc = Tok.getEndLoc(); 2371 RegisterKind RegKind; 2372 unsigned Reg, RegNum, RegWidth; 2373 2374 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2375 //FIXME: improve error messages (bug 41303). 2376 Error(StartLoc, "not a valid operand."); 2377 return nullptr; 2378 } 2379 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2380 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2381 return nullptr; 2382 } else 2383 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2384 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2385 } 2386 2387 OperandMatchResultTy 2388 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2389 // TODO: add syntactic sugar for 1/(2*PI) 2390 2391 assert(!isRegister()); 2392 assert(!isModifier()); 2393 2394 const auto& Tok = getToken(); 2395 const auto& NextTok = peekToken(); 2396 bool IsReal = Tok.is(AsmToken::Real); 2397 SMLoc S = getLoc(); 2398 bool Negate = false; 2399 2400 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2401 lex(); 2402 IsReal = true; 2403 Negate = true; 2404 } 2405 2406 if (IsReal) { 2407 // Floating-point expressions are not supported. 2408 // Can only allow floating-point literals with an 2409 // optional sign. 2410 2411 StringRef Num = getTokenStr(); 2412 lex(); 2413 2414 APFloat RealVal(APFloat::IEEEdouble()); 2415 auto roundMode = APFloat::rmNearestTiesToEven; 2416 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2417 return MatchOperand_ParseFail; 2418 } 2419 if (Negate) 2420 RealVal.changeSign(); 2421 2422 Operands.push_back( 2423 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2424 AMDGPUOperand::ImmTyNone, true)); 2425 2426 return MatchOperand_Success; 2427 2428 } else { 2429 int64_t IntVal; 2430 const MCExpr *Expr; 2431 SMLoc S = getLoc(); 2432 2433 if (HasSP3AbsModifier) { 2434 // This is a workaround for handling expressions 2435 // as arguments of SP3 'abs' modifier, for example: 2436 // |1.0| 2437 // |-1| 2438 // |1+x| 2439 // This syntax is not compatible with syntax of standard 2440 // MC expressions (due to the trailing '|'). 
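      // (Presumably the reason a full parseExpression() is avoided here:
      // '|' is also the bitwise-OR operator in standard MC expressions, so
      // it could swallow the closing '|' of the SP3 abs modifier.)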
2441 SMLoc EndLoc; 2442 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2443 return MatchOperand_ParseFail; 2444 } else { 2445 if (Parser.parseExpression(Expr)) 2446 return MatchOperand_ParseFail; 2447 } 2448 2449 if (Expr->evaluateAsAbsolute(IntVal)) { 2450 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2451 } else { 2452 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2453 } 2454 2455 return MatchOperand_Success; 2456 } 2457 2458 return MatchOperand_NoMatch; 2459 } 2460 2461 OperandMatchResultTy 2462 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2463 if (!isRegister()) 2464 return MatchOperand_NoMatch; 2465 2466 if (auto R = parseRegister()) { 2467 assert(R->isReg()); 2468 Operands.push_back(std::move(R)); 2469 return MatchOperand_Success; 2470 } 2471 return MatchOperand_ParseFail; 2472 } 2473 2474 OperandMatchResultTy 2475 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2476 auto res = parseReg(Operands); 2477 if (res != MatchOperand_NoMatch) { 2478 return res; 2479 } else if (isModifier()) { 2480 return MatchOperand_NoMatch; 2481 } else { 2482 return parseImm(Operands, HasSP3AbsMod); 2483 } 2484 } 2485 2486 bool 2487 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2488 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2489 const auto &str = Token.getString(); 2490 return str == "abs" || str == "neg" || str == "sext"; 2491 } 2492 return false; 2493 } 2494 2495 bool 2496 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2497 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2498 } 2499 2500 bool 2501 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2502 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2503 } 2504 2505 bool 2506 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2507 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2508 } 2509 2510 // Check if this is an operand modifier or an opcode modifier 2511 // which may look like an expression but it is not. We should 2512 // avoid parsing these modifiers as expressions. Currently 2513 // recognized sequences are: 2514 // |...| 2515 // abs(...) 2516 // neg(...) 2517 // sext(...) 2518 // -reg 2519 // -|...| 2520 // -abs(...) 2521 // name:... 2522 // Note that simple opcode modifiers like 'gds' may be parsed as 2523 // expressions; this is a special case. See getExpressionAsToken. 2524 // 2525 bool 2526 AMDGPUAsmParser::isModifier() { 2527 2528 AsmToken Tok = getToken(); 2529 AsmToken NextToken[2]; 2530 peekTokens(NextToken); 2531 2532 return isOperandModifier(Tok, NextToken[0]) || 2533 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2534 isOpcodeModifierWithVal(Tok, NextToken[0]); 2535 } 2536 2537 // Check if the current token is an SP3 'neg' modifier. 2538 // Currently this modifier is allowed in the following context: 2539 // 2540 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2541 // 2. Before an 'abs' modifier: -abs(...) 2542 // 3. Before an SP3 'abs' modifier: -|...| 2543 // 2544 // In all other cases "-" is handled as a part 2545 // of an expression that follows the sign. 
2546 // 2547 // Note: When "-" is followed by an integer literal, 2548 // this is interpreted as integer negation rather 2549 // than a floating-point NEG modifier applied to the literal. 2550 // Besides being counter-intuitive, such use of the floating-point 2551 // NEG modifier would have resulted in a different meaning 2552 // of integer literals used with VOP1/2/C and VOP3, 2553 // for example: 2554 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2555 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2556 // Negative fp literals with a preceding "-" are 2557 // handled likewise for uniformity. 2558 // 2559 bool 2560 AMDGPUAsmParser::parseSP3NegModifier() { 2561 2562 AsmToken NextToken[2]; 2563 peekTokens(NextToken); 2564 2565 if (isToken(AsmToken::Minus) && 2566 (isRegister(NextToken[0], NextToken[1]) || 2567 NextToken[0].is(AsmToken::Pipe) || 2568 isId(NextToken[0], "abs"))) { 2569 lex(); 2570 return true; 2571 } 2572 2573 return false; 2574 } 2575 2576 OperandMatchResultTy 2577 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2578 bool AllowImm) { 2579 bool Neg, SP3Neg; 2580 bool Abs, SP3Abs; 2581 SMLoc Loc; 2582 2583 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2584 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2585 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2586 return MatchOperand_ParseFail; 2587 } 2588 2589 SP3Neg = parseSP3NegModifier(); 2590 2591 Loc = getLoc(); 2592 Neg = trySkipId("neg"); 2593 if (Neg && SP3Neg) { 2594 Error(Loc, "expected register or immediate"); 2595 return MatchOperand_ParseFail; 2596 } 2597 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2598 return MatchOperand_ParseFail; 2599 2600 Abs = trySkipId("abs"); 2601 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2602 return MatchOperand_ParseFail; 2603 2604 Loc = getLoc(); 2605 SP3Abs = trySkipToken(AsmToken::Pipe); 2606 if (Abs && SP3Abs) { 2607 Error(Loc, "expected register or immediate"); 2608 return MatchOperand_ParseFail; 2609 } 2610 2611 OperandMatchResultTy Res; 2612 if (AllowImm) { 2613 Res = parseRegOrImm(Operands, SP3Abs); 2614 } else { 2615 Res = parseReg(Operands); 2616 } 2617 if (Res != MatchOperand_Success) { 2618 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2619 } 2620 2621 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2622 return MatchOperand_ParseFail; 2623 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2624 return MatchOperand_ParseFail; 2625 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2626 return MatchOperand_ParseFail; 2627 2628 AMDGPUOperand::Modifiers Mods; 2629 Mods.Abs = Abs || SP3Abs; 2630 Mods.Neg = Neg || SP3Neg; 2631 2632 if (Mods.hasFPModifiers()) { 2633 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2634 if (Op.isExpr()) { 2635 Error(Op.getStartLoc(), "expected an absolute expression"); 2636 return MatchOperand_ParseFail; 2637 } 2638 Op.setModifiers(Mods); 2639 } 2640 return MatchOperand_Success; 2641 } 2642 2643 OperandMatchResultTy 2644 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2645 bool AllowImm) { 2646 bool Sext = trySkipId("sext"); 2647 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2648 return MatchOperand_ParseFail; 2649 2650 OperandMatchResultTy Res; 2651 if (AllowImm) { 2652 Res = parseRegOrImm(Operands); 2653 } else { 2654 Res = parseReg(Operands); 2655 } 2656 if (Res != MatchOperand_Success) { 2657 return Sext? MatchOperand_ParseFail : Res; 2658 } 2659 2660 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2661 return MatchOperand_ParseFail; 2662 2663 AMDGPUOperand::Modifiers Mods; 2664 Mods.Sext = Sext; 2665 2666 if (Mods.hasIntModifiers()) { 2667 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2668 if (Op.isExpr()) { 2669 Error(Op.getStartLoc(), "expected an absolute expression"); 2670 return MatchOperand_ParseFail; 2671 } 2672 Op.setModifiers(Mods); 2673 } 2674 2675 return MatchOperand_Success; 2676 } 2677 2678 OperandMatchResultTy 2679 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2680 return parseRegOrImmWithFPInputMods(Operands, false); 2681 } 2682 2683 OperandMatchResultTy 2684 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2685 return parseRegOrImmWithIntInputMods(Operands, false); 2686 } 2687 2688 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2689 auto Loc = getLoc(); 2690 if (trySkipId("off")) { 2691 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2692 AMDGPUOperand::ImmTyOff, false)); 2693 return MatchOperand_Success; 2694 } 2695 2696 if (!isRegister()) 2697 return MatchOperand_NoMatch; 2698 2699 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2700 if (Reg) { 2701 Operands.push_back(std::move(Reg)); 2702 return MatchOperand_Success; 2703 } 2704 2705 return MatchOperand_ParseFail; 2706 2707 } 2708 2709 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2710 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2711 2712 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2713 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2714 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2715 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2716 return Match_InvalidOperand; 2717 2718 if ((TSFlags & SIInstrFlags::VOP3) && 2719 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2720 getForcedEncodingSize() != 64) 2721 return Match_PreferE32; 2722 2723 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2724 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2725 // v_mac_f32/16 allow only dst_sel == DWORD; 2726 auto OpNum = 2727 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2728 const auto &Op = Inst.getOperand(OpNum); 2729 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2730 return Match_InvalidOperand; 2731 } 2732 } 2733 2734 return Match_Success; 2735 } 2736 2737 // What asm variants we should check 2738 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2739 if (getForcedEncodingSize() == 32) { 2740 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2741 return makeArrayRef(Variants); 2742 } 2743 2744 if (isForcedVOP3()) { 2745 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2746 return makeArrayRef(Variants); 2747 } 2748 2749 if (isForcedSDWA()) { 2750 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2751 AMDGPUAsmVariants::SDWA9}; 2752 return makeArrayRef(Variants); 2753 } 2754 2755 if (isForcedDPP()) { 2756 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2757 return makeArrayRef(Variants); 2758 } 2759 2760 static const unsigned Variants[] = { 2761 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2762 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2763 }; 2764 2765 return makeArrayRef(Variants); 2766 } 2767 2768 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2769 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2770 const unsigned Num = Desc.getNumImplicitUses(); 2771 for (unsigned i = 0; i < Num; ++i) { 2772 unsigned Reg = Desc.ImplicitUses[i]; 2773 switch (Reg) { 2774 case AMDGPU::FLAT_SCR: 2775 case AMDGPU::VCC: 2776 case AMDGPU::VCC_LO: 2777 case AMDGPU::VCC_HI: 2778 case AMDGPU::M0: 2779 return Reg; 2780 default: 2781 break; 2782 } 2783 } 2784 return AMDGPU::NoRegister; 2785 } 2786 2787 // NB: This code is correct only when used to check constant 2788 // bus limitations because GFX7 support no f16 inline constants. 2789 // Note that there are no cases when a GFX7 opcode violates 2790 // constant bus limitations due to the use of an f16 constant. 
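// An illustrative example of the distinction made below, assuming the usual
// inline-constant range of -16..64 for integers: with a 4-byte operand the
// values 64 and -16 are inline constants, whereas 100 is not and has to be
// encoded as a 32-bit literal.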
2791 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2792 unsigned OpIdx) const { 2793 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2794 2795 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2796 return false; 2797 } 2798 2799 const MCOperand &MO = Inst.getOperand(OpIdx); 2800 2801 int64_t Val = MO.getImm(); 2802 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2803 2804 switch (OpSize) { // expected operand size 2805 case 8: 2806 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2807 case 4: 2808 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2809 case 2: { 2810 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2811 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2812 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2813 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2814 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2815 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2816 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2817 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2818 } else { 2819 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2820 } 2821 } 2822 default: 2823 llvm_unreachable("invalid operand size"); 2824 } 2825 } 2826 2827 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2828 if (!isGFX10()) 2829 return 1; 2830 2831 switch (Opcode) { 2832 // 64-bit shift instructions can use only one scalar value input 2833 case AMDGPU::V_LSHLREV_B64: 2834 case AMDGPU::V_LSHLREV_B64_gfx10: 2835 case AMDGPU::V_LSHL_B64: 2836 case AMDGPU::V_LSHRREV_B64: 2837 case AMDGPU::V_LSHRREV_B64_gfx10: 2838 case AMDGPU::V_LSHR_B64: 2839 case AMDGPU::V_ASHRREV_I64: 2840 case AMDGPU::V_ASHRREV_I64_gfx10: 2841 case AMDGPU::V_ASHR_I64: 2842 return 1; 2843 default: 2844 return 2; 2845 } 2846 } 2847 2848 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2849 const MCOperand &MO = Inst.getOperand(OpIdx); 2850 if (MO.isImm()) { 2851 return !isInlineConstant(Inst, OpIdx); 2852 } else if (MO.isReg()) { 2853 auto Reg = MO.getReg(); 2854 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2855 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2856 } else { 2857 return true; 2858 } 2859 } 2860 2861 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2862 const unsigned Opcode = Inst.getOpcode(); 2863 const MCInstrDesc &Desc = MII.get(Opcode); 2864 unsigned ConstantBusUseCount = 0; 2865 unsigned NumLiterals = 0; 2866 unsigned LiteralSize; 2867 2868 if (Desc.TSFlags & 2869 (SIInstrFlags::VOPC | 2870 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2871 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2872 SIInstrFlags::SDWA)) { 2873 // Check special imm operands (used by madmk, etc) 2874 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2875 ++ConstantBusUseCount; 2876 } 2877 2878 SmallDenseSet<unsigned> SGPRsUsed; 2879 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2880 if (SGPRUsed != AMDGPU::NoRegister) { 2881 SGPRsUsed.insert(SGPRUsed); 2882 ++ConstantBusUseCount; 2883 } 2884 2885 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2886 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2887 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2888 2889 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2890 2891 for (int OpIdx : OpIndices) { 2892 if (OpIdx == -1) break; 2893 2894 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2895 if (usesConstantBus(Inst, OpIdx)) { 2896 if (MO.isReg()) { 2897 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2898 // Pairs of registers with a partial intersections like these 2899 // s0, s[0:1] 2900 // flat_scratch_lo, flat_scratch 2901 // flat_scratch_lo, flat_scratch_hi 2902 // are theoretically valid but they are disabled anyway. 2903 // Note that this code mimics SIInstrInfo::verifyInstruction 2904 if (!SGPRsUsed.count(Reg)) { 2905 SGPRsUsed.insert(Reg); 2906 ++ConstantBusUseCount; 2907 } 2908 } else { // Expression or a literal 2909 2910 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2911 continue; // special operand like VINTERP attr_chan 2912 2913 // An instruction may use only one literal. 2914 // This has been validated on the previous step. 2915 // See validateVOP3Literal. 2916 // This literal may be used as more than one operand. 2917 // If all these operands are of the same size, 2918 // this literal counts as one scalar value. 2919 // Otherwise it counts as 2 scalar values. 2920 // See "GFX10 Shader Programming", section 3.6.2.3. 2921 2922 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2923 if (Size < 4) Size = 4; 2924 2925 if (NumLiterals == 0) { 2926 NumLiterals = 1; 2927 LiteralSize = Size; 2928 } else if (LiteralSize != Size) { 2929 NumLiterals = 2; 2930 } 2931 } 2932 } 2933 } 2934 } 2935 ConstantBusUseCount += NumLiterals; 2936 2937 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2938 } 2939 2940 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2941 const unsigned Opcode = Inst.getOpcode(); 2942 const MCInstrDesc &Desc = MII.get(Opcode); 2943 2944 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2945 if (DstIdx == -1 || 2946 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2947 return true; 2948 } 2949 2950 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2951 2952 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2953 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2954 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2955 2956 assert(DstIdx != -1); 2957 const MCOperand &Dst = Inst.getOperand(DstIdx); 2958 assert(Dst.isReg()); 2959 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2960 2961 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2962 2963 for (int SrcIdx : SrcIndices) { 2964 if (SrcIdx == -1) break; 2965 const MCOperand &Src = Inst.getOperand(SrcIdx); 2966 if (Src.isReg()) { 2967 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2968 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2969 return false; 2970 } 2971 } 2972 } 2973 2974 return true; 2975 } 2976 2977 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2978 2979 const unsigned Opc = Inst.getOpcode(); 2980 const MCInstrDesc &Desc = MII.get(Opc); 2981 2982 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2983 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2984 assert(ClampIdx != -1); 2985 return Inst.getOperand(ClampIdx).getImm() == 0; 2986 } 2987 2988 return true; 2989 } 2990 2991 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2992 2993 const unsigned Opc = Inst.getOpcode(); 2994 const MCInstrDesc &Desc = MII.get(Opc); 2995 2996 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2997 return true; 2998 2999 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3000 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3001 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3002 3003 assert(VDataIdx != -1); 3004 assert(DMaskIdx != -1); 3005 assert(TFEIdx != -1); 3006 3007 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3008 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3009 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3010 if (DMask == 0) 3011 DMask = 1; 3012 3013 unsigned DataSize = 3014 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3015 if (hasPackedD16()) { 3016 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3017 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3018 DataSize = (DataSize + 1) / 2; 3019 } 3020 3021 return (VDataSize / 4) == DataSize + TFESize; 3022 } 3023 3024 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3025 const unsigned Opc = Inst.getOpcode(); 3026 const MCInstrDesc &Desc = MII.get(Opc); 3027 3028 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3029 return true; 3030 3031 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3032 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3033 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3034 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3035 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3036 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3037 3038 assert(VAddr0Idx != -1); 3039 assert(SrsrcIdx != -1); 3040 assert(DimIdx != -1); 3041 assert(SrsrcIdx > VAddr0Idx); 3042 3043 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3044 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3045 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3046 unsigned VAddrSize = 3047 IsNSA ? SrsrcIdx - VAddr0Idx 3048 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3049 3050 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3051 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3052 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3053 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3054 if (!IsNSA) { 3055 if (AddrSize > 8) 3056 AddrSize = 16; 3057 else if (AddrSize > 4) 3058 AddrSize = 8; 3059 } 3060 3061 return VAddrSize == AddrSize; 3062 } 3063 3064 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3065 3066 const unsigned Opc = Inst.getOpcode(); 3067 const MCInstrDesc &Desc = MII.get(Opc); 3068 3069 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3070 return true; 3071 if (!Desc.mayLoad() || !Desc.mayStore()) 3072 return true; // Not atomic 3073 3074 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3075 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3076 3077 // This is an incomplete check because image_atomic_cmpswap 3078 // may only use 0x3 and 0xf while other atomic operations 3079 // may use 0x1 and 0x3. However these limitations are 3080 // verified when we check that dmask matches dst size. 
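  // Illustrative examples (not exhaustive): image_atomic_add typically uses
  // dmask == 0x1 for 32-bit data, while image_atomic_cmpswap carries both the
  // swap and compare values and uses dmask == 0x3 (0xf for 64-bit data).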
3081 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3082 } 3083 3084 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3085 3086 const unsigned Opc = Inst.getOpcode(); 3087 const MCInstrDesc &Desc = MII.get(Opc); 3088 3089 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3090 return true; 3091 3092 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3093 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3094 3095 // GATHER4 instructions use dmask in a different fashion compared to 3096 // other MIMG instructions. The only useful DMASK values are 3097 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3098 // (red,red,red,red) etc.) The ISA document doesn't mention 3099 // this. 3100 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3101 } 3102 3103 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3104 { 3105 switch (Opcode) { 3106 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3107 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3108 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3109 return true; 3110 default: 3111 return false; 3112 } 3113 } 3114 3115 // movrels* opcodes should only allow VGPRS as src0. 3116 // This is specified in .td description for vop1/vop3, 3117 // but sdwa is handled differently. See isSDWAOperand. 3118 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3119 3120 const unsigned Opc = Inst.getOpcode(); 3121 const MCInstrDesc &Desc = MII.get(Opc); 3122 3123 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3124 return true; 3125 3126 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3127 assert(Src0Idx != -1); 3128 3129 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3130 if (!Src0.isReg()) 3131 return false; 3132 3133 auto Reg = Src0.getReg(); 3134 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3135 return !isSGPR(mc2PseudoReg(Reg), TRI); 3136 } 3137 3138 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3139 3140 const unsigned Opc = Inst.getOpcode(); 3141 const MCInstrDesc &Desc = MII.get(Opc); 3142 3143 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3144 return true; 3145 3146 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3147 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3148 if (isCI() || isSI()) 3149 return false; 3150 } 3151 3152 return true; 3153 } 3154 3155 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3156 const unsigned Opc = Inst.getOpcode(); 3157 const MCInstrDesc &Desc = MII.get(Opc); 3158 3159 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3160 return true; 3161 3162 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3163 if (DimIdx < 0) 3164 return true; 3165 3166 long Imm = Inst.getOperand(DimIdx).getImm(); 3167 if (Imm < 0 || Imm >= 8) 3168 return false; 3169 3170 return true; 3171 } 3172 3173 static bool IsRevOpcode(const unsigned Opcode) 3174 { 3175 switch (Opcode) { 3176 case AMDGPU::V_SUBREV_F32_e32: 3177 case AMDGPU::V_SUBREV_F32_e64: 3178 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3179 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3180 case AMDGPU::V_SUBREV_F32_e32_vi: 3181 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3182 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3183 case AMDGPU::V_SUBREV_F32_e64_vi: 3184 3185 case AMDGPU::V_SUBREV_I32_e32: 3186 case AMDGPU::V_SUBREV_I32_e64: 3187 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3188 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3189 3190 case AMDGPU::V_SUBBREV_U32_e32: 3191 case AMDGPU::V_SUBBREV_U32_e64: 3192 case 
AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3193 case AMDGPU::V_SUBBREV_U32_e32_vi: 3194 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3195 case AMDGPU::V_SUBBREV_U32_e64_vi: 3196 3197 case AMDGPU::V_SUBREV_U32_e32: 3198 case AMDGPU::V_SUBREV_U32_e64: 3199 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3200 case AMDGPU::V_SUBREV_U32_e32_vi: 3201 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3202 case AMDGPU::V_SUBREV_U32_e64_vi: 3203 3204 case AMDGPU::V_SUBREV_F16_e32: 3205 case AMDGPU::V_SUBREV_F16_e64: 3206 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3207 case AMDGPU::V_SUBREV_F16_e32_vi: 3208 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3209 case AMDGPU::V_SUBREV_F16_e64_vi: 3210 3211 case AMDGPU::V_SUBREV_U16_e32: 3212 case AMDGPU::V_SUBREV_U16_e64: 3213 case AMDGPU::V_SUBREV_U16_e32_vi: 3214 case AMDGPU::V_SUBREV_U16_e64_vi: 3215 3216 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3217 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3218 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3219 3220 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3221 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3222 3223 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3224 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3225 3226 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3227 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3228 3229 case AMDGPU::V_LSHRREV_B32_e32: 3230 case AMDGPU::V_LSHRREV_B32_e64: 3231 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3232 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3233 case AMDGPU::V_LSHRREV_B32_e32_vi: 3234 case AMDGPU::V_LSHRREV_B32_e64_vi: 3235 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3236 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3237 3238 case AMDGPU::V_ASHRREV_I32_e32: 3239 case AMDGPU::V_ASHRREV_I32_e64: 3240 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3241 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3242 case AMDGPU::V_ASHRREV_I32_e32_vi: 3243 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3244 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3245 case AMDGPU::V_ASHRREV_I32_e64_vi: 3246 3247 case AMDGPU::V_LSHLREV_B32_e32: 3248 case AMDGPU::V_LSHLREV_B32_e64: 3249 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3250 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3251 case AMDGPU::V_LSHLREV_B32_e32_vi: 3252 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3253 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3254 case AMDGPU::V_LSHLREV_B32_e64_vi: 3255 3256 case AMDGPU::V_LSHLREV_B16_e32: 3257 case AMDGPU::V_LSHLREV_B16_e64: 3258 case AMDGPU::V_LSHLREV_B16_e32_vi: 3259 case AMDGPU::V_LSHLREV_B16_e64_vi: 3260 case AMDGPU::V_LSHLREV_B16_gfx10: 3261 3262 case AMDGPU::V_LSHRREV_B16_e32: 3263 case AMDGPU::V_LSHRREV_B16_e64: 3264 case AMDGPU::V_LSHRREV_B16_e32_vi: 3265 case AMDGPU::V_LSHRREV_B16_e64_vi: 3266 case AMDGPU::V_LSHRREV_B16_gfx10: 3267 3268 case AMDGPU::V_ASHRREV_I16_e32: 3269 case AMDGPU::V_ASHRREV_I16_e64: 3270 case AMDGPU::V_ASHRREV_I16_e32_vi: 3271 case AMDGPU::V_ASHRREV_I16_e64_vi: 3272 case AMDGPU::V_ASHRREV_I16_gfx10: 3273 3274 case AMDGPU::V_LSHLREV_B64: 3275 case AMDGPU::V_LSHLREV_B64_gfx10: 3276 case AMDGPU::V_LSHLREV_B64_vi: 3277 3278 case AMDGPU::V_LSHRREV_B64: 3279 case AMDGPU::V_LSHRREV_B64_gfx10: 3280 case AMDGPU::V_LSHRREV_B64_vi: 3281 3282 case AMDGPU::V_ASHRREV_I64: 3283 case AMDGPU::V_ASHRREV_I64_gfx10: 3284 case AMDGPU::V_ASHRREV_I64_vi: 3285 3286 case AMDGPU::V_PK_LSHLREV_B16: 3287 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3288 case AMDGPU::V_PK_LSHLREV_B16_vi: 3289 3290 case AMDGPU::V_PK_LSHRREV_B16: 3291 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3292 case AMDGPU::V_PK_LSHRREV_B16_vi: 3293 case AMDGPU::V_PK_ASHRREV_I16: 3294 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3295 case AMDGPU::V_PK_ASHRREV_I16_vi: 3296 return true; 
3297 default: 3298 return false; 3299 } 3300 } 3301 3302 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3303 3304 using namespace SIInstrFlags; 3305 const unsigned Opcode = Inst.getOpcode(); 3306 const MCInstrDesc &Desc = MII.get(Opcode); 3307 3308 // lds_direct register is defined so that it can be used 3309 // with 9-bit operands only. Ignore encodings which do not accept these. 3310 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3311 return true; 3312 3313 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3314 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3315 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3316 3317 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3318 3319 // lds_direct cannot be specified as either src1 or src2. 3320 for (int SrcIdx : SrcIndices) { 3321 if (SrcIdx == -1) break; 3322 const MCOperand &Src = Inst.getOperand(SrcIdx); 3323 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3324 return false; 3325 } 3326 } 3327 3328 if (Src0Idx == -1) 3329 return true; 3330 3331 const MCOperand &Src = Inst.getOperand(Src0Idx); 3332 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3333 return true; 3334 3335 // lds_direct is specified as src0. Check additional limitations. 3336 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3337 } 3338 3339 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3340 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3341 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3342 if (Op.isFlatOffset()) 3343 return Op.getStartLoc(); 3344 } 3345 return getLoc(); 3346 } 3347 3348 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3349 const OperandVector &Operands) { 3350 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3351 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3352 return true; 3353 3354 auto Opcode = Inst.getOpcode(); 3355 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3356 assert(OpNum != -1); 3357 3358 const auto &Op = Inst.getOperand(OpNum); 3359 if (!hasFlatOffsets() && Op.getImm() != 0) { 3360 Error(getFlatOffsetLoc(Operands), 3361 "flat offset modifier is not supported on this GPU"); 3362 return false; 3363 } 3364 3365 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3366 // For FLAT segment the offset must be positive; 3367 // MSB is ignored and forced to zero. 3368 unsigned OffsetSize = isGFX9() ? 13 : 12; 3369 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3370 if (!isIntN(OffsetSize, Op.getImm())) { 3371 Error(getFlatOffsetLoc(Operands), 3372 isGFX9() ? "expected a 13-bit signed offset" : 3373 "expected a 12-bit signed offset"); 3374 return false; 3375 } 3376 } else { 3377 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3378 Error(getFlatOffsetLoc(Operands), 3379 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3380 "expected an 11-bit unsigned offset"); 3381 return false; 3382 } 3383 } 3384 3385 return true; 3386 } 3387 3388 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3389 unsigned Opcode = Inst.getOpcode(); 3390 const MCInstrDesc &Desc = MII.get(Opcode); 3391 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3392 return true; 3393 3394 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3395 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3396 3397 const int OpIndices[] = { Src0Idx, Src1Idx }; 3398 3399 unsigned NumExprs = 0; 3400 unsigned NumLiterals = 0; 3401 uint32_t LiteralValue; 3402 3403 for (int OpIdx : OpIndices) { 3404 if (OpIdx == -1) break; 3405 3406 const MCOperand &MO = Inst.getOperand(OpIdx); 3407 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3408 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3409 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3410 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3411 if (NumLiterals == 0 || LiteralValue != Value) { 3412 LiteralValue = Value; 3413 ++NumLiterals; 3414 } 3415 } else if (MO.isExpr()) { 3416 ++NumExprs; 3417 } 3418 } 3419 } 3420 3421 return NumLiterals + NumExprs <= 1; 3422 } 3423 3424 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3425 const unsigned Opc = Inst.getOpcode(); 3426 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3427 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3428 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3429 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3430 3431 if (OpSel & ~3) 3432 return false; 3433 } 3434 return true; 3435 } 3436 3437 // Check if VCC register matches wavefront size 3438 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3439 auto FB = getFeatureBits(); 3440 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3441 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3442 } 3443 3444 // VOP3 literal is only allowed in GFX10+ and only one can be used 3445 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3446 unsigned Opcode = Inst.getOpcode(); 3447 const MCInstrDesc &Desc = MII.get(Opcode); 3448 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3449 return true; 3450 3451 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3452 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3453 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3454 3455 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3456 3457 unsigned NumExprs = 0; 3458 unsigned NumLiterals = 0; 3459 uint32_t LiteralValue; 3460 3461 for (int OpIdx : OpIndices) { 3462 if (OpIdx == -1) break; 3463 3464 const MCOperand &MO = Inst.getOperand(OpIdx); 3465 if (!MO.isImm() && !MO.isExpr()) 3466 continue; 3467 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3468 continue; 3469 3470 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3471 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3472 return false; 3473 3474 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3475 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3476 if (NumLiterals == 0 || LiteralValue != Value) { 3477 LiteralValue = Value; 3478 ++NumLiterals; 3479 } 3480 } else if (MO.isExpr()) { 3481 ++NumExprs; 3482 } 3483 } 3484 NumLiterals += NumExprs; 3485 3486 return !NumLiterals || 3487 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3488 } 3489 3490 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3491 const SMLoc &IDLoc, 3492 const OperandVector &Operands) { 3493 if (!validateLdsDirect(Inst)) { 3494 Error(IDLoc, 3495 "invalid use of lds_direct"); 3496 return false; 3497 } 3498 if (!validateSOPLiteral(Inst)) { 3499 Error(IDLoc, 3500 "only one literal operand is allowed"); 3501 return false; 3502 } 3503 if (!validateVOP3Literal(Inst)) { 3504 Error(IDLoc, 3505 "invalid literal operand"); 3506 return false; 3507 } 3508 if (!validateConstantBusLimitations(Inst)) { 3509 Error(IDLoc, 3510 "invalid operand (violates constant bus restrictions)"); 3511 return false; 3512 } 3513 if (!validateEarlyClobberLimitations(Inst)) { 3514 Error(IDLoc, 3515 "destination must be different than all sources"); 3516 return false; 3517 } 3518 if (!validateIntClampSupported(Inst)) { 3519 Error(IDLoc, 3520 "integer clamping is not supported on this GPU"); 3521 return false; 3522 } 3523 if (!validateOpSel(Inst)) { 3524 Error(IDLoc, 3525 "invalid op_sel operand"); 3526 return false; 3527 } 3528 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3529 if (!validateMIMGD16(Inst)) { 3530 Error(IDLoc, 3531 "d16 modifier is not supported on this GPU"); 3532 return false; 3533 } 3534 if (!validateMIMGDim(Inst)) { 3535 Error(IDLoc, "dim modifier is required on this GPU"); 3536 return false; 3537 } 3538 if (!validateMIMGDataSize(Inst)) { 3539 Error(IDLoc, 3540 "image data size does not match dmask and tfe"); 3541 return false; 3542 } 3543 if (!validateMIMGAddrSize(Inst)) { 3544 Error(IDLoc, 3545 "image address size does not match dim and a16"); 3546 return false; 3547 } 3548 if (!validateMIMGAtomicDMask(Inst)) { 3549 Error(IDLoc, 3550 "invalid atomic image dmask"); 3551 return false; 3552 } 3553 if (!validateMIMGGatherDMask(Inst)) { 3554 Error(IDLoc, 3555 "invalid image_gather dmask: only one bit must be set"); 3556 return false; 3557 } 3558 if (!validateMovrels(Inst)) { 3559 Error(IDLoc, "source operand must be a VGPR"); 3560 return false; 3561 } 3562 if (!validateFlatOffset(Inst, Operands)) { 3563 return false; 3564 } 3565 3566 return true; 3567 } 3568 3569 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3570 const FeatureBitset &FBS, 3571 unsigned VariantID = 0); 3572 3573 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3574 OperandVector &Operands, 3575 MCStreamer &Out, 3576 uint64_t &ErrorInfo, 3577 bool MatchingInlineAsm) { 3578 MCInst Inst; 3579 unsigned Result = Match_Success; 3580 for (auto Variant : getMatchedVariants()) { 3581 uint64_t EI; 3582 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3583 Variant); 3584 // We order match statuses from least to most specific. 
We use most specific 3585 // status as resulting 3586 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3587 if ((R == Match_Success) || 3588 (R == Match_PreferE32) || 3589 (R == Match_MissingFeature && Result != Match_PreferE32) || 3590 (R == Match_InvalidOperand && Result != Match_MissingFeature 3591 && Result != Match_PreferE32) || 3592 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3593 && Result != Match_MissingFeature 3594 && Result != Match_PreferE32)) { 3595 Result = R; 3596 ErrorInfo = EI; 3597 } 3598 if (R == Match_Success) 3599 break; 3600 } 3601 3602 switch (Result) { 3603 default: break; 3604 case Match_Success: 3605 if (!validateInstruction(Inst, IDLoc, Operands)) { 3606 return true; 3607 } 3608 Inst.setLoc(IDLoc); 3609 Out.emitInstruction(Inst, getSTI()); 3610 return false; 3611 3612 case Match_MissingFeature: 3613 return Error(IDLoc, "instruction not supported on this GPU"); 3614 3615 case Match_MnemonicFail: { 3616 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3617 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3618 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3619 return Error(IDLoc, "invalid instruction" + Suggestion, 3620 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3621 } 3622 3623 case Match_InvalidOperand: { 3624 SMLoc ErrorLoc = IDLoc; 3625 if (ErrorInfo != ~0ULL) { 3626 if (ErrorInfo >= Operands.size()) { 3627 return Error(IDLoc, "too few operands for instruction"); 3628 } 3629 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3630 if (ErrorLoc == SMLoc()) 3631 ErrorLoc = IDLoc; 3632 } 3633 return Error(ErrorLoc, "invalid operand for instruction"); 3634 } 3635 3636 case Match_PreferE32: 3637 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3638 "should be encoded as e32"); 3639 } 3640 llvm_unreachable("Implement any new match types added!"); 3641 } 3642 3643 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3644 int64_t Tmp = -1; 3645 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3646 return true; 3647 } 3648 if (getParser().parseAbsoluteExpression(Tmp)) { 3649 return true; 3650 } 3651 Ret = static_cast<uint32_t>(Tmp); 3652 return false; 3653 } 3654 3655 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3656 uint32_t &Minor) { 3657 if (ParseAsAbsoluteExpression(Major)) 3658 return TokError("invalid major version"); 3659 3660 if (getLexer().isNot(AsmToken::Comma)) 3661 return TokError("minor version number required, comma expected"); 3662 Lex(); 3663 3664 if (ParseAsAbsoluteExpression(Minor)) 3665 return TokError("invalid minor version"); 3666 3667 return false; 3668 } 3669 3670 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3671 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3672 return TokError("directive only supported for amdgcn architecture"); 3673 3674 std::string Target; 3675 3676 SMLoc TargetStart = getTok().getLoc(); 3677 if (getParser().parseEscapedString(Target)) 3678 return true; 3679 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3680 3681 std::string ExpectedTarget; 3682 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3683 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3684 3685 if (Target != ExpectedTargetOS.str()) 3686 return getParser().Error(TargetRange.Start, "target must match options", 3687 TargetRange); 3688 3689 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3690 return false; 3691 } 3692 3693 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3694 return getParser().Error(Range.Start, "value out of range", Range); 3695 } 3696 3697 bool AMDGPUAsmParser::calculateGPRBlocks( 3698 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3699 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3700 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3701 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3702 // TODO(scott.linder): These calculations are duplicated from 3703 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3704 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3705 3706 unsigned NumVGPRs = NextFreeVGPR; 3707 unsigned NumSGPRs = NextFreeSGPR; 3708 3709 if (Version.Major >= 10) 3710 NumSGPRs = 0; 3711 else { 3712 unsigned MaxAddressableNumSGPRs = 3713 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3714 3715 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3716 NumSGPRs > MaxAddressableNumSGPRs) 3717 return OutOfRangeError(SGPRRange); 3718 3719 NumSGPRs += 3720 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3721 3722 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3723 NumSGPRs > MaxAddressableNumSGPRs) 3724 return OutOfRangeError(SGPRRange); 3725 3726 if (Features.test(FeatureSGPRInitBug)) 3727 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3728 } 3729 3730 VGPRBlocks = 3731 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3732 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3733 3734 return false; 3735 } 3736 3737 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3738 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3739 return TokError("directive only supported for amdgcn architecture"); 3740 3741 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3742 return TokError("directive only supported for amdhsa OS"); 3743 3744 StringRef KernelName; 3745 if (getParser().parseIdentifier(KernelName)) 3746 return true; 3747 3748 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3749 3750 StringSet<> Seen; 3751 3752 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3753 3754 SMRange VGPRRange; 3755 uint64_t NextFreeVGPR = 0; 3756 SMRange SGPRRange; 3757 uint64_t NextFreeSGPR = 0; 3758 unsigned UserSGPRCount = 0; 3759 bool ReserveVCC = true; 3760 bool ReserveFlatScr = true; 3761 bool ReserveXNACK = hasXNACK(); 3762 Optional<bool> EnableWavefrontSize32; 3763 3764 while (true) { 3765 while (getLexer().is(AsmToken::EndOfStatement)) 3766 Lex(); 3767 3768 if (getLexer().isNot(AsmToken::Identifier)) 3769 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3770 3771 StringRef ID = getTok().getIdentifier(); 3772 SMRange IDRange = getTok().getLocRange(); 3773 Lex(); 3774 3775 if (ID == ".end_amdhsa_kernel") 3776 break; 3777 3778 if (Seen.find(ID) != Seen.end()) 3779 return TokError(".amdhsa_ directives cannot be repeated"); 3780 Seen.insert(ID); 3781 3782 SMLoc ValStart = getTok().getLoc(); 3783 int64_t IVal; 3784 if (getParser().parseAbsoluteExpression(IVal)) 3785 return true; 3786 SMLoc ValEnd = getTok().getLoc(); 3787 SMRange ValRange = SMRange(ValStart, ValEnd); 3788 3789 if (IVal < 0) 3790 return OutOfRangeError(ValRange); 3791 3792 uint64_t Val = IVal; 3793 3794 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3795 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3796 return OutOfRangeError(RANGE); \ 3797 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3798 3799 if (ID == 
".amdhsa_group_segment_fixed_size") { 3800 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3801 return OutOfRangeError(ValRange); 3802 KD.group_segment_fixed_size = Val; 3803 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3804 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3805 return OutOfRangeError(ValRange); 3806 KD.private_segment_fixed_size = Val; 3807 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3808 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3809 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3810 Val, ValRange); 3811 if (Val) 3812 UserSGPRCount += 4; 3813 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3814 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3815 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3816 ValRange); 3817 if (Val) 3818 UserSGPRCount += 2; 3819 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3820 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3821 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3822 ValRange); 3823 if (Val) 3824 UserSGPRCount += 2; 3825 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3826 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3827 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3828 Val, ValRange); 3829 if (Val) 3830 UserSGPRCount += 2; 3831 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3832 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3833 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3834 ValRange); 3835 if (Val) 3836 UserSGPRCount += 2; 3837 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3838 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3839 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3840 ValRange); 3841 if (Val) 3842 UserSGPRCount += 2; 3843 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3844 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3845 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3846 Val, ValRange); 3847 if (Val) 3848 UserSGPRCount += 1; 3849 } else if (ID == ".amdhsa_wavefront_size32") { 3850 if (IVersion.Major < 10) 3851 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3852 IDRange); 3853 EnableWavefrontSize32 = Val; 3854 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3855 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3856 Val, ValRange); 3857 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3858 PARSE_BITS_ENTRY( 3859 KD.compute_pgm_rsrc2, 3860 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3861 ValRange); 3862 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3863 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3864 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3865 ValRange); 3866 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3868 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3869 ValRange); 3870 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3871 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3872 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3873 ValRange); 3874 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3875 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3876 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3877 ValRange); 3878 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3879 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3880 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3881 ValRange); 3882 } else if (ID == ".amdhsa_next_free_vgpr") { 3883 VGPRRange = ValRange; 3884 NextFreeVGPR = Val; 3885 } else if (ID == 
".amdhsa_next_free_sgpr") { 3886 SGPRRange = ValRange; 3887 NextFreeSGPR = Val; 3888 } else if (ID == ".amdhsa_reserve_vcc") { 3889 if (!isUInt<1>(Val)) 3890 return OutOfRangeError(ValRange); 3891 ReserveVCC = Val; 3892 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3893 if (IVersion.Major < 7) 3894 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3895 IDRange); 3896 if (!isUInt<1>(Val)) 3897 return OutOfRangeError(ValRange); 3898 ReserveFlatScr = Val; 3899 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3900 if (IVersion.Major < 8) 3901 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3902 IDRange); 3903 if (!isUInt<1>(Val)) 3904 return OutOfRangeError(ValRange); 3905 ReserveXNACK = Val; 3906 } else if (ID == ".amdhsa_float_round_mode_32") { 3907 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3908 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3909 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3910 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3911 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3912 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3913 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3914 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3915 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3916 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3917 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3918 ValRange); 3919 } else if (ID == ".amdhsa_dx10_clamp") { 3920 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3921 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3922 } else if (ID == ".amdhsa_ieee_mode") { 3923 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3924 Val, ValRange); 3925 } else if (ID == ".amdhsa_fp16_overflow") { 3926 if (IVersion.Major < 9) 3927 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3928 IDRange); 3929 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3930 ValRange); 3931 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3932 if (IVersion.Major < 10) 3933 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3934 IDRange); 3935 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3936 ValRange); 3937 } else if (ID == ".amdhsa_memory_ordered") { 3938 if (IVersion.Major < 10) 3939 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3940 IDRange); 3941 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3942 ValRange); 3943 } else if (ID == ".amdhsa_forward_progress") { 3944 if (IVersion.Major < 10) 3945 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3946 IDRange); 3947 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3948 ValRange); 3949 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3950 PARSE_BITS_ENTRY( 3951 KD.compute_pgm_rsrc2, 3952 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3953 ValRange); 3954 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3955 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3956 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3957 Val, ValRange); 3958 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3959 PARSE_BITS_ENTRY( 3960 KD.compute_pgm_rsrc2, 3961 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3962 ValRange); 3963 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3964 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3965 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3966 Val, ValRange); 3967 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3968 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3969 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3970 Val, ValRange); 3971 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3972 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3973 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3974 Val, ValRange); 3975 } else if (ID == ".amdhsa_exception_int_div_zero") { 3976 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3977 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3978 Val, ValRange); 3979 } else { 3980 return getParser().Error(IDRange.Start, 3981 "unknown .amdhsa_kernel directive", IDRange); 3982 } 3983 3984 #undef PARSE_BITS_ENTRY 3985 } 3986 3987 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3988 return TokError(".amdhsa_next_free_vgpr directive is required"); 3989 3990 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3991 return TokError(".amdhsa_next_free_sgpr directive is required"); 3992 3993 unsigned VGPRBlocks; 3994 unsigned SGPRBlocks; 3995 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3996 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3997 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3998 SGPRBlocks)) 3999 return true; 4000 4001 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4002 VGPRBlocks)) 4003 return OutOfRangeError(VGPRRange); 4004 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4005 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4006 4007 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4008 SGPRBlocks)) 4009 return OutOfRangeError(SGPRRange); 4010 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4011 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4012 SGPRBlocks); 4013 4014 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4015 return TokError("too many user SGPRs enabled"); 4016 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4017 UserSGPRCount); 4018 4019 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4020 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4021 ReserveFlatScr, ReserveXNACK); 4022 return false; 4023 } 4024 4025 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4026 uint32_t Major; 4027 uint32_t Minor; 4028 4029 if (ParseDirectiveMajorMinor(Major, Minor)) 4030 return true; 4031 4032 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4033 return false; 4034 } 4035 4036 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4037 uint32_t Major; 4038 uint32_t Minor; 4039 uint32_t Stepping; 4040 StringRef VendorName; 4041 StringRef ArchName; 4042 4043 // If this directive has no arguments, then use the ISA version for the 4044 // targeted GPU. 
4045 if (getLexer().is(AsmToken::EndOfStatement)) { 4046 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4047 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4048 ISA.Stepping, 4049 "AMD", "AMDGPU"); 4050 return false; 4051 } 4052 4053 if (ParseDirectiveMajorMinor(Major, Minor)) 4054 return true; 4055 4056 if (getLexer().isNot(AsmToken::Comma)) 4057 return TokError("stepping version number required, comma expected"); 4058 Lex(); 4059 4060 if (ParseAsAbsoluteExpression(Stepping)) 4061 return TokError("invalid stepping version"); 4062 4063 if (getLexer().isNot(AsmToken::Comma)) 4064 return TokError("vendor name required, comma expected"); 4065 Lex(); 4066 4067 if (getLexer().isNot(AsmToken::String)) 4068 return TokError("invalid vendor name"); 4069 4070 VendorName = getLexer().getTok().getStringContents(); 4071 Lex(); 4072 4073 if (getLexer().isNot(AsmToken::Comma)) 4074 return TokError("arch name required, comma expected"); 4075 Lex(); 4076 4077 if (getLexer().isNot(AsmToken::String)) 4078 return TokError("invalid arch name"); 4079 4080 ArchName = getLexer().getTok().getStringContents(); 4081 Lex(); 4082 4083 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4084 VendorName, ArchName); 4085 return false; 4086 } 4087 4088 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4089 amd_kernel_code_t &Header) { 4090 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4091 // assembly for backwards compatibility. 4092 if (ID == "max_scratch_backing_memory_byte_size") { 4093 Parser.eatToEndOfStatement(); 4094 return false; 4095 } 4096 4097 SmallString<40> ErrStr; 4098 raw_svector_ostream Err(ErrStr); 4099 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4100 return TokError(Err.str()); 4101 } 4102 Lex(); 4103 4104 if (ID == "enable_wavefront_size32") { 4105 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4106 if (!isGFX10()) 4107 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4108 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4109 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4110 } else { 4111 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4112 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4113 } 4114 } 4115 4116 if (ID == "wavefront_size") { 4117 if (Header.wavefront_size == 5) { 4118 if (!isGFX10()) 4119 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4120 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4121 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4122 } else if (Header.wavefront_size == 6) { 4123 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4124 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4125 } 4126 } 4127 4128 if (ID == "enable_wgp_mode") { 4129 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4130 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4131 } 4132 4133 if (ID == "enable_mem_ordered") { 4134 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4135 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4136 } 4137 4138 if (ID == "enable_fwd_progress") { 4139 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4140 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4141 } 4142 4143 return false; 4144 } 4145 4146 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4147 amd_kernel_code_t Header; 4148 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4149 4150 while (true) { 4151 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4152 // will set the current token to EndOfStatement. 4153 while(getLexer().is(AsmToken::EndOfStatement)) 4154 Lex(); 4155 4156 if (getLexer().isNot(AsmToken::Identifier)) 4157 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4158 4159 StringRef ID = getLexer().getTok().getIdentifier(); 4160 Lex(); 4161 4162 if (ID == ".end_amd_kernel_code_t") 4163 break; 4164 4165 if (ParseAMDKernelCodeTValue(ID, Header)) 4166 return true; 4167 } 4168 4169 getTargetStreamer().EmitAMDKernelCodeT(Header); 4170 4171 return false; 4172 } 4173 4174 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4175 if (getLexer().isNot(AsmToken::Identifier)) 4176 return TokError("expected symbol name"); 4177 4178 StringRef KernelName = Parser.getTok().getString(); 4179 4180 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4181 ELF::STT_AMDGPU_HSA_KERNEL); 4182 Lex(); 4183 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4184 KernelScope.initialize(getContext()); 4185 return false; 4186 } 4187 4188 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4189 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4190 return Error(getParser().getTok().getLoc(), 4191 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4192 "architectures"); 4193 } 4194 4195 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4196 4197 std::string ISAVersionStringFromSTI; 4198 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4199 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4200 4201 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4202 return Error(getParser().getTok().getLoc(), 4203 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4204 "arguments specified through the command line"); 4205 } 4206 4207 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4208 Lex(); 4209 4210 return false; 4211 } 4212 4213 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4214 const char *AssemblerDirectiveBegin; 4215 const char *AssemblerDirectiveEnd; 4216 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4217 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4218 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4219 HSAMD::V3::AssemblerDirectiveEnd) 4220 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4221 HSAMD::AssemblerDirectiveEnd); 4222 4223 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4224 return Error(getParser().getTok().getLoc(), 4225 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4226 "not available on non-amdhsa OSes")).str()); 4227 } 4228 4229 std::string HSAMetadataString; 4230 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4231 HSAMetadataString)) 4232 return true; 4233 4234 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4235 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4236 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4237 } else { 4238 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4239 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4240 } 4241 4242 return false; 4243 } 4244 4245 /// Common code to parse out a block of text (typically YAML) between start and 4246 /// end directives. 
4247 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4248 const char *AssemblerDirectiveEnd, 4249 std::string &CollectString) { 4250 4251 raw_string_ostream CollectStream(CollectString); 4252 4253 getLexer().setSkipSpace(false); 4254 4255 bool FoundEnd = false; 4256 while (!getLexer().is(AsmToken::Eof)) { 4257 while (getLexer().is(AsmToken::Space)) { 4258 CollectStream << getLexer().getTok().getString(); 4259 Lex(); 4260 } 4261 4262 if (getLexer().is(AsmToken::Identifier)) { 4263 StringRef ID = getLexer().getTok().getIdentifier(); 4264 if (ID == AssemblerDirectiveEnd) { 4265 Lex(); 4266 FoundEnd = true; 4267 break; 4268 } 4269 } 4270 4271 CollectStream << Parser.parseStringToEndOfStatement() 4272 << getContext().getAsmInfo()->getSeparatorString(); 4273 4274 Parser.eatToEndOfStatement(); 4275 } 4276 4277 getLexer().setSkipSpace(true); 4278 4279 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4280 return TokError(Twine("expected directive ") + 4281 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4282 } 4283 4284 CollectStream.flush(); 4285 return false; 4286 } 4287 4288 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4289 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4290 std::string String; 4291 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4292 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4293 return true; 4294 4295 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4296 if (!PALMetadata->setFromString(String)) 4297 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4298 return false; 4299 } 4300 4301 /// Parse the assembler directive for old linear-format PAL metadata. 4302 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4303 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4304 return Error(getParser().getTok().getLoc(), 4305 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4306 "not available on non-amdpal OSes")).str()); 4307 } 4308 4309 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4310 PALMetadata->setLegacy(); 4311 for (;;) { 4312 uint32_t Key, Value; 4313 if (ParseAsAbsoluteExpression(Key)) { 4314 return TokError(Twine("invalid value in ") + 4315 Twine(PALMD::AssemblerDirective)); 4316 } 4317 if (getLexer().isNot(AsmToken::Comma)) { 4318 return TokError(Twine("expected an even number of values in ") + 4319 Twine(PALMD::AssemblerDirective)); 4320 } 4321 Lex(); 4322 if (ParseAsAbsoluteExpression(Value)) { 4323 return TokError(Twine("invalid value in ") + 4324 Twine(PALMD::AssemblerDirective)); 4325 } 4326 PALMetadata->setRegister(Key, Value); 4327 if (getLexer().isNot(AsmToken::Comma)) 4328 break; 4329 Lex(); 4330 } 4331 return false; 4332 } 4333 4334 /// ParseDirectiveAMDGPULDS 4335 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4336 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4337 if (getParser().checkForValidSection()) 4338 return true; 4339 4340 StringRef Name; 4341 SMLoc NameLoc = getLexer().getLoc(); 4342 if (getParser().parseIdentifier(Name)) 4343 return TokError("expected identifier in directive"); 4344 4345 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4346 if (parseToken(AsmToken::Comma, "expected ','")) 4347 return true; 4348 4349 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4350 4351 int64_t Size; 4352 SMLoc SizeLoc = getLexer().getLoc(); 4353 if (getParser().parseAbsoluteExpression(Size)) 4354 return true; 4355 if (Size < 0) 4356 return 
      Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Align = 4;
  if (getLexer().is(AsmToken::Comma)) {
    Lex();
    SMLoc AlignLoc = getLexer().getLoc();
    if (getParser().parseAbsoluteExpression(Align))
      return true;
    if (Align < 0 || !isPowerOf2_64(Align))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Align >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
  return false;
}

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
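  // For example, an operand such as s[104:105] (e.g. in "s_mov_b64 s[104:105], 0",
  // shown for illustration only) is rejected below unless the subtarget
  // reports hasSGPR104_SGPR105().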
4443 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4444 R.isValid(); ++R) { 4445 if (*R == RegNo) 4446 return hasSGPR104_SGPR105(); 4447 } 4448 4449 switch (RegNo) { 4450 case AMDGPU::SRC_SHARED_BASE: 4451 case AMDGPU::SRC_SHARED_LIMIT: 4452 case AMDGPU::SRC_PRIVATE_BASE: 4453 case AMDGPU::SRC_PRIVATE_LIMIT: 4454 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4455 return !isCI() && !isSI() && !isVI(); 4456 case AMDGPU::TBA: 4457 case AMDGPU::TBA_LO: 4458 case AMDGPU::TBA_HI: 4459 case AMDGPU::TMA: 4460 case AMDGPU::TMA_LO: 4461 case AMDGPU::TMA_HI: 4462 return !isGFX9() && !isGFX10(); 4463 case AMDGPU::XNACK_MASK: 4464 case AMDGPU::XNACK_MASK_LO: 4465 case AMDGPU::XNACK_MASK_HI: 4466 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4467 case AMDGPU::SGPR_NULL: 4468 return isGFX10(); 4469 default: 4470 break; 4471 } 4472 4473 if (isCI()) 4474 return true; 4475 4476 if (isSI() || isGFX10()) { 4477 // No flat_scr on SI. 4478 // On GFX10 flat scratch is not a valid register operand and can only be 4479 // accessed with s_setreg/s_getreg. 4480 switch (RegNo) { 4481 case AMDGPU::FLAT_SCR: 4482 case AMDGPU::FLAT_SCR_LO: 4483 case AMDGPU::FLAT_SCR_HI: 4484 return false; 4485 default: 4486 return true; 4487 } 4488 } 4489 4490 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4491 // SI/CI have. 4492 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4493 R.isValid(); ++R) { 4494 if (*R == RegNo) 4495 return hasSGPR102_SGPR103(); 4496 } 4497 4498 return true; 4499 } 4500 4501 OperandMatchResultTy 4502 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4503 OperandMode Mode) { 4504 // Try to parse with a custom parser 4505 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4506 4507 // If we successfully parsed the operand or if there as an error parsing, 4508 // we are done. 4509 // 4510 // If we are parsing after we reach EndOfStatement then this means we 4511 // are appending default values to the Operands list. This is only done 4512 // by custom parser, so we shouldn't continue on to the generic parsing. 4513 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4514 getLexer().is(AsmToken::EndOfStatement)) 4515 return ResTy; 4516 4517 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4518 unsigned Prefix = Operands.size(); 4519 SMLoc LBraceLoc = getTok().getLoc(); 4520 Parser.Lex(); // eat the '[' 4521 4522 for (;;) { 4523 ResTy = parseReg(Operands); 4524 if (ResTy != MatchOperand_Success) 4525 return ResTy; 4526 4527 if (getLexer().is(AsmToken::RBrac)) 4528 break; 4529 4530 if (getLexer().isNot(AsmToken::Comma)) 4531 return MatchOperand_ParseFail; 4532 Parser.Lex(); 4533 } 4534 4535 if (Operands.size() - Prefix > 1) { 4536 Operands.insert(Operands.begin() + Prefix, 4537 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4538 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4539 getTok().getLoc())); 4540 } 4541 4542 Parser.Lex(); // eat the ']' 4543 return MatchOperand_Success; 4544 } 4545 4546 return parseRegOrImm(Operands); 4547 } 4548 4549 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4550 // Clear any forced encodings from the previous instruction. 
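  // The suffixes recognized below are stripped from the mnemonic; for example
  // (instruction names shown for illustration only):
  //   v_add_f32_e64  -> "v_add_f32" with the 64-bit (VOP3) encoding forced
  //   v_mov_b32_sdwa -> "v_mov_b32" with the SDWA encoding forced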
4551 setForcedEncodingSize(0); 4552 setForcedDPP(false); 4553 setForcedSDWA(false); 4554 4555 if (Name.endswith("_e64")) { 4556 setForcedEncodingSize(64); 4557 return Name.substr(0, Name.size() - 4); 4558 } else if (Name.endswith("_e32")) { 4559 setForcedEncodingSize(32); 4560 return Name.substr(0, Name.size() - 4); 4561 } else if (Name.endswith("_dpp")) { 4562 setForcedDPP(true); 4563 return Name.substr(0, Name.size() - 4); 4564 } else if (Name.endswith("_sdwa")) { 4565 setForcedSDWA(true); 4566 return Name.substr(0, Name.size() - 5); 4567 } 4568 return Name; 4569 } 4570 4571 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4572 StringRef Name, 4573 SMLoc NameLoc, OperandVector &Operands) { 4574 // Add the instruction mnemonic 4575 Name = parseMnemonicSuffix(Name); 4576 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4577 4578 bool IsMIMG = Name.startswith("image_"); 4579 4580 while (!getLexer().is(AsmToken::EndOfStatement)) { 4581 OperandMode Mode = OperandMode_Default; 4582 if (IsMIMG && isGFX10() && Operands.size() == 2) 4583 Mode = OperandMode_NSA; 4584 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4585 4586 // Eat the comma or space if there is one. 4587 if (getLexer().is(AsmToken::Comma)) 4588 Parser.Lex(); 4589 4590 switch (Res) { 4591 case MatchOperand_Success: break; 4592 case MatchOperand_ParseFail: 4593 // FIXME: use real operand location rather than the current location. 4594 Error(getLexer().getLoc(), "failed parsing operand."); 4595 while (!getLexer().is(AsmToken::EndOfStatement)) { 4596 Parser.Lex(); 4597 } 4598 return true; 4599 case MatchOperand_NoMatch: 4600 // FIXME: use real operand location rather than the current location. 4601 Error(getLexer().getLoc(), "not a valid operand."); 4602 while (!getLexer().is(AsmToken::EndOfStatement)) { 4603 Parser.Lex(); 4604 } 4605 return true; 4606 } 4607 } 4608 4609 return false; 4610 } 4611 4612 //===----------------------------------------------------------------------===// 4613 // Utility functions 4614 //===----------------------------------------------------------------------===// 4615 4616 OperandMatchResultTy 4617 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4618 4619 if (!trySkipId(Prefix, AsmToken::Colon)) 4620 return MatchOperand_NoMatch; 4621 4622 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4623 } 4624 4625 OperandMatchResultTy 4626 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4627 AMDGPUOperand::ImmTy ImmTy, 4628 bool (*ConvertResult)(int64_t&)) { 4629 SMLoc S = getLoc(); 4630 int64_t Value = 0; 4631 4632 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4633 if (Res != MatchOperand_Success) 4634 return Res; 4635 4636 if (ConvertResult && !ConvertResult(Value)) { 4637 Error(S, "invalid " + StringRef(Prefix) + " value."); 4638 } 4639 4640 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4641 return MatchOperand_Success; 4642 } 4643 4644 OperandMatchResultTy 4645 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4646 OperandVector &Operands, 4647 AMDGPUOperand::ImmTy ImmTy, 4648 bool (*ConvertResult)(int64_t&)) { 4649 SMLoc S = getLoc(); 4650 if (!trySkipId(Prefix, AsmToken::Colon)) 4651 return MatchOperand_NoMatch; 4652 4653 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4654 return MatchOperand_ParseFail; 4655 4656 unsigned Val = 0; 4657 const unsigned MaxSize = 4; 4658 4659 // FIXME: How to verify the number of elements matches the number of src 4660 // operands? 4661 for (int I = 0; ; ++I) { 4662 int64_t Op; 4663 SMLoc Loc = getLoc(); 4664 if (!parseExpr(Op)) 4665 return MatchOperand_ParseFail; 4666 4667 if (Op != 0 && Op != 1) { 4668 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4669 return MatchOperand_ParseFail; 4670 } 4671 4672 Val |= (Op << I); 4673 4674 if (trySkipToken(AsmToken::RBrac)) 4675 break; 4676 4677 if (I + 1 == MaxSize) { 4678 Error(getLoc(), "expected a closing square bracket"); 4679 return MatchOperand_ParseFail; 4680 } 4681 4682 if (!skipToken(AsmToken::Comma, "expected a comma")) 4683 return MatchOperand_ParseFail; 4684 } 4685 4686 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4687 return MatchOperand_Success; 4688 } 4689 4690 OperandMatchResultTy 4691 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4692 AMDGPUOperand::ImmTy ImmTy) { 4693 int64_t Bit = 0; 4694 SMLoc S = Parser.getTok().getLoc(); 4695 4696 // We are at the end of the statement, and this is a default argument, so 4697 // use a default value. 
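  // For example (using "gds" purely for illustration): the token "gds" sets
  // the bit to 1, "nogds" sets it to 0, and any other token is not a match.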
4698 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4699 switch(getLexer().getKind()) { 4700 case AsmToken::Identifier: { 4701 StringRef Tok = Parser.getTok().getString(); 4702 if (Tok == Name) { 4703 if (Tok == "r128" && !hasMIMG_R128()) 4704 Error(S, "r128 modifier is not supported on this GPU"); 4705 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4706 Error(S, "a16 modifier is not supported on this GPU"); 4707 Bit = 1; 4708 Parser.Lex(); 4709 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4710 Bit = 0; 4711 Parser.Lex(); 4712 } else { 4713 return MatchOperand_NoMatch; 4714 } 4715 break; 4716 } 4717 default: 4718 return MatchOperand_NoMatch; 4719 } 4720 } 4721 4722 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4723 return MatchOperand_ParseFail; 4724 4725 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4726 ImmTy = AMDGPUOperand::ImmTyR128A16; 4727 4728 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4729 return MatchOperand_Success; 4730 } 4731 4732 static void addOptionalImmOperand( 4733 MCInst& Inst, const OperandVector& Operands, 4734 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4735 AMDGPUOperand::ImmTy ImmT, 4736 int64_t Default = 0) { 4737 auto i = OptionalIdx.find(ImmT); 4738 if (i != OptionalIdx.end()) { 4739 unsigned Idx = i->second; 4740 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4741 } else { 4742 Inst.addOperand(MCOperand::createImm(Default)); 4743 } 4744 } 4745 4746 OperandMatchResultTy 4747 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4748 if (getLexer().isNot(AsmToken::Identifier)) { 4749 return MatchOperand_NoMatch; 4750 } 4751 StringRef Tok = Parser.getTok().getString(); 4752 if (Tok != Prefix) { 4753 return MatchOperand_NoMatch; 4754 } 4755 4756 Parser.Lex(); 4757 if (getLexer().isNot(AsmToken::Colon)) { 4758 return MatchOperand_ParseFail; 4759 } 4760 4761 Parser.Lex(); 4762 if (getLexer().isNot(AsmToken::Identifier)) { 4763 return MatchOperand_ParseFail; 4764 } 4765 4766 Value = Parser.getTok().getString(); 4767 return MatchOperand_Success; 4768 } 4769 4770 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4771 // values to live in a joint format operand in the MCInst encoding. 4772 OperandMatchResultTy 4773 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4774 SMLoc S = Parser.getTok().getLoc(); 4775 int64_t Dfmt = 0, Nfmt = 0; 4776 // dfmt and nfmt can appear in either order, and each is optional. 
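  // For example, "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" (values are placeholders)
  // are both accepted; the two fields are packed as dfmt | (nfmt << 4) below.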
4777 bool GotDfmt = false, GotNfmt = false; 4778 while (!GotDfmt || !GotNfmt) { 4779 if (!GotDfmt) { 4780 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4781 if (Res != MatchOperand_NoMatch) { 4782 if (Res != MatchOperand_Success) 4783 return Res; 4784 if (Dfmt >= 16) { 4785 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4786 return MatchOperand_ParseFail; 4787 } 4788 GotDfmt = true; 4789 Parser.Lex(); 4790 continue; 4791 } 4792 } 4793 if (!GotNfmt) { 4794 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4795 if (Res != MatchOperand_NoMatch) { 4796 if (Res != MatchOperand_Success) 4797 return Res; 4798 if (Nfmt >= 8) { 4799 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4800 return MatchOperand_ParseFail; 4801 } 4802 GotNfmt = true; 4803 Parser.Lex(); 4804 continue; 4805 } 4806 } 4807 break; 4808 } 4809 if (!GotDfmt && !GotNfmt) 4810 return MatchOperand_NoMatch; 4811 auto Format = Dfmt | Nfmt << 4; 4812 Operands.push_back( 4813 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4814 return MatchOperand_Success; 4815 } 4816 4817 //===----------------------------------------------------------------------===// 4818 // ds 4819 //===----------------------------------------------------------------------===// 4820 4821 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4822 const OperandVector &Operands) { 4823 OptionalImmIndexMap OptionalIdx; 4824 4825 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4826 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4827 4828 // Add the register arguments 4829 if (Op.isReg()) { 4830 Op.addRegOperands(Inst, 1); 4831 continue; 4832 } 4833 4834 // Handle optional arguments 4835 OptionalIdx[Op.getImmTy()] = i; 4836 } 4837 4838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4840 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4841 4842 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4843 } 4844 4845 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4846 bool IsGdsHardcoded) { 4847 OptionalImmIndexMap OptionalIdx; 4848 4849 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4850 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4851 4852 // Add the register arguments 4853 if (Op.isReg()) { 4854 Op.addRegOperands(Inst, 1); 4855 continue; 4856 } 4857 4858 if (Op.isToken() && Op.getToken() == "gds") { 4859 IsGdsHardcoded = true; 4860 continue; 4861 } 4862 4863 // Handle optional arguments 4864 OptionalIdx[Op.getImmTy()] = i; 4865 } 4866 4867 AMDGPUOperand::ImmTy OffsetType = 4868 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4869 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4870 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4871 AMDGPUOperand::ImmTyOffset; 4872 4873 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4874 4875 if (!IsGdsHardcoded) { 4876 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4877 } 4878 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4879 } 4880 4881 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4882 OptionalImmIndexMap OptionalIdx; 4883 4884 unsigned OperandIdx[4]; 4885 unsigned EnMask = 0; 4886 int SrcIdx = 0; 4887 4888 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4889 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4890 4891 // Add the register arguments 4892 if (Op.isReg()) { 4893 assert(SrcIdx < 4); 4894 OperandIdx[SrcIdx] = Inst.size(); 4895 Op.addRegOperands(Inst, 1); 4896 ++SrcIdx; 4897 continue; 4898 } 4899 4900 if (Op.isOff()) { 4901 assert(SrcIdx < 4); 4902 OperandIdx[SrcIdx] = Inst.size(); 4903 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4904 ++SrcIdx; 4905 continue; 4906 } 4907 4908 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4909 Op.addImmOperands(Inst, 1); 4910 continue; 4911 } 4912 4913 if (Op.isToken() && Op.getToken() == "done") 4914 continue; 4915 4916 // Handle optional arguments 4917 OptionalIdx[Op.getImmTy()] = i; 4918 } 4919 4920 assert(SrcIdx == 4); 4921 4922 bool Compr = false; 4923 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4924 Compr = true; 4925 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4926 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4927 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4928 } 4929 4930 for (auto i = 0; i < SrcIdx; ++i) { 4931 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4932 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4933 } 4934 } 4935 4936 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4938 4939 Inst.addOperand(MCOperand::createImm(EnMask)); 4940 } 4941 4942 //===----------------------------------------------------------------------===// 4943 // s_waitcnt 4944 //===----------------------------------------------------------------------===// 4945 4946 static bool 4947 encodeCnt( 4948 const AMDGPU::IsaVersion ISA, 4949 int64_t &IntVal, 4950 int64_t CntVal, 4951 bool Saturate, 4952 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4953 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4954 { 4955 bool Failed = false; 4956 4957 IntVal = encode(ISA, IntVal, CntVal); 4958 if (CntVal != decode(ISA, IntVal)) { 4959 if (Saturate) { 4960 IntVal = encode(ISA, IntVal, -1); 4961 } else { 4962 Failed = true; 4963 } 4964 } 4965 return Failed; 4966 } 4967 4968 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4969 4970 SMLoc CntLoc = getLoc(); 4971 StringRef CntName = getTokenStr(); 4972 4973 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4974 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4975 return false; 4976 4977 int64_t CntVal; 4978 SMLoc ValLoc = getLoc(); 4979 if (!parseExpr(CntVal)) 4980 return false; 4981 4982 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4983 4984 bool Failed = true; 4985 bool Sat = CntName.endswith("_sat"); 4986 4987 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4988 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4989 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4990 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4991 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4992 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4993 } else { 4994 Error(CntLoc, "invalid counter name " + CntName); 4995 return false; 4996 } 4997 4998 if (Failed) { 4999 Error(ValLoc, "too large value for " + CntName); 5000 return false; 5001 } 5002 5003 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5004 return false; 5005 5006 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5007 if (isToken(AsmToken::EndOfStatement)) { 5008 Error(getLoc(), "expected a counter name"); 5009 return false; 5010 } 5011 } 5012 5013 return true; 5014 } 5015 5016 OperandMatchResultTy 5017 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5018 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5019 int64_t Waitcnt = getWaitcntBitMask(ISA); 5020 SMLoc S = getLoc(); 5021 5022 // If parse failed, do not return error code 5023 // to avoid excessive error messages. 
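  // Both the symbolic and the raw forms are accepted; for example (counter
  // values are placeholders):
  //   s_waitcnt vmcnt(0) lgkmcnt(0)
  //   s_waitcnt expcnt(1) & vmcnt(2)
  //   s_waitcnt 0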
5024 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5025 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5026 } else { 5027 parseExpr(Waitcnt); 5028 } 5029 5030 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5031 return MatchOperand_Success; 5032 } 5033 5034 bool 5035 AMDGPUOperand::isSWaitCnt() const { 5036 return isImm(); 5037 } 5038 5039 //===----------------------------------------------------------------------===// 5040 // hwreg 5041 //===----------------------------------------------------------------------===// 5042 5043 bool 5044 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5045 int64_t &Offset, 5046 int64_t &Width) { 5047 using namespace llvm::AMDGPU::Hwreg; 5048 5049 // The register may be specified by name or using a numeric code 5050 if (isToken(AsmToken::Identifier) && 5051 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5052 HwReg.IsSymbolic = true; 5053 lex(); // skip message name 5054 } else if (!parseExpr(HwReg.Id)) { 5055 return false; 5056 } 5057 5058 if (trySkipToken(AsmToken::RParen)) 5059 return true; 5060 5061 // parse optional params 5062 return 5063 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5064 parseExpr(Offset) && 5065 skipToken(AsmToken::Comma, "expected a comma") && 5066 parseExpr(Width) && 5067 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5068 } 5069 5070 bool 5071 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5072 const int64_t Offset, 5073 const int64_t Width, 5074 const SMLoc Loc) { 5075 5076 using namespace llvm::AMDGPU::Hwreg; 5077 5078 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5079 Error(Loc, "specified hardware register is not supported on this GPU"); 5080 return false; 5081 } else if (!isValidHwreg(HwReg.Id)) { 5082 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5083 return false; 5084 } else if (!isValidHwregOffset(Offset)) { 5085 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5086 return false; 5087 } else if (!isValidHwregWidth(Width)) { 5088 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5089 return false; 5090 } 5091 return true; 5092 } 5093 5094 OperandMatchResultTy 5095 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5096 using namespace llvm::AMDGPU::Hwreg; 5097 5098 int64_t ImmVal = 0; 5099 SMLoc Loc = getLoc(); 5100 5101 // If parse failed, do not return error code 5102 // to avoid excessive error messages. 
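  // Accepted forms are a symbolic or numeric register id with an optional
  // offset and width, or a raw 16-bit immediate; for example (the register
  // name and field values are placeholders):
  //   hwreg(HW_REG_TRAPSTS)
  //   hwreg(HW_REG_TRAPSTS, 0, 8)
  //   hwreg(6, 1, 3)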
5103 if (trySkipId("hwreg", AsmToken::LParen)) { 5104 OperandInfoTy HwReg(ID_UNKNOWN_); 5105 int64_t Offset = OFFSET_DEFAULT_; 5106 int64_t Width = WIDTH_DEFAULT_; 5107 if (parseHwregBody(HwReg, Offset, Width) && 5108 validateHwreg(HwReg, Offset, Width, Loc)) { 5109 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5110 } 5111 } else if (parseExpr(ImmVal)) { 5112 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5113 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5114 } 5115 5116 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5117 return MatchOperand_Success; 5118 } 5119 5120 bool AMDGPUOperand::isHwreg() const { 5121 return isImmTy(ImmTyHwreg); 5122 } 5123 5124 //===----------------------------------------------------------------------===// 5125 // sendmsg 5126 //===----------------------------------------------------------------------===// 5127 5128 bool 5129 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5130 OperandInfoTy &Op, 5131 OperandInfoTy &Stream) { 5132 using namespace llvm::AMDGPU::SendMsg; 5133 5134 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5135 Msg.IsSymbolic = true; 5136 lex(); // skip message name 5137 } else if (!parseExpr(Msg.Id)) { 5138 return false; 5139 } 5140 5141 if (trySkipToken(AsmToken::Comma)) { 5142 Op.IsDefined = true; 5143 if (isToken(AsmToken::Identifier) && 5144 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5145 lex(); // skip operation name 5146 } else if (!parseExpr(Op.Id)) { 5147 return false; 5148 } 5149 5150 if (trySkipToken(AsmToken::Comma)) { 5151 Stream.IsDefined = true; 5152 if (!parseExpr(Stream.Id)) 5153 return false; 5154 } 5155 } 5156 5157 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5158 } 5159 5160 bool 5161 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5162 const OperandInfoTy &Op, 5163 const OperandInfoTy &Stream, 5164 const SMLoc S) { 5165 using namespace llvm::AMDGPU::SendMsg; 5166 5167 // Validation strictness depends on whether message is specified 5168 // in a symbolc or in a numeric form. In the latter case 5169 // only encoding possibility is checked. 5170 bool Strict = Msg.IsSymbolic; 5171 5172 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5173 Error(S, "invalid message id"); 5174 return false; 5175 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5176 Error(S, Op.IsDefined ? 5177 "message does not support operations" : 5178 "missing message operation"); 5179 return false; 5180 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5181 Error(S, "invalid operation id"); 5182 return false; 5183 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5184 Error(S, "message operation does not support streams"); 5185 return false; 5186 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5187 Error(S, "invalid message stream id"); 5188 return false; 5189 } 5190 return true; 5191 } 5192 5193 OperandMatchResultTy 5194 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5195 using namespace llvm::AMDGPU::SendMsg; 5196 5197 int64_t ImmVal = 0; 5198 SMLoc Loc = getLoc(); 5199 5200 // If parse failed, do not return error code 5201 // to avoid excessive error messages. 
5202 if (trySkipId("sendmsg", AsmToken::LParen)) { 5203 OperandInfoTy Msg(ID_UNKNOWN_); 5204 OperandInfoTy Op(OP_NONE_); 5205 OperandInfoTy Stream(STREAM_ID_NONE_); 5206 if (parseSendMsgBody(Msg, Op, Stream) && 5207 validateSendMsg(Msg, Op, Stream, Loc)) { 5208 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5209 } 5210 } else if (parseExpr(ImmVal)) { 5211 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5212 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5213 } 5214 5215 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5216 return MatchOperand_Success; 5217 } 5218 5219 bool AMDGPUOperand::isSendMsg() const { 5220 return isImmTy(ImmTySendMsg); 5221 } 5222 5223 //===----------------------------------------------------------------------===// 5224 // v_interp 5225 //===----------------------------------------------------------------------===// 5226 5227 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5228 if (getLexer().getKind() != AsmToken::Identifier) 5229 return MatchOperand_NoMatch; 5230 5231 StringRef Str = Parser.getTok().getString(); 5232 int Slot = StringSwitch<int>(Str) 5233 .Case("p10", 0) 5234 .Case("p20", 1) 5235 .Case("p0", 2) 5236 .Default(-1); 5237 5238 SMLoc S = Parser.getTok().getLoc(); 5239 if (Slot == -1) 5240 return MatchOperand_ParseFail; 5241 5242 Parser.Lex(); 5243 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5244 AMDGPUOperand::ImmTyInterpSlot)); 5245 return MatchOperand_Success; 5246 } 5247 5248 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5249 if (getLexer().getKind() != AsmToken::Identifier) 5250 return MatchOperand_NoMatch; 5251 5252 StringRef Str = Parser.getTok().getString(); 5253 if (!Str.startswith("attr")) 5254 return MatchOperand_NoMatch; 5255 5256 StringRef Chan = Str.take_back(2); 5257 int AttrChan = StringSwitch<int>(Chan) 5258 .Case(".x", 0) 5259 .Case(".y", 1) 5260 .Case(".z", 2) 5261 .Case(".w", 3) 5262 .Default(-1); 5263 if (AttrChan == -1) 5264 return MatchOperand_ParseFail; 5265 5266 Str = Str.drop_back(2).drop_front(4); 5267 5268 uint8_t Attr; 5269 if (Str.getAsInteger(10, Attr)) 5270 return MatchOperand_ParseFail; 5271 5272 SMLoc S = Parser.getTok().getLoc(); 5273 Parser.Lex(); 5274 if (Attr > 63) { 5275 Error(S, "out of bounds attr"); 5276 return MatchOperand_Success; 5277 } 5278 5279 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5280 5281 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5282 AMDGPUOperand::ImmTyInterpAttr)); 5283 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5284 AMDGPUOperand::ImmTyAttrChan)); 5285 return MatchOperand_Success; 5286 } 5287 5288 //===----------------------------------------------------------------------===// 5289 // exp 5290 //===----------------------------------------------------------------------===// 5291 5292 void AMDGPUAsmParser::errorExpTgt() { 5293 Error(Parser.getTok().getLoc(), "invalid exp target"); 5294 } 5295 5296 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5297 uint8_t &Val) { 5298 if (Str == "null") { 5299 Val = 9; 5300 return MatchOperand_Success; 5301 } 5302 5303 if (Str.startswith("mrt")) { 5304 Str = Str.drop_front(3); 5305 if (Str == "z") { // == mrtz 5306 Val = 8; 5307 return MatchOperand_Success; 5308 } 5309 5310 if (Str.getAsInteger(10, Val)) 5311 return MatchOperand_ParseFail; 5312 5313 if (Val > 7) 5314 errorExpTgt(); 5315 5316 return MatchOperand_Success; 5317 } 5318 5319 if (Str.startswith("pos")) 
{ 5320 Str = Str.drop_front(3); 5321 if (Str.getAsInteger(10, Val)) 5322 return MatchOperand_ParseFail; 5323 5324 if (Val > 4 || (Val == 4 && !isGFX10())) 5325 errorExpTgt(); 5326 5327 Val += 12; 5328 return MatchOperand_Success; 5329 } 5330 5331 if (isGFX10() && Str == "prim") { 5332 Val = 20; 5333 return MatchOperand_Success; 5334 } 5335 5336 if (Str.startswith("param")) { 5337 Str = Str.drop_front(5); 5338 if (Str.getAsInteger(10, Val)) 5339 return MatchOperand_ParseFail; 5340 5341 if (Val >= 32) 5342 errorExpTgt(); 5343 5344 Val += 32; 5345 return MatchOperand_Success; 5346 } 5347 5348 if (Str.startswith("invalid_target_")) { 5349 Str = Str.drop_front(15); 5350 if (Str.getAsInteger(10, Val)) 5351 return MatchOperand_ParseFail; 5352 5353 errorExpTgt(); 5354 return MatchOperand_Success; 5355 } 5356 5357 return MatchOperand_NoMatch; 5358 } 5359 5360 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5361 uint8_t Val; 5362 StringRef Str = Parser.getTok().getString(); 5363 5364 auto Res = parseExpTgtImpl(Str, Val); 5365 if (Res != MatchOperand_Success) 5366 return Res; 5367 5368 SMLoc S = Parser.getTok().getLoc(); 5369 Parser.Lex(); 5370 5371 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5372 AMDGPUOperand::ImmTyExpTgt)); 5373 return MatchOperand_Success; 5374 } 5375 5376 //===----------------------------------------------------------------------===// 5377 // parser helpers 5378 //===----------------------------------------------------------------------===// 5379 5380 bool 5381 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5382 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5383 } 5384 5385 bool 5386 AMDGPUAsmParser::isId(const StringRef Id) const { 5387 return isId(getToken(), Id); 5388 } 5389 5390 bool 5391 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5392 return getTokenKind() == Kind; 5393 } 5394 5395 bool 5396 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5397 if (isId(Id)) { 5398 lex(); 5399 return true; 5400 } 5401 return false; 5402 } 5403 5404 bool 5405 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5406 if (isId(Id) && peekToken().is(Kind)) { 5407 lex(); 5408 lex(); 5409 return true; 5410 } 5411 return false; 5412 } 5413 5414 bool 5415 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5416 if (isToken(Kind)) { 5417 lex(); 5418 return true; 5419 } 5420 return false; 5421 } 5422 5423 bool 5424 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5425 const StringRef ErrMsg) { 5426 if (!trySkipToken(Kind)) { 5427 Error(getLoc(), ErrMsg); 5428 return false; 5429 } 5430 return true; 5431 } 5432 5433 bool 5434 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5435 return !getParser().parseAbsoluteExpression(Imm); 5436 } 5437 5438 bool 5439 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5440 SMLoc S = getLoc(); 5441 5442 const MCExpr *Expr; 5443 if (Parser.parseExpression(Expr)) 5444 return false; 5445 5446 int64_t IntVal; 5447 if (Expr->evaluateAsAbsolute(IntVal)) { 5448 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5449 } else { 5450 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5451 } 5452 return true; 5453 } 5454 5455 bool 5456 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5457 if (isToken(AsmToken::String)) { 5458 Val = getToken().getStringContents(); 5459 lex(); 5460 return true; 5461 } else { 5462 Error(getLoc(), ErrMsg); 5463 return false; 5464 } 5465 } 5466 5467 
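// A minimal sketch of how the helpers in this section are typically composed
// when parsing a "name(value)"-style operand. The operand name "foo" and the
// function itself are hypothetical, for illustration only:
//
//   bool parseFooOperand(int64_t &Val) {
//     if (!trySkipId("foo", AsmToken::LParen))  // consume "foo" and "("
//       return false;
//     return parseExpr(Val) &&                  // absolute expression
//            skipToken(AsmToken::RParen, "expected a closing parenthesis");
//   }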
AsmToken 5468 AMDGPUAsmParser::getToken() const { 5469 return Parser.getTok(); 5470 } 5471 5472 AsmToken 5473 AMDGPUAsmParser::peekToken() { 5474 return getLexer().peekTok(); 5475 } 5476 5477 void 5478 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5479 auto TokCount = getLexer().peekTokens(Tokens); 5480 5481 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5482 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5483 } 5484 5485 AsmToken::TokenKind 5486 AMDGPUAsmParser::getTokenKind() const { 5487 return getLexer().getKind(); 5488 } 5489 5490 SMLoc 5491 AMDGPUAsmParser::getLoc() const { 5492 return getToken().getLoc(); 5493 } 5494 5495 StringRef 5496 AMDGPUAsmParser::getTokenStr() const { 5497 return getToken().getString(); 5498 } 5499 5500 void 5501 AMDGPUAsmParser::lex() { 5502 Parser.Lex(); 5503 } 5504 5505 //===----------------------------------------------------------------------===// 5506 // swizzle 5507 //===----------------------------------------------------------------------===// 5508 5509 LLVM_READNONE 5510 static unsigned 5511 encodeBitmaskPerm(const unsigned AndMask, 5512 const unsigned OrMask, 5513 const unsigned XorMask) { 5514 using namespace llvm::AMDGPU::Swizzle; 5515 5516 return BITMASK_PERM_ENC | 5517 (AndMask << BITMASK_AND_SHIFT) | 5518 (OrMask << BITMASK_OR_SHIFT) | 5519 (XorMask << BITMASK_XOR_SHIFT); 5520 } 5521 5522 bool 5523 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5524 const unsigned MinVal, 5525 const unsigned MaxVal, 5526 const StringRef ErrMsg) { 5527 for (unsigned i = 0; i < OpNum; ++i) { 5528 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5529 return false; 5530 } 5531 SMLoc ExprLoc = Parser.getTok().getLoc(); 5532 if (!parseExpr(Op[i])) { 5533 return false; 5534 } 5535 if (Op[i] < MinVal || Op[i] > MaxVal) { 5536 Error(ExprLoc, ErrMsg); 5537 return false; 5538 } 5539 } 5540 5541 return true; 5542 } 5543 5544 bool 5545 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5546 using namespace llvm::AMDGPU::Swizzle; 5547 5548 int64_t Lane[LANE_NUM]; 5549 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5550 "expected a 2-bit lane id")) { 5551 Imm = QUAD_PERM_ENC; 5552 for (unsigned I = 0; I < LANE_NUM; ++I) { 5553 Imm |= Lane[I] << (LANE_SHIFT * I); 5554 } 5555 return true; 5556 } 5557 return false; 5558 } 5559 5560 bool 5561 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5562 using namespace llvm::AMDGPU::Swizzle; 5563 5564 SMLoc S = Parser.getTok().getLoc(); 5565 int64_t GroupSize; 5566 int64_t LaneIdx; 5567 5568 if (!parseSwizzleOperands(1, &GroupSize, 5569 2, 32, 5570 "group size must be in the interval [2,32]")) { 5571 return false; 5572 } 5573 if (!isPowerOf2_64(GroupSize)) { 5574 Error(S, "group size must be a power of two"); 5575 return false; 5576 } 5577 if (parseSwizzleOperands(1, &LaneIdx, 5578 0, GroupSize - 1, 5579 "lane id must be in the interval [0,group size - 1]")) { 5580 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5581 return true; 5582 } 5583 return false; 5584 } 5585 5586 bool 5587 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5588 using namespace llvm::AMDGPU::Swizzle; 5589 5590 SMLoc S = Parser.getTok().getLoc(); 5591 int64_t GroupSize; 5592 5593 if (!parseSwizzleOperands(1, &GroupSize, 5594 2, 32, "group size must be in the interval [2,32]")) { 5595 return false; 5596 } 5597 if (!isPowerOf2_64(GroupSize)) { 5598 Error(S, "group size must be a power of two"); 5599 return false; 5600 } 5601 5602 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5603 return true; 5604 } 5605 5606 bool 5607 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5608 using namespace llvm::AMDGPU::Swizzle; 5609 5610 SMLoc S = Parser.getTok().getLoc(); 5611 int64_t GroupSize; 5612 5613 if (!parseSwizzleOperands(1, &GroupSize, 5614 1, 16, "group size must be in the interval [1,16]")) { 5615 return false; 5616 } 5617 if (!isPowerOf2_64(GroupSize)) { 5618 Error(S, "group size must be a power of two"); 5619 return false; 5620 } 5621 5622 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5623 return true; 5624 } 5625 5626 bool 5627 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5628 using namespace llvm::AMDGPU::Swizzle; 5629 5630 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5631 return false; 5632 } 5633 5634 StringRef Ctl; 5635 SMLoc StrLoc = Parser.getTok().getLoc(); 5636 if (!parseString(Ctl)) { 5637 return false; 5638 } 5639 if (Ctl.size() != BITMASK_WIDTH) { 5640 Error(StrLoc, "expected a 5-character mask"); 5641 return false; 5642 } 5643 5644 unsigned AndMask = 0; 5645 unsigned OrMask = 0; 5646 unsigned XorMask = 0; 5647 5648 for (size_t i = 0; i < Ctl.size(); ++i) { 5649 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5650 switch(Ctl[i]) { 5651 default: 5652 Error(StrLoc, "invalid mask"); 5653 return false; 5654 case '0': 5655 break; 5656 case '1': 5657 OrMask |= Mask; 5658 break; 5659 case 'p': 5660 AndMask |= Mask; 5661 break; 5662 case 'i': 5663 AndMask |= Mask; 5664 XorMask |= Mask; 5665 break; 5666 } 5667 } 5668 5669 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5670 return true; 5671 } 5672 5673 bool 5674 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5675 5676 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5677 5678 if (!parseExpr(Imm)) { 5679 return false; 5680 } 5681 if (!isUInt<16>(Imm)) { 5682 Error(OffsetLoc, "expected a 16-bit offset"); 5683 return false; 5684 } 5685 return true; 5686 } 5687 5688 bool 5689 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5690 using namespace llvm::AMDGPU::Swizzle; 5691 5692 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5693 5694 SMLoc ModeLoc = Parser.getTok().getLoc(); 5695 bool Ok = false; 5696 5697 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5698 Ok = parseSwizzleQuadPerm(Imm); 5699 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5700 Ok = parseSwizzleBitmaskPerm(Imm); 5701 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5702 Ok = parseSwizzleBroadcast(Imm); 5703 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5704 Ok = parseSwizzleSwap(Imm); 5705 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5706 Ok = parseSwizzleReverse(Imm); 5707 } else { 5708 Error(ModeLoc, "expected a swizzle mode"); 5709 } 5710 5711 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5712 } 5713 5714 return false; 5715 } 5716 5717 OperandMatchResultTy 5718 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5719 SMLoc S = Parser.getTok().getLoc(); 5720 int64_t Imm = 0; 5721 5722 if (trySkipId("offset")) { 5723 5724 bool Ok = false; 5725 if (skipToken(AsmToken::Colon, "expected a colon")) { 5726 if (trySkipId("swizzle")) { 5727 Ok = parseSwizzleMacro(Imm); 5728 } else { 5729 Ok = parseSwizzleOffset(Imm); 5730 } 5731 } 5732 5733 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5734 5735 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5736 } else { 5737 // Swizzle "offset" operand is optional. 5738 // If it is omitted, try parsing other optional operands. 
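// For reference, the macro and offset forms handled above look like this
// (illustrative examples, not an exhaustive list):
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v5, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v5, v1 offset:swizzle(REVERSE, 8)
//   ds_swizzle_b32 v5, v1 offset:0xffff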
5739 return parseOptionalOpr(Operands); 5740 } 5741 } 5742 5743 bool 5744 AMDGPUOperand::isSwizzle() const { 5745 return isImmTy(ImmTySwizzle); 5746 } 5747 5748 //===----------------------------------------------------------------------===// 5749 // VGPR Index Mode 5750 //===----------------------------------------------------------------------===// 5751 5752 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5753 5754 using namespace llvm::AMDGPU::VGPRIndexMode; 5755 5756 if (trySkipToken(AsmToken::RParen)) { 5757 return OFF; 5758 } 5759 5760 int64_t Imm = 0; 5761 5762 while (true) { 5763 unsigned Mode = 0; 5764 SMLoc S = Parser.getTok().getLoc(); 5765 5766 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5767 if (trySkipId(IdSymbolic[ModeId])) { 5768 Mode = 1 << ModeId; 5769 break; 5770 } 5771 } 5772 5773 if (Mode == 0) { 5774 Error(S, (Imm == 0)? 5775 "expected a VGPR index mode or a closing parenthesis" : 5776 "expected a VGPR index mode"); 5777 break; 5778 } 5779 5780 if (Imm & Mode) { 5781 Error(S, "duplicate VGPR index mode"); 5782 break; 5783 } 5784 Imm |= Mode; 5785 5786 if (trySkipToken(AsmToken::RParen)) 5787 break; 5788 if (!skipToken(AsmToken::Comma, 5789 "expected a comma or a closing parenthesis")) 5790 break; 5791 } 5792 5793 return Imm; 5794 } 5795 5796 OperandMatchResultTy 5797 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5798 5799 int64_t Imm = 0; 5800 SMLoc S = Parser.getTok().getLoc(); 5801 5802 if (getLexer().getKind() == AsmToken::Identifier && 5803 Parser.getTok().getString() == "gpr_idx" && 5804 getLexer().peekTok().is(AsmToken::LParen)) { 5805 5806 Parser.Lex(); 5807 Parser.Lex(); 5808 5809 // If parse failed, trigger an error but do not return error code 5810 // to avoid excessive error messages. 5811 Imm = parseGPRIdxMacro(); 5812 5813 } else { 5814 if (getParser().parseAbsoluteExpression(Imm)) 5815 return MatchOperand_NoMatch; 5816 if (Imm < 0 || !isUInt<4>(Imm)) { 5817 Error(S, "invalid immediate: only 4-bit values are legal"); 5818 } 5819 } 5820 5821 Operands.push_back( 5822 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5823 return MatchOperand_Success; 5824 } 5825 5826 bool AMDGPUOperand::isGPRIdxMode() const { 5827 return isImmTy(ImmTyGprIdxMode); 5828 } 5829 5830 //===----------------------------------------------------------------------===// 5831 // sopp branch targets 5832 //===----------------------------------------------------------------------===// 5833 5834 OperandMatchResultTy 5835 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5836 5837 // Make sure we are not parsing something 5838 // that looks like a label or an expression but is not. 5839 // This will improve error messages. 5840 if (isRegister() || isModifier()) 5841 return MatchOperand_NoMatch; 5842 5843 if (parseExpr(Operands)) { 5844 5845 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5846 assert(Opr.isImm() || Opr.isExpr()); 5847 SMLoc Loc = Opr.getStartLoc(); 5848 5849 // Currently we do not support arbitrary expressions as branch targets. 5850 // Only labels and absolute expressions are accepted. 
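// E.g. "s_branch loop_end" (a label) and "s_branch 8" (an absolute expression
// that fits a signed 16-bit immediate) are accepted, while something like
// "s_branch loop_end+4" is rejected below. Illustrative examples only.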
5851 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5852 Error(Loc, "expected an absolute expression or a label"); 5853 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5854 Error(Loc, "expected a 16-bit signed jump offset"); 5855 } 5856 } 5857 5858 return MatchOperand_Success; // avoid excessive error messages 5859 } 5860 5861 //===----------------------------------------------------------------------===// 5862 // Boolean holding registers 5863 //===----------------------------------------------------------------------===// 5864 5865 OperandMatchResultTy 5866 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5867 return parseReg(Operands); 5868 } 5869 5870 //===----------------------------------------------------------------------===// 5871 // mubuf 5872 //===----------------------------------------------------------------------===// 5873 5874 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5875 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5876 } 5877 5878 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5879 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5880 } 5881 5882 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5883 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5884 } 5885 5886 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5887 const OperandVector &Operands, 5888 bool IsAtomic, 5889 bool IsAtomicReturn, 5890 bool IsLds) { 5891 bool IsLdsOpcode = IsLds; 5892 bool HasLdsModifier = false; 5893 OptionalImmIndexMap OptionalIdx; 5894 assert(IsAtomicReturn ? IsAtomic : true); 5895 unsigned FirstOperandIdx = 1; 5896 5897 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5898 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5899 5900 // Add the register arguments 5901 if (Op.isReg()) { 5902 Op.addRegOperands(Inst, 1); 5903 // Insert a tied src for atomic return dst. 5904 // This cannot be postponed as subsequent calls to 5905 // addImmOperands rely on correct number of MC operands. 5906 if (IsAtomicReturn && i == FirstOperandIdx) 5907 Op.addRegOperands(Inst, 1); 5908 continue; 5909 } 5910 5911 // Handle the case where soffset is an immediate 5912 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5913 Op.addImmOperands(Inst, 1); 5914 continue; 5915 } 5916 5917 HasLdsModifier |= Op.isLDS(); 5918 5919 // Handle tokens like 'offen' which are sometimes hard-coded into the 5920 // asm string. There are no MCInst operands for these. 5921 if (Op.isToken()) { 5922 continue; 5923 } 5924 assert(Op.isImm()); 5925 5926 // Handle optional arguments 5927 OptionalIdx[Op.getImmTy()] = i; 5928 } 5929 5930 // This is a workaround for an llvm quirk which may result in an 5931 // incorrect instruction selection. Lds and non-lds versions of 5932 // MUBUF instructions are identical except that lds versions 5933 // have mandatory 'lds' modifier. However this modifier follows 5934 // optional modifiers and llvm asm matcher regards this 'lds' 5935 // modifier as an optional one. As a result, an lds version 5936 // of opcode may be selected even if it has no 'lds' modifier. 5937 if (IsLdsOpcode && !HasLdsModifier) { 5938 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5939 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5940 Inst.setOpcode(NoLdsOpcode); 5941 IsLdsOpcode = false; 5942 } 5943 } 5944 5945 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5946 if (!IsAtomic) { // glc is hard-coded. 
5947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5948 } 5949 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5950 5951 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5952 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5953 } 5954 5955 if (isGFX10()) 5956 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5957 } 5958 5959 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5960 OptionalImmIndexMap OptionalIdx; 5961 5962 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5963 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5964 5965 // Add the register arguments 5966 if (Op.isReg()) { 5967 Op.addRegOperands(Inst, 1); 5968 continue; 5969 } 5970 5971 // Handle the case where soffset is an immediate 5972 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5973 Op.addImmOperands(Inst, 1); 5974 continue; 5975 } 5976 5977 // Handle tokens like 'offen' which are sometimes hard-coded into the 5978 // asm string. There are no MCInst operands for these. 5979 if (Op.isToken()) { 5980 continue; 5981 } 5982 assert(Op.isImm()); 5983 5984 // Handle optional arguments 5985 OptionalIdx[Op.getImmTy()] = i; 5986 } 5987 5988 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5989 AMDGPUOperand::ImmTyOffset); 5990 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5992 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5994 5995 if (isGFX10()) 5996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5997 } 5998 5999 //===----------------------------------------------------------------------===// 6000 // mimg 6001 //===----------------------------------------------------------------------===// 6002 6003 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6004 bool IsAtomic) { 6005 unsigned I = 1; 6006 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6007 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6008 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6009 } 6010 6011 if (IsAtomic) { 6012 // Add src, same as dst 6013 assert(Desc.getNumDefs() == 1); 6014 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6015 } 6016 6017 OptionalImmIndexMap OptionalIdx; 6018 6019 for (unsigned E = Operands.size(); I != E; ++I) { 6020 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6021 6022 // Add the register arguments 6023 if (Op.isReg()) { 6024 Op.addRegOperands(Inst, 1); 6025 } else if (Op.isImmModifier()) { 6026 OptionalIdx[Op.getImmTy()] = I; 6027 } else if (!Op.isToken()) { 6028 llvm_unreachable("unexpected operand type"); 6029 } 6030 } 6031 6032 bool IsGFX10 = isGFX10(); 6033 6034 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6035 if (IsGFX10) 6036 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6037 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6038 if (IsGFX10) 6039 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6040 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6041 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6042 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6043 if (IsGFX10) 6044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6047 if (!IsGFX10) 6048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6050 } 6051 6052 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6053 cvtMIMG(Inst, Operands, true); 6054 } 6055 6056 //===----------------------------------------------------------------------===// 6057 // smrd 6058 //===----------------------------------------------------------------------===// 6059 6060 bool AMDGPUOperand::isSMRDOffset8() const { 6061 return isImm() && isUInt<8>(getImm()); 6062 } 6063 6064 bool AMDGPUOperand::isSMRDOffset20() const { 6065 return isImm() && isUInt<20>(getImm()); 6066 } 6067 6068 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6069 // 32-bit literals are only supported on CI and we only want to use them 6070 // when the offset is > 8-bits. 6071 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6072 } 6073 6074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6075 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6076 } 6077 6078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 6079 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6080 } 6081 6082 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6083 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6084 } 6085 6086 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6087 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6088 } 6089 6090 //===----------------------------------------------------------------------===// 6091 // vop3 6092 //===----------------------------------------------------------------------===// 6093 6094 static bool ConvertOmodMul(int64_t &Mul) { 6095 if (Mul != 1 && Mul != 2 && Mul != 4) 6096 return false; 6097 6098 Mul >>= 1; 6099 return true; 6100 } 6101 6102 static bool ConvertOmodDiv(int64_t &Div) { 6103 if (Div == 1) { 6104 Div = 0; 6105 return true; 6106 } 6107 6108 if (Div == 2) { 6109 Div = 3; 6110 return true; 6111 } 6112 6113 return false; 6114 } 6115 6116 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6117 if (BoundCtrl == 0) { 6118 BoundCtrl = 1; 6119 return true; 6120 } 6121 6122 if (BoundCtrl == -1) { 6123 BoundCtrl = 0; 6124 return true; 6125 } 6126 6127 return false; 6128 } 6129 6130 // Note: the order in this table matches the order of operands in AsmString. 
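// Each entry below is {name, immediate type, is-bare-token, converter}.
// For example (illustrative), {"glc", ImmTyGLC, true, nullptr} is a bare
// token that simply sets a bit, while {"omod", ImmTyOModSI, false,
// ConvertOmodMul} takes a value that is post-processed by its converter.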
6131 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6132 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6133 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6134 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6135 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6136 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6137 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6138 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6139 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6140 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6141 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6142 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6143 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6144 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6145 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6146 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6147 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6148 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6149 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6150 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6151 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6152 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6153 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6154 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6155 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6156 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6157 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6158 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6159 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6160 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6161 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6162 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6163 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6164 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6165 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6166 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6167 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6168 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6169 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6170 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6171 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6172 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6173 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6174 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6175 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6176 }; 6177 6178 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6179 6180 OperandMatchResultTy res = parseOptionalOpr(Operands); 6181 6182 // This is a hack to enable hardcoded mandatory operands which follow 6183 // optional operands. 6184 // 6185 // Current design assumes that all operands after the first optional operand 6186 // are also optional. However implementation of some instructions violates 6187 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6188 // 6189 // To alleviate this problem, we have to (implicitly) parse extra operands 6190 // to make sure autogenerated parser of custom operands never hit hardcoded 6191 // mandatory operands. 
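// For example (illustrative), a flat atomic with a return value ends with a
// hardcoded 'glc' token:
//   flat_atomic_add v0, v[1:2], v3 glc
// Without the extra lookahead below, parsing of optional operands could stop
// before reaching that mandatory trailing token.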
6192 6193 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6194 if (res != MatchOperand_Success || 6195 isToken(AsmToken::EndOfStatement)) 6196 break; 6197 6198 trySkipToken(AsmToken::Comma); 6199 res = parseOptionalOpr(Operands); 6200 } 6201 6202 return res; 6203 } 6204 6205 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6206 OperandMatchResultTy res; 6207 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6208 // try to parse any optional operand here 6209 if (Op.IsBit) { 6210 res = parseNamedBit(Op.Name, Operands, Op.Type); 6211 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6212 res = parseOModOperand(Operands); 6213 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6214 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6215 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6216 res = parseSDWASel(Operands, Op.Name, Op.Type); 6217 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6218 res = parseSDWADstUnused(Operands); 6219 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6220 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6221 Op.Type == AMDGPUOperand::ImmTyNegLo || 6222 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6223 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6224 Op.ConvertResult); 6225 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6226 res = parseDim(Operands); 6227 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6228 res = parseDfmtNfmt(Operands); 6229 } else { 6230 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6231 } 6232 if (res != MatchOperand_NoMatch) { 6233 return res; 6234 } 6235 } 6236 return MatchOperand_NoMatch; 6237 } 6238 6239 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6240 StringRef Name = Parser.getTok().getString(); 6241 if (Name == "mul") { 6242 return parseIntWithPrefix("mul", Operands, 6243 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6244 } 6245 6246 if (Name == "div") { 6247 return parseIntWithPrefix("div", Operands, 6248 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6249 } 6250 6251 return MatchOperand_NoMatch; 6252 } 6253 6254 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6255 cvtVOP3P(Inst, Operands); 6256 6257 int Opc = Inst.getOpcode(); 6258 6259 int SrcNum; 6260 const int Ops[] = { AMDGPU::OpName::src0, 6261 AMDGPU::OpName::src1, 6262 AMDGPU::OpName::src2 }; 6263 for (SrcNum = 0; 6264 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6265 ++SrcNum); 6266 assert(SrcNum > 0); 6267 6268 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6269 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6270 6271 if ((OpSel & (1 << SrcNum)) != 0) { 6272 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6273 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6274 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6275 } 6276 } 6277 6278 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6279 // 1. This operand is input modifiers 6280 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6281 // 2. This is not last operand 6282 && Desc.NumOperands > (OpNum + 1) 6283 // 3. Next operand is register class 6284 && Desc.OpInfo[OpNum + 1].RegClass != -1 6285 // 4. 
Next register is not tied to any other operand 6286 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6287 } 6288 6289 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6290 { 6291 OptionalImmIndexMap OptionalIdx; 6292 unsigned Opc = Inst.getOpcode(); 6293 6294 unsigned I = 1; 6295 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6296 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6297 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6298 } 6299 6300 for (unsigned E = Operands.size(); I != E; ++I) { 6301 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6302 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6303 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6304 } else if (Op.isInterpSlot() || 6305 Op.isInterpAttr() || 6306 Op.isAttrChan()) { 6307 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6308 } else if (Op.isImmModifier()) { 6309 OptionalIdx[Op.getImmTy()] = I; 6310 } else { 6311 llvm_unreachable("unhandled operand type"); 6312 } 6313 } 6314 6315 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6317 } 6318 6319 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6320 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6321 } 6322 6323 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6325 } 6326 } 6327 6328 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6329 OptionalImmIndexMap &OptionalIdx) { 6330 unsigned Opc = Inst.getOpcode(); 6331 6332 unsigned I = 1; 6333 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6334 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6335 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6336 } 6337 6338 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6339 // This instruction has src modifiers 6340 for (unsigned E = Operands.size(); I != E; ++I) { 6341 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6342 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6343 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6344 } else if (Op.isImmModifier()) { 6345 OptionalIdx[Op.getImmTy()] = I; 6346 } else if (Op.isRegOrImm()) { 6347 Op.addRegOrImmOperands(Inst, 1); 6348 } else { 6349 llvm_unreachable("unhandled operand type"); 6350 } 6351 } 6352 } else { 6353 // No src modifiers 6354 for (unsigned E = Operands.size(); I != E; ++I) { 6355 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6356 if (Op.isMod()) { 6357 OptionalIdx[Op.getImmTy()] = I; 6358 } else { 6359 Op.addRegOrImmOperands(Inst, 1); 6360 } 6361 } 6362 } 6363 6364 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6365 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6366 } 6367 6368 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6369 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6370 } 6371 6372 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6373 // it has src2 register operand that is tied to dst operand 6374 // we don't allow modifiers for this operand in assembler so src2_modifiers 6375 // should be 0. 
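// E.g. for "v_mac_f32_e64 v0, v1, v2" only vdst, src0 and src1 come from the
// parsed operands; the code below appends src2_modifiers = 0 and a src2
// operand tied to vdst (operand 0) so the MCInst matches the VOP3 operand
// layout. Sketch based on the insertions performed below.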
6376 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6377 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6378 Opc == AMDGPU::V_MAC_F32_e64_vi || 6379 Opc == AMDGPU::V_MAC_F16_e64_vi || 6380 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6381 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6382 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6383 auto it = Inst.begin(); 6384 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6385 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6386 ++it; 6387 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6388 } 6389 } 6390 6391 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6392 OptionalImmIndexMap OptionalIdx; 6393 cvtVOP3(Inst, Operands, OptionalIdx); 6394 } 6395 6396 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6397 const OperandVector &Operands) { 6398 OptionalImmIndexMap OptIdx; 6399 const int Opc = Inst.getOpcode(); 6400 const MCInstrDesc &Desc = MII.get(Opc); 6401 6402 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6403 6404 cvtVOP3(Inst, Operands, OptIdx); 6405 6406 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6407 assert(!IsPacked); 6408 Inst.addOperand(Inst.getOperand(0)); 6409 } 6410 6411 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6412 // instruction, and then figure out where to actually put the modifiers 6413 6414 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6415 6416 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6417 if (OpSelHiIdx != -1) { 6418 int DefaultVal = IsPacked ? -1 : 0; 6419 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6420 DefaultVal); 6421 } 6422 6423 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6424 if (NegLoIdx != -1) { 6425 assert(IsPacked); 6426 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6427 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6428 } 6429 6430 const int Ops[] = { AMDGPU::OpName::src0, 6431 AMDGPU::OpName::src1, 6432 AMDGPU::OpName::src2 }; 6433 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6434 AMDGPU::OpName::src1_modifiers, 6435 AMDGPU::OpName::src2_modifiers }; 6436 6437 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6438 6439 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6440 unsigned OpSelHi = 0; 6441 unsigned NegLo = 0; 6442 unsigned NegHi = 0; 6443 6444 if (OpSelHiIdx != -1) { 6445 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6446 } 6447 6448 if (NegLoIdx != -1) { 6449 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6450 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6451 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6452 } 6453 6454 for (int J = 0; J < 3; ++J) { 6455 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6456 if (OpIdx == -1) 6457 break; 6458 6459 uint32_t ModVal = 0; 6460 6461 if ((OpSel & (1 << J)) != 0) 6462 ModVal |= SISrcMods::OP_SEL_0; 6463 6464 if ((OpSelHi & (1 << J)) != 0) 6465 ModVal |= SISrcMods::OP_SEL_1; 6466 6467 if ((NegLo & (1 << J)) != 0) 6468 ModVal |= SISrcMods::NEG; 6469 6470 if ((NegHi & (1 << J)) != 0) 6471 ModVal |= SISrcMods::NEG_HI; 6472 6473 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6474 6475 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6476 } 6477 } 6478 6479 //===----------------------------------------------------------------------===// 6480 // dpp 6481 
//===----------------------------------------------------------------------===// 6482 6483 bool AMDGPUOperand::isDPP8() const { 6484 return isImmTy(ImmTyDPP8); 6485 } 6486 6487 bool AMDGPUOperand::isDPPCtrl() const { 6488 using namespace AMDGPU::DPP; 6489 6490 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6491 if (result) { 6492 int64_t Imm = getImm(); 6493 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6494 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6495 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6496 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6497 (Imm == DppCtrl::WAVE_SHL1) || 6498 (Imm == DppCtrl::WAVE_ROL1) || 6499 (Imm == DppCtrl::WAVE_SHR1) || 6500 (Imm == DppCtrl::WAVE_ROR1) || 6501 (Imm == DppCtrl::ROW_MIRROR) || 6502 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6503 (Imm == DppCtrl::BCAST15) || 6504 (Imm == DppCtrl::BCAST31) || 6505 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6506 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6507 } 6508 return false; 6509 } 6510 6511 //===----------------------------------------------------------------------===// 6512 // mAI 6513 //===----------------------------------------------------------------------===// 6514 6515 bool AMDGPUOperand::isBLGP() const { 6516 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6517 } 6518 6519 bool AMDGPUOperand::isCBSZ() const { 6520 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6521 } 6522 6523 bool AMDGPUOperand::isABID() const { 6524 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6525 } 6526 6527 bool AMDGPUOperand::isS16Imm() const { 6528 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6529 } 6530 6531 bool AMDGPUOperand::isU16Imm() const { 6532 return isImm() && isUInt<16>(getImm()); 6533 } 6534 6535 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6536 if (!isGFX10()) 6537 return MatchOperand_NoMatch; 6538 6539 SMLoc S = Parser.getTok().getLoc(); 6540 6541 if (getLexer().isNot(AsmToken::Identifier)) 6542 return MatchOperand_NoMatch; 6543 if (getLexer().getTok().getString() != "dim") 6544 return MatchOperand_NoMatch; 6545 6546 Parser.Lex(); 6547 if (getLexer().isNot(AsmToken::Colon)) 6548 return MatchOperand_ParseFail; 6549 6550 Parser.Lex(); 6551 6552 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6553 // integer. 
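// E.g. "dim:SQ_RSRC_IMG_3D" arrives as a single identifier, but "dim:2D" is
// lexed as the integer "2" followed by the identifier "D"; the code below
// re-joins the two pieces (only when they are adjacent) before the table
// lookup.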
6554 std::string Token; 6555 if (getLexer().is(AsmToken::Integer)) { 6556 SMLoc Loc = getLexer().getTok().getEndLoc(); 6557 Token = std::string(getLexer().getTok().getString()); 6558 Parser.Lex(); 6559 if (getLexer().getTok().getLoc() != Loc) 6560 return MatchOperand_ParseFail; 6561 } 6562 if (getLexer().isNot(AsmToken::Identifier)) 6563 return MatchOperand_ParseFail; 6564 Token += getLexer().getTok().getString(); 6565 6566 StringRef DimId = Token; 6567 if (DimId.startswith("SQ_RSRC_IMG_")) 6568 DimId = DimId.substr(12); 6569 6570 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6571 if (!DimInfo) 6572 return MatchOperand_ParseFail; 6573 6574 Parser.Lex(); 6575 6576 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6577 AMDGPUOperand::ImmTyDim)); 6578 return MatchOperand_Success; 6579 } 6580 6581 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6582 SMLoc S = Parser.getTok().getLoc(); 6583 StringRef Prefix; 6584 6585 if (getLexer().getKind() == AsmToken::Identifier) { 6586 Prefix = Parser.getTok().getString(); 6587 } else { 6588 return MatchOperand_NoMatch; 6589 } 6590 6591 if (Prefix != "dpp8") 6592 return parseDPPCtrl(Operands); 6593 if (!isGFX10()) 6594 return MatchOperand_NoMatch; 6595 6596 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6597 6598 int64_t Sels[8]; 6599 6600 Parser.Lex(); 6601 if (getLexer().isNot(AsmToken::Colon)) 6602 return MatchOperand_ParseFail; 6603 6604 Parser.Lex(); 6605 if (getLexer().isNot(AsmToken::LBrac)) 6606 return MatchOperand_ParseFail; 6607 6608 Parser.Lex(); 6609 if (getParser().parseAbsoluteExpression(Sels[0])) 6610 return MatchOperand_ParseFail; 6611 if (0 > Sels[0] || 7 < Sels[0]) 6612 return MatchOperand_ParseFail; 6613 6614 for (size_t i = 1; i < 8; ++i) { 6615 if (getLexer().isNot(AsmToken::Comma)) 6616 return MatchOperand_ParseFail; 6617 6618 Parser.Lex(); 6619 if (getParser().parseAbsoluteExpression(Sels[i])) 6620 return MatchOperand_ParseFail; 6621 if (0 > Sels[i] || 7 < Sels[i]) 6622 return MatchOperand_ParseFail; 6623 } 6624 6625 if (getLexer().isNot(AsmToken::RBrac)) 6626 return MatchOperand_ParseFail; 6627 Parser.Lex(); 6628 6629 unsigned DPP8 = 0; 6630 for (size_t i = 0; i < 8; ++i) 6631 DPP8 |= (Sels[i] << (i * 3)); 6632 6633 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6634 return MatchOperand_Success; 6635 } 6636 6637 OperandMatchResultTy 6638 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6639 using namespace AMDGPU::DPP; 6640 6641 SMLoc S = Parser.getTok().getLoc(); 6642 StringRef Prefix; 6643 int64_t Int; 6644 6645 if (getLexer().getKind() == AsmToken::Identifier) { 6646 Prefix = Parser.getTok().getString(); 6647 } else { 6648 return MatchOperand_NoMatch; 6649 } 6650 6651 if (Prefix == "row_mirror") { 6652 Int = DppCtrl::ROW_MIRROR; 6653 Parser.Lex(); 6654 } else if (Prefix == "row_half_mirror") { 6655 Int = DppCtrl::ROW_HALF_MIRROR; 6656 Parser.Lex(); 6657 } else { 6658 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6659 if (Prefix != "quad_perm" 6660 && Prefix != "row_shl" 6661 && Prefix != "row_shr" 6662 && Prefix != "row_ror" 6663 && Prefix != "wave_shl" 6664 && Prefix != "wave_rol" 6665 && Prefix != "wave_shr" 6666 && Prefix != "wave_ror" 6667 && Prefix != "row_bcast" 6668 && Prefix != "row_share" 6669 && Prefix != "row_xmask") { 6670 return MatchOperand_NoMatch; 6671 } 6672 6673 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6674 return MatchOperand_NoMatch; 6675 6676 if 
(!isVI() && !isGFX9() && 6677 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6678 Prefix == "wave_rol" || Prefix == "wave_ror" || 6679 Prefix == "row_bcast")) 6680 return MatchOperand_NoMatch; 6681 6682 Parser.Lex(); 6683 if (getLexer().isNot(AsmToken::Colon)) 6684 return MatchOperand_ParseFail; 6685 6686 if (Prefix == "quad_perm") { 6687 // quad_perm:[%d,%d,%d,%d] 6688 Parser.Lex(); 6689 if (getLexer().isNot(AsmToken::LBrac)) 6690 return MatchOperand_ParseFail; 6691 Parser.Lex(); 6692 6693 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6694 return MatchOperand_ParseFail; 6695 6696 for (int i = 0; i < 3; ++i) { 6697 if (getLexer().isNot(AsmToken::Comma)) 6698 return MatchOperand_ParseFail; 6699 Parser.Lex(); 6700 6701 int64_t Temp; 6702 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6703 return MatchOperand_ParseFail; 6704 const int shift = i*2 + 2; 6705 Int += (Temp << shift); 6706 } 6707 6708 if (getLexer().isNot(AsmToken::RBrac)) 6709 return MatchOperand_ParseFail; 6710 Parser.Lex(); 6711 } else { 6712 // sel:%d 6713 Parser.Lex(); 6714 if (getParser().parseAbsoluteExpression(Int)) 6715 return MatchOperand_ParseFail; 6716 6717 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6718 Int |= DppCtrl::ROW_SHL0; 6719 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6720 Int |= DppCtrl::ROW_SHR0; 6721 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6722 Int |= DppCtrl::ROW_ROR0; 6723 } else if (Prefix == "wave_shl" && 1 == Int) { 6724 Int = DppCtrl::WAVE_SHL1; 6725 } else if (Prefix == "wave_rol" && 1 == Int) { 6726 Int = DppCtrl::WAVE_ROL1; 6727 } else if (Prefix == "wave_shr" && 1 == Int) { 6728 Int = DppCtrl::WAVE_SHR1; 6729 } else if (Prefix == "wave_ror" && 1 == Int) { 6730 Int = DppCtrl::WAVE_ROR1; 6731 } else if (Prefix == "row_bcast") { 6732 if (Int == 15) { 6733 Int = DppCtrl::BCAST15; 6734 } else if (Int == 31) { 6735 Int = DppCtrl::BCAST31; 6736 } else { 6737 return MatchOperand_ParseFail; 6738 } 6739 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6740 Int |= DppCtrl::ROW_SHARE_FIRST; 6741 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6742 Int |= DppCtrl::ROW_XMASK_FIRST; 6743 } else { 6744 return MatchOperand_ParseFail; 6745 } 6746 } 6747 } 6748 6749 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6750 return MatchOperand_Success; 6751 } 6752 6753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6754 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6755 } 6756 6757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6758 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6759 } 6760 6761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6762 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6763 } 6764 6765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6766 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6767 } 6768 6769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6770 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6771 } 6772 6773 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6774 OptionalImmIndexMap OptionalIdx; 6775 6776 unsigned I = 1; 6777 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6778 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6779 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6780 } 6781 6782 int Fi = 0; 6783 for (unsigned E = Operands.size(); I != E; ++I) { 6784 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6785 MCOI::TIED_TO); 6786 if (TiedTo != -1) { 6787 assert((unsigned)TiedTo < Inst.getNumOperands()); 6788 // handle tied old or src2 for MAC instructions 6789 Inst.addOperand(Inst.getOperand(TiedTo)); 6790 } 6791 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6792 // Add the register arguments 6793 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6794 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6795 // Skip it. 6796 continue; 6797 } 6798 6799 if (IsDPP8) { 6800 if (Op.isDPP8()) { 6801 Op.addImmOperands(Inst, 1); 6802 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6803 Op.addRegWithFPInputModsOperands(Inst, 2); 6804 } else if (Op.isFI()) { 6805 Fi = Op.getImm(); 6806 } else if (Op.isReg()) { 6807 Op.addRegOperands(Inst, 1); 6808 } else { 6809 llvm_unreachable("Invalid operand type"); 6810 } 6811 } else { 6812 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6813 Op.addRegWithFPInputModsOperands(Inst, 2); 6814 } else if (Op.isDPPCtrl()) { 6815 Op.addImmOperands(Inst, 1); 6816 } else if (Op.isImm()) { 6817 // Handle optional arguments 6818 OptionalIdx[Op.getImmTy()] = I; 6819 } else { 6820 llvm_unreachable("Invalid operand type"); 6821 } 6822 } 6823 } 6824 6825 if (IsDPP8) { 6826 using namespace llvm::AMDGPU::DPP; 6827 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6828 } else { 6829 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6830 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6832 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6834 } 6835 } 6836 } 6837 6838 //===----------------------------------------------------------------------===// 6839 // sdwa 6840 //===----------------------------------------------------------------------===// 6841 6842 OperandMatchResultTy 6843 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6844 AMDGPUOperand::ImmTy Type) { 6845 using namespace llvm::AMDGPU::SDWA; 6846 6847 SMLoc S = Parser.getTok().getLoc(); 6848 StringRef Value; 6849 OperandMatchResultTy res; 6850 6851 res = parseStringWithPrefix(Prefix, Value); 6852 if (res != MatchOperand_Success) { 6853 return res; 6854 } 6855 6856 int64_t Int; 6857 Int = StringSwitch<int64_t>(Value) 6858 .Case("BYTE_0", SdwaSel::BYTE_0) 6859 .Case("BYTE_1", SdwaSel::BYTE_1) 6860 .Case("BYTE_2", SdwaSel::BYTE_2) 6861 .Case("BYTE_3", SdwaSel::BYTE_3) 6862 .Case("WORD_0", SdwaSel::WORD_0) 6863 .Case("WORD_1", SdwaSel::WORD_1) 6864 .Case("DWORD", SdwaSel::DWORD) 6865 .Default(0xffffffff); 6866 Parser.Lex(); // eat last token 6867 6868 if (Int == 0xffffffff) { 6869 return MatchOperand_ParseFail; 6870 } 6871 6872 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6873 return MatchOperand_Success; 6874 } 6875 6876 OperandMatchResultTy 6877 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6878 using namespace llvm::AMDGPU::SDWA; 6879 6880 SMLoc S = Parser.getTok().getLoc(); 6881 StringRef Value; 6882 OperandMatchResultTy res; 6883 6884 res = parseStringWithPrefix("dst_unused", Value); 6885 if (res != 
MatchOperand_Success) { 6886 return res; 6887 } 6888 6889 int64_t Int; 6890 Int = StringSwitch<int64_t>(Value) 6891 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6892 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6893 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6894 .Default(0xffffffff); 6895 Parser.Lex(); // eat last token 6896 6897 if (Int == 0xffffffff) { 6898 return MatchOperand_ParseFail; 6899 } 6900 6901 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6902 return MatchOperand_Success; 6903 } 6904 6905 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6906 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6907 } 6908 6909 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6910 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6911 } 6912 6913 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6914 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 6915 } 6916 6917 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 6918 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 6919 } 6920 6921 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6922 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6923 } 6924 6925 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6926 uint64_t BasicInstType, 6927 bool SkipDstVcc, 6928 bool SkipSrcVcc) { 6929 using namespace llvm::AMDGPU::SDWA; 6930 6931 OptionalImmIndexMap OptionalIdx; 6932 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 6933 bool SkippedVcc = false; 6934 6935 unsigned I = 1; 6936 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6937 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6938 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6939 } 6940 6941 for (unsigned E = Operands.size(); I != E; ++I) { 6942 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6943 if (SkipVcc && !SkippedVcc && Op.isReg() && 6944 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6945 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6946 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6947 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6948 // Skip VCC only if we didn't skip it on previous iteration. 6949 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
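// E.g. (illustrative) "v_add_u32_sdwa v1, vcc, v2, v3 ..." has a "vcc" dst
// token to skip once vdst has been converted (NumOperands == 1), while
// "v_addc_u32_sdwa v1, vcc, v2, v3, vcc ..." also has a "vcc" src to skip
// after src0/src1 and their modifiers have been converted (NumOperands == 5).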
6950 if (BasicInstType == SIInstrFlags::VOP2 && 6951 ((SkipDstVcc && Inst.getNumOperands() == 1) || 6952 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 6953 SkippedVcc = true; 6954 continue; 6955 } else if (BasicInstType == SIInstrFlags::VOPC && 6956 Inst.getNumOperands() == 0) { 6957 SkippedVcc = true; 6958 continue; 6959 } 6960 } 6961 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6962 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6963 } else if (Op.isImm()) { 6964 // Handle optional arguments 6965 OptionalIdx[Op.getImmTy()] = I; 6966 } else { 6967 llvm_unreachable("Invalid operand type"); 6968 } 6969 SkippedVcc = false; 6970 } 6971 6972 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6973 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6974 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6975 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6976 switch (BasicInstType) { 6977 case SIInstrFlags::VOP1: 6978 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6979 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6980 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6981 } 6982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6983 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6984 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6985 break; 6986 6987 case SIInstrFlags::VOP2: 6988 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6989 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6990 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6991 } 6992 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6995 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6996 break; 6997 6998 case SIInstrFlags::VOPC: 6999 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7003 break; 7004 7005 default: 7006 llvm_unreachable("Invalid instruction type. 
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
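// Illustrative case for validateTargetOperandClass() below (a sketch, not an
// exhaustive list): in something like
//   buffer_load_dword v0, off, s[0:3], 0 glc
// the trailing "glc" is parsed as an immediate operand (ImmTyGLC) rather than
// as the literal token the generated matcher expects, so the token check is
// relaxed here for the corresponding MCK_* kinds.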
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but it should also be enabled with
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
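// Illustrative usage of the optional operand parsed above (a sketch; the
// explicit immediate form is only meaningful on targets whose s_endpgm
// encodes a SIMM16 field):
//   s_endpgm        // immediate defaults to 0
//   s_endpgm 3      // explicit 16-bit immediate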