1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u; 108 return Operand; 109 } 110 111 int64_t getIntModifiersOperand() const { 112 int64_t Operand = 0; 113 Operand |= Sext ? SISrcMods::SEXT : 0u; 114 return Operand; 115 } 116 117 int64_t getModifiersOperand() const { 118 assert(!(hasFPModifiers() && hasIntModifiers()) 119 && "fp and int modifiers should not be used simultaneously"); 120 if (hasFPModifiers()) { 121 return getFPModifiersOperand(); 122 } else if (hasIntModifiers()) { 123 return getIntModifiersOperand(); 124 } else { 125 return 0; 126 } 127 } 128 129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 130 }; 131 132 enum ImmTy { 133 ImmTyNone, 134 ImmTyGDS, 135 ImmTyLDS, 136 ImmTyOffen, 137 ImmTyIdxen, 138 ImmTyAddr64, 139 ImmTyOffset, 140 ImmTyInstOffset, 141 ImmTyOffset0, 142 ImmTyOffset1, 143 ImmTyDLC, 144 ImmTyGLC, 145 ImmTySLC, 146 ImmTySWZ, 147 ImmTyTFE, 148 ImmTyD16, 149 ImmTyClampSI, 150 ImmTyOModSI, 151 ImmTyDPP8, 152 ImmTyDppCtrl, 153 ImmTyDppRowMask, 154 ImmTyDppBankMask, 155 ImmTyDppBoundCtrl, 156 ImmTyDppFi, 157 ImmTySdwaDstSel, 158 ImmTySdwaSrc0Sel, 159 ImmTySdwaSrc1Sel, 160 ImmTySdwaDstUnused, 161 ImmTyDMask, 162 ImmTyDim, 163 ImmTyUNorm, 164 ImmTyDA, 165 ImmTyR128A16, 166 ImmTyA16, 167 ImmTyLWE, 168 ImmTyExpTgt, 169 ImmTyExpCompr, 170 ImmTyExpVM, 171 ImmTyFORMAT, 172 ImmTyHwreg, 173 ImmTyOff, 174 ImmTySendMsg, 175 ImmTyInterpSlot, 176 ImmTyInterpAttr, 177 ImmTyAttrChan, 178 ImmTyOpSel, 179 ImmTyOpSelHi, 180 ImmTyNegLo, 181 ImmTyNegHi, 182 ImmTySwizzle, 183 ImmTyGprIdxMode, 184 ImmTyHigh, 185 ImmTyBLGP, 186 ImmTyCBSZ, 187 ImmTyABID, 188 ImmTyEndpgm, 189 }; 190 191 enum ImmKindTy { 192 ImmKindTyNone, 193 ImmKindTyLiteral, 194 ImmKindTyConst, 195 }; 196 197 private: 198 struct TokOp { 199 const char *Data; 200 unsigned Length; 201 }; 202 203 struct ImmOp { 204 int64_t Val; 205 ImmTy Type; 206 bool IsFPImm; 207 mutable ImmKindTy Kind; 208 Modifiers Mods; 209 }; 210 211 struct RegOp { 212 unsigned RegNo; 213 Modifiers Mods; 214 }; 215 216 union { 217 TokOp Tok; 218 ImmOp Imm; 219 RegOp Reg; 220 const MCExpr *Expr; 221 }; 222 223 public: 224 bool isToken() const override { 225 if (Kind == Token) 226 return true; 227 228 // When parsing operands, we can't always tell if something was meant to be 229 // a token, like 'gds', or an expression that references a global variable. 230 // In this case, we assume the string is an expression, and if we need to 231 // interpret is a token, then we treat the symbol name as the token. 
232 return isSymbolRefExpr(); 233 } 234 235 bool isSymbolRefExpr() const { 236 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 237 } 238 239 bool isImm() const override { 240 return Kind == Immediate; 241 } 242 243 void setImmKindNone() const { 244 assert(isImm()); 245 Imm.Kind = ImmKindTyNone; 246 } 247 248 void setImmKindLiteral() const { 249 assert(isImm()); 250 Imm.Kind = ImmKindTyLiteral; 251 } 252 253 void setImmKindConst() const { 254 assert(isImm()); 255 Imm.Kind = ImmKindTyConst; 256 } 257 258 bool IsImmKindLiteral() const { 259 return isImm() && Imm.Kind == ImmKindTyLiteral; 260 } 261 262 bool isImmKindConst() const { 263 return isImm() && Imm.Kind == ImmKindTyConst; 264 } 265 266 bool isInlinableImm(MVT type) const; 267 bool isLiteralImm(MVT type) const; 268 269 bool isRegKind() const { 270 return Kind == Register; 271 } 272 273 bool isReg() const override { 274 return isRegKind() && !hasModifiers(); 275 } 276 277 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 278 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 279 } 280 281 bool isRegOrImmWithInt16InputMods() const { 282 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 283 } 284 285 bool isRegOrImmWithInt32InputMods() const { 286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 287 } 288 289 bool isRegOrImmWithInt64InputMods() const { 290 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 291 } 292 293 bool isRegOrImmWithFP16InputMods() const { 294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 295 } 296 297 bool isRegOrImmWithFP32InputMods() const { 298 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 299 } 300 301 bool isRegOrImmWithFP64InputMods() const { 302 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 303 } 304 305 bool isVReg() const { 306 return isRegClass(AMDGPU::VGPR_32RegClassID) || 307 isRegClass(AMDGPU::VReg_64RegClassID) || 308 isRegClass(AMDGPU::VReg_96RegClassID) || 309 isRegClass(AMDGPU::VReg_128RegClassID) || 310 isRegClass(AMDGPU::VReg_160RegClassID) || 311 isRegClass(AMDGPU::VReg_192RegClassID) || 312 isRegClass(AMDGPU::VReg_256RegClassID) || 313 isRegClass(AMDGPU::VReg_512RegClassID) || 314 isRegClass(AMDGPU::VReg_1024RegClassID); 315 } 316 317 bool isVReg32() const { 318 return isRegClass(AMDGPU::VGPR_32RegClassID); 319 } 320 321 bool isVReg32OrOff() const { 322 return isOff() || isVReg32(); 323 } 324 325 bool isNull() const { 326 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 327 } 328 329 bool isSDWAOperand(MVT type) const; 330 bool isSDWAFP16Operand() const; 331 bool isSDWAFP32Operand() const; 332 bool isSDWAInt16Operand() const; 333 bool isSDWAInt32Operand() const; 334 335 bool isImmTy(ImmTy ImmT) const { 336 return isImm() && Imm.Type == ImmT; 337 } 338 339 bool isImmModifier() const { 340 return isImm() && Imm.Type != ImmTyNone; 341 } 342 343 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 344 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 345 bool isDMask() const { return isImmTy(ImmTyDMask); } 346 bool isDim() const { return isImmTy(ImmTyDim); } 347 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 348 bool isDA() const { return isImmTy(ImmTyDA); } 349 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 350 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 351 bool isLWE() const { return isImmTy(ImmTyLWE); } 352 bool isOff() const { return isImmTy(ImmTyOff); } 353 bool isExpTgt() const { return 
isImmTy(ImmTyExpTgt); } 354 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 355 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 356 bool isOffen() const { return isImmTy(ImmTyOffen); } 357 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 358 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 359 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 360 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 361 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 362 363 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 364 bool isGDS() const { return isImmTy(ImmTyGDS); } 365 bool isLDS() const { return isImmTy(ImmTyLDS); } 366 bool isDLC() const { return isImmTy(ImmTyDLC); } 367 bool isGLC() const { return isImmTy(ImmTyGLC); } 368 // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced 369 // value of the GLC operand. 370 bool isGLC_1() const { return isImmTy(ImmTyGLC); } 371 bool isSLC() const { return isImmTy(ImmTySLC); } 372 bool isSWZ() const { return isImmTy(ImmTySWZ); } 373 bool isTFE() const { return isImmTy(ImmTyTFE); } 374 bool isD16() const { return isImmTy(ImmTyD16); } 375 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 376 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 377 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 378 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 379 bool isFI() const { return isImmTy(ImmTyDppFi); } 380 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 381 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 382 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 383 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 384 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 385 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 386 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 387 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 388 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 389 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 390 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 391 bool isHigh() const { return isImmTy(ImmTyHigh); } 392 393 bool isMod() const { 394 return isClampSI() || isOModSI(); 395 } 396 397 bool isRegOrImm() const { 398 return isReg() || isImm(); 399 } 400 401 bool isRegClass(unsigned RCID) const; 402 403 bool isInlineValue() const; 404 405 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 406 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 407 } 408 409 bool isSCSrcB16() const { 410 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 411 } 412 413 bool isSCSrcV2B16() const { 414 return isSCSrcB16(); 415 } 416 417 bool isSCSrcB32() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 419 } 420 421 bool isSCSrcB64() const { 422 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 423 } 424 425 bool isBoolReg() const; 426 427 bool isSCSrcF16() const { 428 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 429 } 430 431 bool isSCSrcV2F16() const { 432 return isSCSrcF16(); 433 } 434 435 bool isSCSrcF32() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 437 } 438 439 bool isSCSrcF64() const { 440 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 441 } 442 443 
bool isSSrcB32() const { 444 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 445 } 446 447 bool isSSrcB16() const { 448 return isSCSrcB16() || isLiteralImm(MVT::i16); 449 } 450 451 bool isSSrcV2B16() const { 452 llvm_unreachable("cannot happen"); 453 return isSSrcB16(); 454 } 455 456 bool isSSrcB64() const { 457 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 458 // See isVSrc64(). 459 return isSCSrcB64() || isLiteralImm(MVT::i64); 460 } 461 462 bool isSSrcF32() const { 463 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 464 } 465 466 bool isSSrcF64() const { 467 return isSCSrcB64() || isLiteralImm(MVT::f64); 468 } 469 470 bool isSSrcF16() const { 471 return isSCSrcB16() || isLiteralImm(MVT::f16); 472 } 473 474 bool isSSrcV2F16() const { 475 llvm_unreachable("cannot happen"); 476 return isSSrcF16(); 477 } 478 479 bool isSSrcOrLdsB32() const { 480 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 481 isLiteralImm(MVT::i32) || isExpr(); 482 } 483 484 bool isVCSrcB32() const { 485 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 486 } 487 488 bool isVCSrcB64() const { 489 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 490 } 491 492 bool isVCSrcB16() const { 493 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 494 } 495 496 bool isVCSrcV2B16() const { 497 return isVCSrcB16(); 498 } 499 500 bool isVCSrcF32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 502 } 503 504 bool isVCSrcF64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 506 } 507 508 bool isVCSrcF16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 510 } 511 512 bool isVCSrcV2F16() const { 513 return isVCSrcF16(); 514 } 515 516 bool isVSrcB32() const { 517 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 518 } 519 520 bool isVSrcB64() const { 521 return isVCSrcF64() || isLiteralImm(MVT::i64); 522 } 523 524 bool isVSrcB16() const { 525 return isVCSrcB16() || isLiteralImm(MVT::i16); 526 } 527 528 bool isVSrcV2B16() const { 529 return isVSrcB16() || isLiteralImm(MVT::v2i16); 530 } 531 532 bool isVSrcF32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 534 } 535 536 bool isVSrcF64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::f64); 538 } 539 540 bool isVSrcF16() const { 541 return isVCSrcF16() || isLiteralImm(MVT::f16); 542 } 543 544 bool isVSrcV2F16() const { 545 return isVSrcF16() || isLiteralImm(MVT::v2f16); 546 } 547 548 bool isVISrcB32() const { 549 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 550 } 551 552 bool isVISrcB16() const { 553 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 554 } 555 556 bool isVISrcV2B16() const { 557 return isVISrcB16(); 558 } 559 560 bool isVISrcF32() const { 561 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 562 } 563 564 bool isVISrcF16() const { 565 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 566 } 567 568 bool isVISrcV2F16() const { 569 return isVISrcF16() || isVISrcB32(); 570 } 571 572 bool isAISrcB32() const { 573 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 574 } 575 576 bool isAISrcB16() const { 577 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 578 } 579 580 bool isAISrcV2B16() const { 581 return isAISrcB16(); 582 } 583 584 bool isAISrcF32() const { 585 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 586 } 587 588 bool 
isAISrcF16() const { 589 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 590 } 591 592 bool isAISrcV2F16() const { 593 return isAISrcF16() || isAISrcB32(); 594 } 595 596 bool isAISrc_128B32() const { 597 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 598 } 599 600 bool isAISrc_128B16() const { 601 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 602 } 603 604 bool isAISrc_128V2B16() const { 605 return isAISrc_128B16(); 606 } 607 608 bool isAISrc_128F32() const { 609 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 610 } 611 612 bool isAISrc_128F16() const { 613 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 614 } 615 616 bool isAISrc_128V2F16() const { 617 return isAISrc_128F16() || isAISrc_128B32(); 618 } 619 620 bool isAISrc_512B32() const { 621 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 622 } 623 624 bool isAISrc_512B16() const { 625 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 626 } 627 628 bool isAISrc_512V2B16() const { 629 return isAISrc_512B16(); 630 } 631 632 bool isAISrc_512F32() const { 633 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 634 } 635 636 bool isAISrc_512F16() const { 637 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 638 } 639 640 bool isAISrc_512V2F16() const { 641 return isAISrc_512F16() || isAISrc_512B32(); 642 } 643 644 bool isAISrc_1024B32() const { 645 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 646 } 647 648 bool isAISrc_1024B16() const { 649 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 650 } 651 652 bool isAISrc_1024V2B16() const { 653 return isAISrc_1024B16(); 654 } 655 656 bool isAISrc_1024F32() const { 657 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 658 } 659 660 bool isAISrc_1024F16() const { 661 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 662 } 663 664 bool isAISrc_1024V2F16() const { 665 return isAISrc_1024F16() || isAISrc_1024B32(); 666 } 667 668 bool isKImmFP32() const { 669 return isLiteralImm(MVT::f32); 670 } 671 672 bool isKImmFP16() const { 673 return isLiteralImm(MVT::f16); 674 } 675 676 bool isMem() const override { 677 return false; 678 } 679 680 bool isExpr() const { 681 return Kind == Expression; 682 } 683 684 bool isSoppBrTarget() const { 685 return isExpr() || isImm(); 686 } 687 688 bool isSWaitCnt() const; 689 bool isHwreg() const; 690 bool isSendMsg() const; 691 bool isSwizzle() const; 692 bool isSMRDOffset8() const; 693 bool isSMEMOffset() const; 694 bool isSMRDLiteralOffset() const; 695 bool isDPP8() const; 696 bool isDPPCtrl() const; 697 bool isBLGP() const; 698 bool isCBSZ() const; 699 bool isABID() const; 700 bool isGPRIdxMode() const; 701 bool isS16Imm() const; 702 bool isU16Imm() const; 703 bool isEndpgm() const; 704 705 StringRef getExpressionAsToken() const { 706 assert(isExpr()); 707 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 708 return S->getSymbol().getName(); 709 } 710 711 StringRef getToken() const { 712 assert(isToken()); 713 714 if (Kind == Expression) 715 return getExpressionAsToken(); 716 717 return StringRef(Tok.Data, Tok.Length); 718 } 719 720 int64_t getImm() const { 721 assert(isImm()); 722 return Imm.Val; 723 } 724 725 void setImm(int64_t Val) { 726 assert(isImm()); 727 Imm.Val = Val; 728 } 729 730 ImmTy getImmTy() const { 731 assert(isImm()); 732 return Imm.Type; 733 } 734 735 unsigned getReg() const override { 736 assert(isRegKind()); 737 
return Reg.RegNo; 738 } 739 740 SMLoc getStartLoc() const override { 741 return StartLoc; 742 } 743 744 SMLoc getEndLoc() const override { 745 return EndLoc; 746 } 747 748 SMRange getLocRange() const { 749 return SMRange(StartLoc, EndLoc); 750 } 751 752 Modifiers getModifiers() const { 753 assert(isRegKind() || isImmTy(ImmTyNone)); 754 return isRegKind() ? Reg.Mods : Imm.Mods; 755 } 756 757 void setModifiers(Modifiers Mods) { 758 assert(isRegKind() || isImmTy(ImmTyNone)); 759 if (isRegKind()) 760 Reg.Mods = Mods; 761 else 762 Imm.Mods = Mods; 763 } 764 765 bool hasModifiers() const { 766 return getModifiers().hasModifiers(); 767 } 768 769 bool hasFPModifiers() const { 770 return getModifiers().hasFPModifiers(); 771 } 772 773 bool hasIntModifiers() const { 774 return getModifiers().hasIntModifiers(); 775 } 776 777 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 778 779 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 780 781 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 782 783 template <unsigned Bitwidth> 784 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 785 786 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 787 addKImmFPOperands<16>(Inst, N); 788 } 789 790 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 791 addKImmFPOperands<32>(Inst, N); 792 } 793 794 void addRegOperands(MCInst &Inst, unsigned N) const; 795 796 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 797 addRegOperands(Inst, N); 798 } 799 800 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 801 if (isRegKind()) 802 addRegOperands(Inst, N); 803 else if (isExpr()) 804 Inst.addOperand(MCOperand::createExpr(Expr)); 805 else 806 addImmOperands(Inst, N); 807 } 808 809 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 810 Modifiers Mods = getModifiers(); 811 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 812 if (isRegKind()) { 813 addRegOperands(Inst, N); 814 } else { 815 addImmOperands(Inst, N, false); 816 } 817 } 818 819 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 820 assert(!hasIntModifiers()); 821 addRegOrImmWithInputModsOperands(Inst, N); 822 } 823 824 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 825 assert(!hasFPModifiers()); 826 addRegOrImmWithInputModsOperands(Inst, N); 827 } 828 829 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 830 Modifiers Mods = getModifiers(); 831 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 832 assert(isRegKind()); 833 addRegOperands(Inst, N); 834 } 835 836 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 837 assert(!hasIntModifiers()); 838 addRegWithInputModsOperands(Inst, N); 839 } 840 841 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 842 assert(!hasFPModifiers()); 843 addRegWithInputModsOperands(Inst, N); 844 } 845 846 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 847 if (isImm()) 848 addImmOperands(Inst, N); 849 else { 850 assert(isExpr()); 851 Inst.addOperand(MCOperand::createExpr(Expr)); 852 } 853 } 854 855 static void printImmTy(raw_ostream& OS, ImmTy Type) { 856 switch (Type) { 857 case ImmTyNone: OS << "None"; break; 858 case ImmTyGDS: OS << "GDS"; break; 859 case ImmTyLDS: OS << "LDS"; break; 860 case ImmTyOffen: OS << "Offen"; break; 861 case ImmTyIdxen: OS << "Idxen"; break; 862 case ImmTyAddr64: OS << "Addr64"; break; 863 case ImmTyOffset: OS << "Offset"; 
break; 864 case ImmTyInstOffset: OS << "InstOffset"; break; 865 case ImmTyOffset0: OS << "Offset0"; break; 866 case ImmTyOffset1: OS << "Offset1"; break; 867 case ImmTyDLC: OS << "DLC"; break; 868 case ImmTyGLC: OS << "GLC"; break; 869 case ImmTySLC: OS << "SLC"; break; 870 case ImmTySWZ: OS << "SWZ"; break; 871 case ImmTyTFE: OS << "TFE"; break; 872 case ImmTyD16: OS << "D16"; break; 873 case ImmTyFORMAT: OS << "FORMAT"; break; 874 case ImmTyClampSI: OS << "ClampSI"; break; 875 case ImmTyOModSI: OS << "OModSI"; break; 876 case ImmTyDPP8: OS << "DPP8"; break; 877 case ImmTyDppCtrl: OS << "DppCtrl"; break; 878 case ImmTyDppRowMask: OS << "DppRowMask"; break; 879 case ImmTyDppBankMask: OS << "DppBankMask"; break; 880 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 881 case ImmTyDppFi: OS << "FI"; break; 882 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 883 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 884 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 885 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 886 case ImmTyDMask: OS << "DMask"; break; 887 case ImmTyDim: OS << "Dim"; break; 888 case ImmTyUNorm: OS << "UNorm"; break; 889 case ImmTyDA: OS << "DA"; break; 890 case ImmTyR128A16: OS << "R128A16"; break; 891 case ImmTyA16: OS << "A16"; break; 892 case ImmTyLWE: OS << "LWE"; break; 893 case ImmTyOff: OS << "Off"; break; 894 case ImmTyExpTgt: OS << "ExpTgt"; break; 895 case ImmTyExpCompr: OS << "ExpCompr"; break; 896 case ImmTyExpVM: OS << "ExpVM"; break; 897 case ImmTyHwreg: OS << "Hwreg"; break; 898 case ImmTySendMsg: OS << "SendMsg"; break; 899 case ImmTyInterpSlot: OS << "InterpSlot"; break; 900 case ImmTyInterpAttr: OS << "InterpAttr"; break; 901 case ImmTyAttrChan: OS << "AttrChan"; break; 902 case ImmTyOpSel: OS << "OpSel"; break; 903 case ImmTyOpSelHi: OS << "OpSelHi"; break; 904 case ImmTyNegLo: OS << "NegLo"; break; 905 case ImmTyNegHi: OS << "NegHi"; break; 906 case ImmTySwizzle: OS << "Swizzle"; break; 907 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 908 case ImmTyHigh: OS << "High"; break; 909 case ImmTyBLGP: OS << "BLGP"; break; 910 case ImmTyCBSZ: OS << "CBSZ"; break; 911 case ImmTyABID: OS << "ABID"; break; 912 case ImmTyEndpgm: OS << "Endpgm"; break; 913 } 914 } 915 916 void print(raw_ostream &OS) const override { 917 switch (Kind) { 918 case Register: 919 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 920 break; 921 case Immediate: 922 OS << '<' << getImm(); 923 if (getImmTy() != ImmTyNone) { 924 OS << " type: "; printImmTy(OS, getImmTy()); 925 } 926 OS << " mods: " << Imm.Mods << '>'; 927 break; 928 case Token: 929 OS << '\'' << getToken() << '\''; 930 break; 931 case Expression: 932 OS << "<expr " << *Expr << '>'; 933 break; 934 } 935 } 936 937 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 938 int64_t Val, SMLoc Loc, 939 ImmTy Type = ImmTyNone, 940 bool IsFPImm = false) { 941 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 942 Op->Imm.Val = Val; 943 Op->Imm.IsFPImm = IsFPImm; 944 Op->Imm.Kind = ImmKindTyNone; 945 Op->Imm.Type = Type; 946 Op->Imm.Mods = Modifiers(); 947 Op->StartLoc = Loc; 948 Op->EndLoc = Loc; 949 return Op; 950 } 951 952 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 953 StringRef Str, SMLoc Loc, 954 bool HasExplicitEncodingSize = true) { 955 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 956 Res->Tok.Data = Str.data(); 957 Res->Tok.Length = Str.size(); 958 Res->StartLoc = Loc; 959 Res->EndLoc = Loc; 960 return Res; 961 } 962 963 
static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 964 unsigned RegNo, SMLoc S, 965 SMLoc E) { 966 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 967 Op->Reg.RegNo = RegNo; 968 Op->Reg.Mods = Modifiers(); 969 Op->StartLoc = S; 970 Op->EndLoc = E; 971 return Op; 972 } 973 974 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 975 const class MCExpr *Expr, SMLoc S) { 976 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 977 Op->Expr = Expr; 978 Op->StartLoc = S; 979 Op->EndLoc = S; 980 return Op; 981 } 982 }; 983 984 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 985 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 986 return OS; 987 } 988 989 //===----------------------------------------------------------------------===// 990 // AsmParser 991 //===----------------------------------------------------------------------===// 992 993 // Holds info related to the current kernel, e.g. count of SGPRs used. 994 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 995 // .amdgpu_hsa_kernel or at EOF. 996 class KernelScopeInfo { 997 int SgprIndexUnusedMin = -1; 998 int VgprIndexUnusedMin = -1; 999 MCContext *Ctx = nullptr; 1000 1001 void usesSgprAt(int i) { 1002 if (i >= SgprIndexUnusedMin) { 1003 SgprIndexUnusedMin = ++i; 1004 if (Ctx) { 1005 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1006 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1007 } 1008 } 1009 } 1010 1011 void usesVgprAt(int i) { 1012 if (i >= VgprIndexUnusedMin) { 1013 VgprIndexUnusedMin = ++i; 1014 if (Ctx) { 1015 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1016 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1017 } 1018 } 1019 } 1020 1021 public: 1022 KernelScopeInfo() = default; 1023 1024 void initialize(MCContext &Context) { 1025 Ctx = &Context; 1026 usesSgprAt(SgprIndexUnusedMin = -1); 1027 usesVgprAt(VgprIndexUnusedMin = -1); 1028 } 1029 1030 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1031 switch (RegKind) { 1032 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1033 case IS_AGPR: // fall through 1034 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1035 default: break; 1036 } 1037 } 1038 }; 1039 1040 class AMDGPUAsmParser : public MCTargetAsmParser { 1041 MCAsmParser &Parser; 1042 1043 // Number of extra operands parsed after the first optional operand. 1044 // This may be necessary to skip hardcoded mandatory operands. 1045 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1046 1047 unsigned ForcedEncodingSize = 0; 1048 bool ForcedDPP = false; 1049 bool ForcedSDWA = false; 1050 KernelScopeInfo KernelScope; 1051 1052 /// @name Auto-generated Match Functions 1053 /// { 1054 1055 #define GET_ASSEMBLER_HEADER 1056 #include "AMDGPUGenAsmMatcher.inc" 1057 1058 /// } 1059 1060 private: 1061 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1062 bool OutOfRangeError(SMRange Range); 1063 /// Calculate VGPR/SGPR blocks required for given target, reserved 1064 /// registers, and user-specified NextFreeXGPR values. 1065 /// 1066 /// \param Features [in] Target features, used for bug corrections. 1067 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1068 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1069 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 
1070 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1071 /// descriptor field, if valid. 1072 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1073 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1074 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1075 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1076 /// \param VGPRBlocks [out] Result VGPR block count. 1077 /// \param SGPRBlocks [out] Result SGPR block count. 1078 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1079 bool FlatScrUsed, bool XNACKUsed, 1080 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1081 SMRange VGPRRange, unsigned NextFreeSGPR, 1082 SMRange SGPRRange, unsigned &VGPRBlocks, 1083 unsigned &SGPRBlocks); 1084 bool ParseDirectiveAMDGCNTarget(); 1085 bool ParseDirectiveAMDHSAKernel(); 1086 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1087 bool ParseDirectiveHSACodeObjectVersion(); 1088 bool ParseDirectiveHSACodeObjectISA(); 1089 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1090 bool ParseDirectiveAMDKernelCodeT(); 1091 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1092 bool ParseDirectiveAMDGPUHsaKernel(); 1093 1094 bool ParseDirectiveISAVersion(); 1095 bool ParseDirectiveHSAMetadata(); 1096 bool ParseDirectivePALMetadataBegin(); 1097 bool ParseDirectivePALMetadata(); 1098 bool ParseDirectiveAMDGPULDS(); 1099 1100 /// Common code to parse out a block of text (typically YAML) between start and 1101 /// end directives. 1102 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1103 const char *AssemblerDirectiveEnd, 1104 std::string &CollectString); 1105 1106 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1107 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1108 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1109 unsigned &RegNum, unsigned &RegWidth, 1110 bool RestoreOnFailure = false); 1111 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1112 unsigned &RegNum, unsigned &RegWidth, 1113 SmallVectorImpl<AsmToken> &Tokens); 1114 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1115 unsigned &RegWidth, 1116 SmallVectorImpl<AsmToken> &Tokens); 1117 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1118 unsigned &RegWidth, 1119 SmallVectorImpl<AsmToken> &Tokens); 1120 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1121 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1122 bool ParseRegRange(unsigned& Num, unsigned& Width); 1123 unsigned getRegularReg(RegisterKind RegKind, 1124 unsigned RegNum, 1125 unsigned RegWidth, 1126 SMLoc Loc); 1127 1128 bool isRegister(); 1129 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1130 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1131 void initializeGprCountSymbol(RegisterKind RegKind); 1132 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1133 unsigned RegWidth); 1134 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1135 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1136 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1137 bool IsGdsHardcoded); 1138 1139 public: 1140 enum AMDGPUMatchResultTy { 1141 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1142 }; 1143 enum OperandMode { 1144 OperandMode_Default, 1145 OperandMode_NSA, 1146 }; 1147 1148 using OptionalImmIndexMap = 
std::map<AMDGPUOperand::ImmTy, unsigned>; 1149 1150 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1151 const MCInstrInfo &MII, 1152 const MCTargetOptions &Options) 1153 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1154 MCAsmParserExtension::Initialize(Parser); 1155 1156 if (getFeatureBits().none()) { 1157 // Set default features. 1158 copySTI().ToggleFeature("southern-islands"); 1159 } 1160 1161 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1162 1163 { 1164 // TODO: make those pre-defined variables read-only. 1165 // Currently there is none suitable machinery in the core llvm-mc for this. 1166 // MCSymbol::isRedefinable is intended for another purpose, and 1167 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1168 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1169 MCContext &Ctx = getContext(); 1170 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1171 MCSymbol *Sym = 1172 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1173 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1174 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1175 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1176 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1177 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1178 } else { 1179 MCSymbol *Sym = 1180 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1181 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1182 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1183 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1184 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1185 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1186 } 1187 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1188 initializeGprCountSymbol(IS_VGPR); 1189 initializeGprCountSymbol(IS_SGPR); 1190 } else 1191 KernelScope.initialize(getContext()); 1192 } 1193 } 1194 1195 bool hasXNACK() const { 1196 return AMDGPU::hasXNACK(getSTI()); 1197 } 1198 1199 bool hasMIMG_R128() const { 1200 return AMDGPU::hasMIMG_R128(getSTI()); 1201 } 1202 1203 bool hasPackedD16() const { 1204 return AMDGPU::hasPackedD16(getSTI()); 1205 } 1206 1207 bool hasGFX10A16() const { 1208 return AMDGPU::hasGFX10A16(getSTI()); 1209 } 1210 1211 bool isSI() const { 1212 return AMDGPU::isSI(getSTI()); 1213 } 1214 1215 bool isCI() const { 1216 return AMDGPU::isCI(getSTI()); 1217 } 1218 1219 bool isVI() const { 1220 return AMDGPU::isVI(getSTI()); 1221 } 1222 1223 bool isGFX9() const { 1224 return AMDGPU::isGFX9(getSTI()); 1225 } 1226 1227 bool isGFX9Plus() const { 1228 return AMDGPU::isGFX9Plus(getSTI()); 1229 } 1230 1231 bool isGFX10() const { 1232 return AMDGPU::isGFX10(getSTI()); 1233 } 1234 1235 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1236 1237 bool isGFX10_BEncoding() const { 1238 return AMDGPU::isGFX10_BEncoding(getSTI()); 1239 } 1240 1241 bool hasInv2PiInlineImm() const { 1242 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1243 } 1244 1245 bool hasFlatOffsets() const { 1246 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1247 } 1248 1249 bool hasSGPR102_SGPR103() const { 1250 return !isVI() && !isGFX9(); 1251 } 1252 1253 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1254 1255 bool hasIntClamp() const { 1256 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1257 } 1258 1259 
AMDGPUTargetStreamer &getTargetStreamer() { 1260 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1261 return static_cast<AMDGPUTargetStreamer &>(TS); 1262 } 1263 1264 const MCRegisterInfo *getMRI() const { 1265 // We need this const_cast because for some reason getContext() is not const 1266 // in MCAsmParser. 1267 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1268 } 1269 1270 const MCInstrInfo *getMII() const { 1271 return &MII; 1272 } 1273 1274 const FeatureBitset &getFeatureBits() const { 1275 return getSTI().getFeatureBits(); 1276 } 1277 1278 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1279 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1280 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1281 1282 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1283 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1284 bool isForcedDPP() const { return ForcedDPP; } 1285 bool isForcedSDWA() const { return ForcedSDWA; } 1286 ArrayRef<unsigned> getMatchedVariants() const; 1287 StringRef getMatchedVariantName() const; 1288 1289 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1290 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1291 bool RestoreOnFailure); 1292 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1293 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1294 SMLoc &EndLoc) override; 1295 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1296 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1297 unsigned Kind) override; 1298 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1299 OperandVector &Operands, MCStreamer &Out, 1300 uint64_t &ErrorInfo, 1301 bool MatchingInlineAsm) override; 1302 bool ParseDirective(AsmToken DirectiveID) override; 1303 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1304 OperandMode Mode = OperandMode_Default); 1305 StringRef parseMnemonicSuffix(StringRef Name); 1306 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1307 SMLoc NameLoc, OperandVector &Operands) override; 1308 //bool ProcessInstruction(MCInst &Inst); 1309 1310 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1311 1312 OperandMatchResultTy 1313 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1314 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1315 bool (*ConvertResult)(int64_t &) = nullptr); 1316 1317 OperandMatchResultTy 1318 parseOperandArrayWithPrefix(const char *Prefix, 1319 OperandVector &Operands, 1320 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1321 bool (*ConvertResult)(int64_t&) = nullptr); 1322 1323 OperandMatchResultTy 1324 parseNamedBit(const char *Name, OperandVector &Operands, 1325 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1326 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1327 StringRef &Value); 1328 1329 bool isModifier(); 1330 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1331 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1332 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1333 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1334 bool parseSP3NegModifier(); 1335 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier 
= false); 1336 OperandMatchResultTy parseReg(OperandVector &Operands); 1337 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1338 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1339 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1340 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1341 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1342 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1343 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1344 OperandMatchResultTy parseUfmt(int64_t &Format); 1345 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1346 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1347 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1348 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1349 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1350 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1351 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1352 1353 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1354 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1355 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1356 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1357 1358 bool parseCnt(int64_t &IntVal); 1359 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1360 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1361 1362 private: 1363 struct OperandInfoTy { 1364 SMLoc Loc; 1365 int64_t Id; 1366 bool IsSymbolic = false; 1367 bool IsDefined = false; 1368 1369 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1370 }; 1371 1372 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1373 bool validateSendMsg(const OperandInfoTy &Msg, 1374 const OperandInfoTy &Op, 1375 const OperandInfoTy &Stream); 1376 1377 bool parseHwregBody(OperandInfoTy &HwReg, 1378 OperandInfoTy &Offset, 1379 OperandInfoTy &Width); 1380 bool validateHwreg(const OperandInfoTy &HwReg, 1381 const OperandInfoTy &Offset, 1382 const OperandInfoTy &Width); 1383 1384 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1385 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1386 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1387 1388 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1389 const OperandVector &Operands) const; 1390 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1391 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1392 SMLoc getLitLoc(const OperandVector &Operands) const; 1393 SMLoc getConstLoc(const OperandVector &Operands) const; 1394 1395 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1396 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1397 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1398 bool validateSOPLiteral(const MCInst &Inst) const; 1399 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1400 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1401 bool 
validateIntClampSupported(const MCInst &Inst); 1402 bool validateMIMGAtomicDMask(const MCInst &Inst); 1403 bool validateMIMGGatherDMask(const MCInst &Inst); 1404 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1405 bool validateMIMGDataSize(const MCInst &Inst); 1406 bool validateMIMGAddrSize(const MCInst &Inst); 1407 bool validateMIMGD16(const MCInst &Inst); 1408 bool validateMIMGDim(const MCInst &Inst); 1409 bool validateLdsDirect(const MCInst &Inst); 1410 bool validateOpSel(const MCInst &Inst); 1411 bool validateVccOperand(unsigned Reg) const; 1412 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1413 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1414 bool validateDivScale(const MCInst &Inst); 1415 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1416 const SMLoc &IDLoc); 1417 unsigned getConstantBusLimit(unsigned Opcode) const; 1418 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1419 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1420 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1421 1422 bool isSupportedMnemo(StringRef Mnemo, 1423 const FeatureBitset &FBS); 1424 bool isSupportedMnemo(StringRef Mnemo, 1425 const FeatureBitset &FBS, 1426 ArrayRef<unsigned> Variants); 1427 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1428 1429 bool isId(const StringRef Id) const; 1430 bool isId(const AsmToken &Token, const StringRef Id) const; 1431 bool isToken(const AsmToken::TokenKind Kind) const; 1432 bool trySkipId(const StringRef Id); 1433 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1434 bool trySkipToken(const AsmToken::TokenKind Kind); 1435 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1436 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1437 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1438 1439 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1440 AsmToken::TokenKind getTokenKind() const; 1441 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1442 bool parseExpr(OperandVector &Operands); 1443 StringRef getTokenStr() const; 1444 AsmToken peekToken(); 1445 AsmToken getToken() const; 1446 SMLoc getLoc() const; 1447 void lex(); 1448 1449 public: 1450 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1451 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1452 1453 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1454 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1455 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1456 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1457 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1458 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1459 1460 bool parseSwizzleOperand(int64_t &Op, 1461 const unsigned MinVal, 1462 const unsigned MaxVal, 1463 const StringRef ErrMsg, 1464 SMLoc &Loc); 1465 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1466 const unsigned MinVal, 1467 const unsigned MaxVal, 1468 const StringRef ErrMsg); 1469 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1470 bool parseSwizzleOffset(int64_t &Imm); 1471 bool parseSwizzleMacro(int64_t &Imm); 1472 bool parseSwizzleQuadPerm(int64_t &Imm); 1473 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1474 bool parseSwizzleBroadcast(int64_t &Imm); 1475 bool parseSwizzleSwap(int64_t &Imm); 1476 
bool parseSwizzleReverse(int64_t &Imm); 1477 1478 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1479 int64_t parseGPRIdxMacro(); 1480 1481 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1482 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1483 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1484 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1485 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1486 1487 AMDGPUOperand::Ptr defaultDLC() const; 1488 AMDGPUOperand::Ptr defaultGLC() const; 1489 AMDGPUOperand::Ptr defaultGLC_1() const; 1490 AMDGPUOperand::Ptr defaultSLC() const; 1491 1492 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1493 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1494 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1495 AMDGPUOperand::Ptr defaultFlatOffset() const; 1496 1497 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1498 1499 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1500 OptionalImmIndexMap &OptionalIdx); 1501 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1502 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1503 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1504 1505 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1506 1507 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1508 bool IsAtomic = false); 1509 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1510 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1511 1512 OperandMatchResultTy parseDim(OperandVector &Operands); 1513 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1514 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1515 AMDGPUOperand::Ptr defaultRowMask() const; 1516 AMDGPUOperand::Ptr defaultBankMask() const; 1517 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1518 AMDGPUOperand::Ptr defaultFI() const; 1519 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1520 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1521 1522 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1523 AMDGPUOperand::ImmTy Type); 1524 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1525 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1526 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1527 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1528 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1529 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1530 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1531 uint64_t BasicInstType, 1532 bool SkipDstVcc = false, 1533 bool SkipSrcVcc = false); 1534 1535 AMDGPUOperand::Ptr defaultBLGP() const; 1536 AMDGPUOperand::Ptr defaultCBSZ() const; 1537 AMDGPUOperand::Ptr defaultABID() const; 1538 1539 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1540 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1541 }; 1542 1543 struct OptionalOperand { 1544 const char *Name; 1545 AMDGPUOperand::ImmTy Type; 1546 bool IsBit; 1547 bool (*ConvertResult)(int64_t&); 1548 }; 1549 1550 } // end anonymous namespace 1551 1552 // May be called with integer type with 
equivalent bitwidth. 1553 static const fltSemantics *getFltSemantics(unsigned Size) { 1554 switch (Size) { 1555 case 4: 1556 return &APFloat::IEEEsingle(); 1557 case 8: 1558 return &APFloat::IEEEdouble(); 1559 case 2: 1560 return &APFloat::IEEEhalf(); 1561 default: 1562 llvm_unreachable("unsupported fp type"); 1563 } 1564 } 1565 1566 static const fltSemantics *getFltSemantics(MVT VT) { 1567 return getFltSemantics(VT.getSizeInBits() / 8); 1568 } 1569 1570 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1571 switch (OperandType) { 1572 case AMDGPU::OPERAND_REG_IMM_INT32: 1573 case AMDGPU::OPERAND_REG_IMM_FP32: 1574 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1575 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1576 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1577 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1578 return &APFloat::IEEEsingle(); 1579 case AMDGPU::OPERAND_REG_IMM_INT64: 1580 case AMDGPU::OPERAND_REG_IMM_FP64: 1581 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1582 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1583 return &APFloat::IEEEdouble(); 1584 case AMDGPU::OPERAND_REG_IMM_INT16: 1585 case AMDGPU::OPERAND_REG_IMM_FP16: 1586 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1587 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1588 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1589 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1590 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1591 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1592 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1593 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1594 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1595 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1596 return &APFloat::IEEEhalf(); 1597 default: 1598 llvm_unreachable("unsupported fp type"); 1599 } 1600 } 1601 1602 //===----------------------------------------------------------------------===// 1603 // Operand 1604 //===----------------------------------------------------------------------===// 1605 1606 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1607 bool Lost; 1608 1609 // Convert literal to single precision 1610 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1611 APFloat::rmNearestTiesToEven, 1612 &Lost); 1613 // We allow precision lost but not overflow or underflow 1614 if (Status != APFloat::opOK && 1615 Lost && 1616 ((Status & APFloat::opOverflow) != 0 || 1617 (Status & APFloat::opUnderflow) != 0)) { 1618 return false; 1619 } 1620 1621 return true; 1622 } 1623 1624 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1625 return isUIntN(Size, Val) || isIntN(Size, Val); 1626 } 1627 1628 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1629 if (VT.getScalarType() == MVT::i16) { 1630 // FP immediate values are broken. 1631 return isInlinableIntLiteral(Val); 1632 } 1633 1634 // f16/v2f16 operands work correctly for all values. 1635 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1636 } 1637 1638 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1639 1640 // This is a hack to enable named inline values like 1641 // shared_base with both 32-bit and 64-bit operands. 1642 // Note that these values are defined as 1643 // 32-bit operands only. 1644 if (isInlineValue()) { 1645 return true; 1646 } 1647 1648 if (!isImmTy(ImmTyNone)) { 1649 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1650 return false; 1651 } 1652 // TODO: We should avoid using host float here. It would be better to 1653 // check the float bit values which is what a few other places do. 
1654 // We've had bot failures before due to weird NaN support on mips hosts. 1655 1656 APInt Literal(64, Imm.Val); 1657 1658 if (Imm.IsFPImm) { // We got fp literal token 1659 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1660 return AMDGPU::isInlinableLiteral64(Imm.Val, 1661 AsmParser->hasInv2PiInlineImm()); 1662 } 1663 1664 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1665 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1666 return false; 1667 1668 if (type.getScalarSizeInBits() == 16) { 1669 return isInlineableLiteralOp16( 1670 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1671 type, AsmParser->hasInv2PiInlineImm()); 1672 } 1673 1674 // Check if single precision literal is inlinable 1675 return AMDGPU::isInlinableLiteral32( 1676 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1677 AsmParser->hasInv2PiInlineImm()); 1678 } 1679 1680 // We got int literal token. 1681 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1682 return AMDGPU::isInlinableLiteral64(Imm.Val, 1683 AsmParser->hasInv2PiInlineImm()); 1684 } 1685 1686 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1687 return false; 1688 } 1689 1690 if (type.getScalarSizeInBits() == 16) { 1691 return isInlineableLiteralOp16( 1692 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1693 type, AsmParser->hasInv2PiInlineImm()); 1694 } 1695 1696 return AMDGPU::isInlinableLiteral32( 1697 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1698 AsmParser->hasInv2PiInlineImm()); 1699 } 1700 1701 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1702 // Check that this immediate can be added as literal 1703 if (!isImmTy(ImmTyNone)) { 1704 return false; 1705 } 1706 1707 if (!Imm.IsFPImm) { 1708 // We got int literal token. 1709 1710 if (type == MVT::f64 && hasFPModifiers()) { 1711 // Cannot apply fp modifiers to int literals preserving the same semantics 1712 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1713 // disable these cases. 1714 return false; 1715 } 1716 1717 unsigned Size = type.getSizeInBits(); 1718 if (Size == 64) 1719 Size = 32; 1720 1721 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1722 // types. 1723 return isSafeTruncation(Imm.Val, Size); 1724 } 1725 1726 // We got fp literal token 1727 if (type == MVT::f64) { // Expected 64-bit fp operand 1728 // We would set low 64-bits of literal to zeroes but we accept this literals 1729 return true; 1730 } 1731 1732 if (type == MVT::i64) { // Expected 64-bit int operand 1733 // We don't allow fp literals in 64-bit integer instructions. It is 1734 // unclear how we should encode them. 1735 return false; 1736 } 1737 1738 // We allow fp literals with f16x2 operands assuming that the specified 1739 // literal goes into the lower half and the upper half is zero. We also 1740 // require that the literal may be losslesly converted to f16. 1741 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1742 (type == MVT::v2i16)? 
MVT::i16 : type; 1743 1744 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1745 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1746 } 1747 1748 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1749 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1750 } 1751 1752 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1753 if (AsmParser->isVI()) 1754 return isVReg32(); 1755 else if (AsmParser->isGFX9Plus()) 1756 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1757 else 1758 return false; 1759 } 1760 1761 bool AMDGPUOperand::isSDWAFP16Operand() const { 1762 return isSDWAOperand(MVT::f16); 1763 } 1764 1765 bool AMDGPUOperand::isSDWAFP32Operand() const { 1766 return isSDWAOperand(MVT::f32); 1767 } 1768 1769 bool AMDGPUOperand::isSDWAInt16Operand() const { 1770 return isSDWAOperand(MVT::i16); 1771 } 1772 1773 bool AMDGPUOperand::isSDWAInt32Operand() const { 1774 return isSDWAOperand(MVT::i32); 1775 } 1776 1777 bool AMDGPUOperand::isBoolReg() const { 1778 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1779 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1780 } 1781 1782 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1783 { 1784 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1785 assert(Size == 2 || Size == 4 || Size == 8); 1786 1787 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1788 1789 if (Imm.Mods.Abs) { 1790 Val &= ~FpSignMask; 1791 } 1792 if (Imm.Mods.Neg) { 1793 Val ^= FpSignMask; 1794 } 1795 1796 return Val; 1797 } 1798 1799 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1800 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1801 Inst.getNumOperands())) { 1802 addLiteralImmOperand(Inst, Imm.Val, 1803 ApplyModifiers & 1804 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1805 } else { 1806 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1807 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1808 setImmKindNone(); 1809 } 1810 } 1811 1812 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1813 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1814 auto OpNum = Inst.getNumOperands(); 1815 // Check that this operand accepts literals 1816 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1817 1818 if (ApplyModifiers) { 1819 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1820 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1821 Val = applyInputFPModifiers(Val, Size); 1822 } 1823 1824 APInt Literal(64, Val); 1825 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1826 1827 if (Imm.IsFPImm) { // We got fp literal token 1828 switch (OpTy) { 1829 case AMDGPU::OPERAND_REG_IMM_INT64: 1830 case AMDGPU::OPERAND_REG_IMM_FP64: 1831 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1832 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1833 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1834 AsmParser->hasInv2PiInlineImm())) { 1835 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1836 setImmKindConst(); 1837 return; 1838 } 1839 1840 // Non-inlineable 1841 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1842 // For fp operands we check if low 32 bits are zeros 1843 if (Literal.getLoBits(32) != 0) { 1844 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1845 "Can't encode literal as exact 64-bit floating-point operand. " 1846 "Low 32-bits will be set to zero"); 1847 } 1848 1849 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1850 setImmKindLiteral(); 1851 return; 1852 } 1853 1854 // We don't allow fp literals in 64-bit integer instructions. It is 1855 // unclear how we should encode them. This case should be checked earlier 1856 // in predicate methods (isLiteralImm()) 1857 llvm_unreachable("fp literal in 64-bit integer instruction."); 1858 1859 case AMDGPU::OPERAND_REG_IMM_INT32: 1860 case AMDGPU::OPERAND_REG_IMM_FP32: 1861 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1862 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1863 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1865 case AMDGPU::OPERAND_REG_IMM_INT16: 1866 case AMDGPU::OPERAND_REG_IMM_FP16: 1867 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1868 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1869 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1870 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1871 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1872 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1873 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1874 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1875 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1876 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1877 bool lost; 1878 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1879 // Convert literal to single precision 1880 FPLiteral.convert(*getOpFltSemantics(OpTy), 1881 APFloat::rmNearestTiesToEven, &lost); 1882 // We allow precision lost but not overflow or underflow. This should be 1883 // checked earlier in isLiteralImm() 1884 1885 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1886 Inst.addOperand(MCOperand::createImm(ImmVal)); 1887 setImmKindLiteral(); 1888 return; 1889 } 1890 default: 1891 llvm_unreachable("invalid operand size"); 1892 } 1893 1894 return; 1895 } 1896 1897 // We got int literal token. 1898 // Only sign extend inline immediates. 
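// Illustrative sketch of the two paths below (example values, not an
// exhaustive list): for a 32-bit operand, an integer token such as 64
// passes isInlinableLiteral32() and is encoded as an inline constant,
// while a token such as 0x12345678 is not inlinable and is encoded as a
// 32-bit literal truncated to the low 32 bits of the parsed value.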
1899 switch (OpTy) { 1900 case AMDGPU::OPERAND_REG_IMM_INT32: 1901 case AMDGPU::OPERAND_REG_IMM_FP32: 1902 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1903 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1904 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1905 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1906 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1907 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1908 if (isSafeTruncation(Val, 32) && 1909 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1910 AsmParser->hasInv2PiInlineImm())) { 1911 Inst.addOperand(MCOperand::createImm(Val)); 1912 setImmKindConst(); 1913 return; 1914 } 1915 1916 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1917 setImmKindLiteral(); 1918 return; 1919 1920 case AMDGPU::OPERAND_REG_IMM_INT64: 1921 case AMDGPU::OPERAND_REG_IMM_FP64: 1922 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1923 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1924 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1925 Inst.addOperand(MCOperand::createImm(Val)); 1926 setImmKindConst(); 1927 return; 1928 } 1929 1930 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1931 setImmKindLiteral(); 1932 return; 1933 1934 case AMDGPU::OPERAND_REG_IMM_INT16: 1935 case AMDGPU::OPERAND_REG_IMM_FP16: 1936 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1937 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1938 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1939 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1940 if (isSafeTruncation(Val, 16) && 1941 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1942 AsmParser->hasInv2PiInlineImm())) { 1943 Inst.addOperand(MCOperand::createImm(Val)); 1944 setImmKindConst(); 1945 return; 1946 } 1947 1948 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1949 setImmKindLiteral(); 1950 return; 1951 1952 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1953 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1954 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1955 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1956 assert(isSafeTruncation(Val, 16)); 1957 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1958 AsmParser->hasInv2PiInlineImm())); 1959 1960 Inst.addOperand(MCOperand::createImm(Val)); 1961 return; 1962 } 1963 default: 1964 llvm_unreachable("invalid operand size"); 1965 } 1966 } 1967 1968 template <unsigned Bitwidth> 1969 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1970 APInt Literal(64, Imm.Val); 1971 setImmKindNone(); 1972 1973 if (!Imm.IsFPImm) { 1974 // We got int literal token. 
1975 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1976 return; 1977 } 1978 1979 bool Lost; 1980 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1981 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1982 APFloat::rmNearestTiesToEven, &Lost); 1983 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1984 } 1985 1986 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1987 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1988 } 1989 1990 static bool isInlineValue(unsigned Reg) { 1991 switch (Reg) { 1992 case AMDGPU::SRC_SHARED_BASE: 1993 case AMDGPU::SRC_SHARED_LIMIT: 1994 case AMDGPU::SRC_PRIVATE_BASE: 1995 case AMDGPU::SRC_PRIVATE_LIMIT: 1996 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1997 return true; 1998 case AMDGPU::SRC_VCCZ: 1999 case AMDGPU::SRC_EXECZ: 2000 case AMDGPU::SRC_SCC: 2001 return true; 2002 case AMDGPU::SGPR_NULL: 2003 return true; 2004 default: 2005 return false; 2006 } 2007 } 2008 2009 bool AMDGPUOperand::isInlineValue() const { 2010 return isRegKind() && ::isInlineValue(getReg()); 2011 } 2012 2013 //===----------------------------------------------------------------------===// 2014 // AsmParser 2015 //===----------------------------------------------------------------------===// 2016 2017 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2018 if (Is == IS_VGPR) { 2019 switch (RegWidth) { 2020 default: return -1; 2021 case 1: return AMDGPU::VGPR_32RegClassID; 2022 case 2: return AMDGPU::VReg_64RegClassID; 2023 case 3: return AMDGPU::VReg_96RegClassID; 2024 case 4: return AMDGPU::VReg_128RegClassID; 2025 case 5: return AMDGPU::VReg_160RegClassID; 2026 case 6: return AMDGPU::VReg_192RegClassID; 2027 case 8: return AMDGPU::VReg_256RegClassID; 2028 case 16: return AMDGPU::VReg_512RegClassID; 2029 case 32: return AMDGPU::VReg_1024RegClassID; 2030 } 2031 } else if (Is == IS_TTMP) { 2032 switch (RegWidth) { 2033 default: return -1; 2034 case 1: return AMDGPU::TTMP_32RegClassID; 2035 case 2: return AMDGPU::TTMP_64RegClassID; 2036 case 4: return AMDGPU::TTMP_128RegClassID; 2037 case 8: return AMDGPU::TTMP_256RegClassID; 2038 case 16: return AMDGPU::TTMP_512RegClassID; 2039 } 2040 } else if (Is == IS_SGPR) { 2041 switch (RegWidth) { 2042 default: return -1; 2043 case 1: return AMDGPU::SGPR_32RegClassID; 2044 case 2: return AMDGPU::SGPR_64RegClassID; 2045 case 3: return AMDGPU::SGPR_96RegClassID; 2046 case 4: return AMDGPU::SGPR_128RegClassID; 2047 case 5: return AMDGPU::SGPR_160RegClassID; 2048 case 6: return AMDGPU::SGPR_192RegClassID; 2049 case 8: return AMDGPU::SGPR_256RegClassID; 2050 case 16: return AMDGPU::SGPR_512RegClassID; 2051 } 2052 } else if (Is == IS_AGPR) { 2053 switch (RegWidth) { 2054 default: return -1; 2055 case 1: return AMDGPU::AGPR_32RegClassID; 2056 case 2: return AMDGPU::AReg_64RegClassID; 2057 case 3: return AMDGPU::AReg_96RegClassID; 2058 case 4: return AMDGPU::AReg_128RegClassID; 2059 case 5: return AMDGPU::AReg_160RegClassID; 2060 case 6: return AMDGPU::AReg_192RegClassID; 2061 case 8: return AMDGPU::AReg_256RegClassID; 2062 case 16: return AMDGPU::AReg_512RegClassID; 2063 case 32: return AMDGPU::AReg_1024RegClassID; 2064 } 2065 } 2066 return -1; 2067 } 2068 2069 static unsigned getSpecialRegForName(StringRef RegName) { 2070 return StringSwitch<unsigned>(RegName) 2071 .Case("exec", AMDGPU::EXEC) 2072 .Case("vcc", AMDGPU::VCC) 2073 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2074 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2075 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2076 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2077 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2078 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2079 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2080 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2081 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2082 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2083 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2084 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2085 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2086 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2087 .Case("m0", AMDGPU::M0) 2088 .Case("vccz", AMDGPU::SRC_VCCZ) 2089 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2090 .Case("execz", AMDGPU::SRC_EXECZ) 2091 .Case("src_execz", AMDGPU::SRC_EXECZ) 2092 .Case("scc", AMDGPU::SRC_SCC) 2093 .Case("src_scc", AMDGPU::SRC_SCC) 2094 .Case("tba", AMDGPU::TBA) 2095 .Case("tma", AMDGPU::TMA) 2096 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2097 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2098 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2099 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2100 .Case("vcc_lo", AMDGPU::VCC_LO) 2101 .Case("vcc_hi", AMDGPU::VCC_HI) 2102 .Case("exec_lo", AMDGPU::EXEC_LO) 2103 .Case("exec_hi", AMDGPU::EXEC_HI) 2104 .Case("tma_lo", AMDGPU::TMA_LO) 2105 .Case("tma_hi", AMDGPU::TMA_HI) 2106 .Case("tba_lo", AMDGPU::TBA_LO) 2107 .Case("tba_hi", AMDGPU::TBA_HI) 2108 .Case("pc", AMDGPU::PC_REG) 2109 .Case("null", AMDGPU::SGPR_NULL) 2110 .Default(AMDGPU::NoRegister); 2111 } 2112 2113 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2114 SMLoc &EndLoc, bool RestoreOnFailure) { 2115 auto R = parseRegister(); 2116 if (!R) return true; 2117 assert(R->isReg()); 2118 RegNo = R->getReg(); 2119 StartLoc = R->getStartLoc(); 2120 EndLoc = R->getEndLoc(); 2121 return false; 2122 } 2123 2124 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2125 SMLoc &EndLoc) { 2126 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2127 } 2128 2129 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2130 SMLoc &StartLoc, 2131 SMLoc &EndLoc) { 2132 bool Result = 2133 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2134 bool PendingErrors = getParser().hasPendingError(); 2135 getParser().clearPendingErrors(); 2136 if (PendingErrors) 2137 return MatchOperand_ParseFail; 2138 if (Result) 2139 return MatchOperand_NoMatch; 2140 return MatchOperand_Success; 2141 } 2142 2143 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2144 RegisterKind RegKind, unsigned Reg1, 2145 SMLoc Loc) { 2146 switch (RegKind) { 2147 case IS_SPECIAL: 2148 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2149 Reg = AMDGPU::EXEC; 2150 RegWidth = 2; 2151 return true; 2152 } 2153 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2154 Reg = AMDGPU::FLAT_SCR; 2155 RegWidth = 2; 2156 return true; 2157 } 2158 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2159 Reg = AMDGPU::XNACK_MASK; 2160 RegWidth = 2; 2161 return true; 2162 } 2163 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2164 Reg = AMDGPU::VCC; 2165 RegWidth = 2; 2166 return true; 2167 } 2168 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2169 Reg = AMDGPU::TBA; 2170 RegWidth = 2; 2171 return true; 2172 } 2173 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2174 Reg = AMDGPU::TMA; 2175 
RegWidth = 2; 2176 return true; 2177 } 2178 Error(Loc, "register does not fit in the list"); 2179 return false; 2180 case IS_VGPR: 2181 case IS_SGPR: 2182 case IS_AGPR: 2183 case IS_TTMP: 2184 if (Reg1 != Reg + RegWidth) { 2185 Error(Loc, "registers in a list must have consecutive indices"); 2186 return false; 2187 } 2188 RegWidth++; 2189 return true; 2190 default: 2191 llvm_unreachable("unexpected register kind"); 2192 } 2193 } 2194 2195 struct RegInfo { 2196 StringLiteral Name; 2197 RegisterKind Kind; 2198 }; 2199 2200 static constexpr RegInfo RegularRegisters[] = { 2201 {{"v"}, IS_VGPR}, 2202 {{"s"}, IS_SGPR}, 2203 {{"ttmp"}, IS_TTMP}, 2204 {{"acc"}, IS_AGPR}, 2205 {{"a"}, IS_AGPR}, 2206 }; 2207 2208 static bool isRegularReg(RegisterKind Kind) { 2209 return Kind == IS_VGPR || 2210 Kind == IS_SGPR || 2211 Kind == IS_TTMP || 2212 Kind == IS_AGPR; 2213 } 2214 2215 static const RegInfo* getRegularRegInfo(StringRef Str) { 2216 for (const RegInfo &Reg : RegularRegisters) 2217 if (Str.startswith(Reg.Name)) 2218 return &Reg; 2219 return nullptr; 2220 } 2221 2222 static bool getRegNum(StringRef Str, unsigned& Num) { 2223 return !Str.getAsInteger(10, Num); 2224 } 2225 2226 bool 2227 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2228 const AsmToken &NextToken) const { 2229 2230 // A list of consecutive registers: [s0,s1,s2,s3] 2231 if (Token.is(AsmToken::LBrac)) 2232 return true; 2233 2234 if (!Token.is(AsmToken::Identifier)) 2235 return false; 2236 2237 // A single register like s0 or a range of registers like s[0:1] 2238 2239 StringRef Str = Token.getString(); 2240 const RegInfo *Reg = getRegularRegInfo(Str); 2241 if (Reg) { 2242 StringRef RegName = Reg->Name; 2243 StringRef RegSuffix = Str.substr(RegName.size()); 2244 if (!RegSuffix.empty()) { 2245 unsigned Num; 2246 // A single register with an index: rXX 2247 if (getRegNum(RegSuffix, Num)) 2248 return true; 2249 } else { 2250 // A range of registers: r[XX:YY]. 2251 if (NextToken.is(AsmToken::LBrac)) 2252 return true; 2253 } 2254 } 2255 2256 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2257 } 2258 2259 bool 2260 AMDGPUAsmParser::isRegister() 2261 { 2262 return isRegister(getToken(), peekToken()); 2263 } 2264 2265 unsigned 2266 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2267 unsigned RegNum, 2268 unsigned RegWidth, 2269 SMLoc Loc) { 2270 2271 assert(isRegularReg(RegKind)); 2272 2273 unsigned AlignSize = 1; 2274 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2275 // SGPR and TTMP registers must be aligned. 2276 // Max required alignment is 4 dwords. 
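// For example (illustrative): a 64-bit SGPR pair must start at an even
// index, so s[2:3] is accepted while s[1:2] is rejected with
// "invalid register alignment"; ranges of four or more dwords such as
// s[4:7] must start at an index that is a multiple of 4.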
2277 AlignSize = std::min(RegWidth, 4u); 2278 } 2279 2280 if (RegNum % AlignSize != 0) { 2281 Error(Loc, "invalid register alignment"); 2282 return AMDGPU::NoRegister; 2283 } 2284 2285 unsigned RegIdx = RegNum / AlignSize; 2286 int RCID = getRegClass(RegKind, RegWidth); 2287 if (RCID == -1) { 2288 Error(Loc, "invalid or unsupported register size"); 2289 return AMDGPU::NoRegister; 2290 } 2291 2292 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2293 const MCRegisterClass RC = TRI->getRegClass(RCID); 2294 if (RegIdx >= RC.getNumRegs()) { 2295 Error(Loc, "register index is out of range"); 2296 return AMDGPU::NoRegister; 2297 } 2298 2299 return RC.getRegister(RegIdx); 2300 } 2301 2302 bool 2303 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2304 int64_t RegLo, RegHi; 2305 if (!skipToken(AsmToken::LBrac, "missing register index")) 2306 return false; 2307 2308 SMLoc FirstIdxLoc = getLoc(); 2309 SMLoc SecondIdxLoc; 2310 2311 if (!parseExpr(RegLo)) 2312 return false; 2313 2314 if (trySkipToken(AsmToken::Colon)) { 2315 SecondIdxLoc = getLoc(); 2316 if (!parseExpr(RegHi)) 2317 return false; 2318 } else { 2319 RegHi = RegLo; 2320 } 2321 2322 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2323 return false; 2324 2325 if (!isUInt<32>(RegLo)) { 2326 Error(FirstIdxLoc, "invalid register index"); 2327 return false; 2328 } 2329 2330 if (!isUInt<32>(RegHi)) { 2331 Error(SecondIdxLoc, "invalid register index"); 2332 return false; 2333 } 2334 2335 if (RegLo > RegHi) { 2336 Error(FirstIdxLoc, "first register index should not exceed second index"); 2337 return false; 2338 } 2339 2340 Num = static_cast<unsigned>(RegLo); 2341 Width = (RegHi - RegLo) + 1; 2342 return true; 2343 } 2344 2345 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2346 unsigned &RegNum, unsigned &RegWidth, 2347 SmallVectorImpl<AsmToken> &Tokens) { 2348 assert(isToken(AsmToken::Identifier)); 2349 unsigned Reg = getSpecialRegForName(getTokenStr()); 2350 if (Reg) { 2351 RegNum = 0; 2352 RegWidth = 1; 2353 RegKind = IS_SPECIAL; 2354 Tokens.push_back(getToken()); 2355 lex(); // skip register name 2356 } 2357 return Reg; 2358 } 2359 2360 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2361 unsigned &RegNum, unsigned &RegWidth, 2362 SmallVectorImpl<AsmToken> &Tokens) { 2363 assert(isToken(AsmToken::Identifier)); 2364 StringRef RegName = getTokenStr(); 2365 auto Loc = getLoc(); 2366 2367 const RegInfo *RI = getRegularRegInfo(RegName); 2368 if (!RI) { 2369 Error(Loc, "invalid register name"); 2370 return AMDGPU::NoRegister; 2371 } 2372 2373 Tokens.push_back(getToken()); 2374 lex(); // skip register name 2375 2376 RegKind = RI->Kind; 2377 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2378 if (!RegSuffix.empty()) { 2379 // Single 32-bit register: vXX. 2380 if (!getRegNum(RegSuffix, RegNum)) { 2381 Error(Loc, "invalid register index"); 2382 return AMDGPU::NoRegister; 2383 } 2384 RegWidth = 1; 2385 } else { 2386 // Range of registers: v[XX:YY]. ":YY" is optional. 
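// Illustrative forms: "v0" takes the single register path above, while
// "v[8:11]" and the single-entry range "v[8]" are handled by
// ParseRegRange() below.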
2387 if (!ParseRegRange(RegNum, RegWidth)) 2388 return AMDGPU::NoRegister; 2389 } 2390 2391 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2392 } 2393 2394 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2395 unsigned &RegWidth, 2396 SmallVectorImpl<AsmToken> &Tokens) { 2397 unsigned Reg = AMDGPU::NoRegister; 2398 auto ListLoc = getLoc(); 2399 2400 if (!skipToken(AsmToken::LBrac, 2401 "expected a register or a list of registers")) { 2402 return AMDGPU::NoRegister; 2403 } 2404 2405 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2406 2407 auto Loc = getLoc(); 2408 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2409 return AMDGPU::NoRegister; 2410 if (RegWidth != 1) { 2411 Error(Loc, "expected a single 32-bit register"); 2412 return AMDGPU::NoRegister; 2413 } 2414 2415 for (; trySkipToken(AsmToken::Comma); ) { 2416 RegisterKind NextRegKind; 2417 unsigned NextReg, NextRegNum, NextRegWidth; 2418 Loc = getLoc(); 2419 2420 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2421 NextRegNum, NextRegWidth, 2422 Tokens)) { 2423 return AMDGPU::NoRegister; 2424 } 2425 if (NextRegWidth != 1) { 2426 Error(Loc, "expected a single 32-bit register"); 2427 return AMDGPU::NoRegister; 2428 } 2429 if (NextRegKind != RegKind) { 2430 Error(Loc, "registers in a list must be of the same kind"); 2431 return AMDGPU::NoRegister; 2432 } 2433 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2434 return AMDGPU::NoRegister; 2435 } 2436 2437 if (!skipToken(AsmToken::RBrac, 2438 "expected a comma or a closing square bracket")) { 2439 return AMDGPU::NoRegister; 2440 } 2441 2442 if (isRegularReg(RegKind)) 2443 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2444 2445 return Reg; 2446 } 2447 2448 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2449 unsigned &RegNum, unsigned &RegWidth, 2450 SmallVectorImpl<AsmToken> &Tokens) { 2451 auto Loc = getLoc(); 2452 Reg = AMDGPU::NoRegister; 2453 2454 if (isToken(AsmToken::Identifier)) { 2455 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2456 if (Reg == AMDGPU::NoRegister) 2457 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2458 } else { 2459 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2460 } 2461 2462 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2463 if (Reg == AMDGPU::NoRegister) { 2464 assert(Parser.hasPendingError()); 2465 return false; 2466 } 2467 2468 if (!subtargetHasRegister(*TRI, Reg)) { 2469 if (Reg == AMDGPU::SGPR_NULL) { 2470 Error(Loc, "'null' operand is not supported on this GPU"); 2471 } else { 2472 Error(Loc, "register not available on this GPU"); 2473 } 2474 return false; 2475 } 2476 2477 return true; 2478 } 2479 2480 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2481 unsigned &RegNum, unsigned &RegWidth, 2482 bool RestoreOnFailure /*=false*/) { 2483 Reg = AMDGPU::NoRegister; 2484 2485 SmallVector<AsmToken, 1> Tokens; 2486 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2487 if (RestoreOnFailure) { 2488 while (!Tokens.empty()) { 2489 getLexer().UnLex(Tokens.pop_back_val()); 2490 } 2491 } 2492 return true; 2493 } 2494 return false; 2495 } 2496 2497 Optional<StringRef> 2498 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2499 switch (RegKind) { 2500 case IS_VGPR: 2501 return StringRef(".amdgcn.next_free_vgpr"); 2502 case IS_SGPR: 2503 return StringRef(".amdgcn.next_free_sgpr"); 2504 default: 2505 return None; 2506 } 2507 } 2508 2509 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2510 auto SymbolName = getGprCountSymbolName(RegKind); 2511 assert(SymbolName && "initializing invalid register kind"); 2512 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2513 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2514 } 2515 2516 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2517 unsigned DwordRegIndex, 2518 unsigned RegWidth) { 2519 // Symbols are only defined for GCN targets 2520 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2521 return true; 2522 2523 auto SymbolName = getGprCountSymbolName(RegKind); 2524 if (!SymbolName) 2525 return true; 2526 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2527 2528 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2529 int64_t OldCount; 2530 2531 if (!Sym->isVariable()) 2532 return !Error(getParser().getTok().getLoc(), 2533 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2534 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2535 return !Error( 2536 getParser().getTok().getLoc(), 2537 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2538 2539 if (OldCount <= NewMax) 2540 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2541 2542 return true; 2543 } 2544 2545 std::unique_ptr<AMDGPUOperand> 2546 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2547 const auto &Tok = Parser.getTok(); 2548 SMLoc StartLoc = Tok.getLoc(); 2549 SMLoc EndLoc = Tok.getEndLoc(); 2550 RegisterKind RegKind; 2551 unsigned Reg, RegNum, RegWidth; 2552 2553 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2554 return nullptr; 2555 } 2556 if (isHsaAbiVersion3(&getSTI())) { 2557 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2558 return nullptr; 2559 } else 2560 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2561 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2562 } 2563 2564 OperandMatchResultTy 2565 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2566 // TODO: add syntactic sugar for 1/(2*PI) 2567 2568 assert(!isRegister()); 2569 assert(!isModifier()); 2570 2571 const auto& Tok = getToken(); 2572 const auto& NextTok = peekToken(); 2573 bool IsReal = Tok.is(AsmToken::Real); 2574 SMLoc S = getLoc(); 2575 bool Negate = false; 2576 2577 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2578 lex(); 2579 IsReal = true; 2580 Negate = true; 2581 } 2582 2583 if (IsReal) { 2584 // Floating-point expressions are not supported. 2585 // Can only allow floating-point literals with an 2586 // optional sign. 2587 2588 StringRef Num = getTokenStr(); 2589 lex(); 2590 2591 APFloat RealVal(APFloat::IEEEdouble()); 2592 auto roundMode = APFloat::rmNearestTiesToEven; 2593 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2594 return MatchOperand_ParseFail; 2595 } 2596 if (Negate) 2597 RealVal.changeSign(); 2598 2599 Operands.push_back( 2600 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2601 AMDGPUOperand::ImmTyNone, true)); 2602 2603 return MatchOperand_Success; 2604 2605 } else { 2606 int64_t IntVal; 2607 const MCExpr *Expr; 2608 SMLoc S = getLoc(); 2609 2610 if (HasSP3AbsModifier) { 2611 // This is a workaround for handling expressions 2612 // as arguments of SP3 'abs' modifier, for example: 2613 // |1.0| 2614 // |-1| 2615 // |1+x| 2616 // This syntax is not compatible with syntax of standard 2617 // MC expressions (due to the trailing '|'). 
2618 SMLoc EndLoc; 2619 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2620 return MatchOperand_ParseFail; 2621 } else { 2622 if (Parser.parseExpression(Expr)) 2623 return MatchOperand_ParseFail; 2624 } 2625 2626 if (Expr->evaluateAsAbsolute(IntVal)) { 2627 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2628 } else { 2629 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2630 } 2631 2632 return MatchOperand_Success; 2633 } 2634 2635 return MatchOperand_NoMatch; 2636 } 2637 2638 OperandMatchResultTy 2639 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2640 if (!isRegister()) 2641 return MatchOperand_NoMatch; 2642 2643 if (auto R = parseRegister()) { 2644 assert(R->isReg()); 2645 Operands.push_back(std::move(R)); 2646 return MatchOperand_Success; 2647 } 2648 return MatchOperand_ParseFail; 2649 } 2650 2651 OperandMatchResultTy 2652 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2653 auto res = parseReg(Operands); 2654 if (res != MatchOperand_NoMatch) { 2655 return res; 2656 } else if (isModifier()) { 2657 return MatchOperand_NoMatch; 2658 } else { 2659 return parseImm(Operands, HasSP3AbsMod); 2660 } 2661 } 2662 2663 bool 2664 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2665 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2666 const auto &str = Token.getString(); 2667 return str == "abs" || str == "neg" || str == "sext"; 2668 } 2669 return false; 2670 } 2671 2672 bool 2673 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2674 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2675 } 2676 2677 bool 2678 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2679 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2680 } 2681 2682 bool 2683 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2684 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2685 } 2686 2687 // Check if this is an operand modifier or an opcode modifier 2688 // which may look like an expression but it is not. We should 2689 // avoid parsing these modifiers as expressions. Currently 2690 // recognized sequences are: 2691 // |...| 2692 // abs(...) 2693 // neg(...) 2694 // sext(...) 2695 // -reg 2696 // -|...| 2697 // -abs(...) 2698 // name:... 2699 // Note that simple opcode modifiers like 'gds' may be parsed as 2700 // expressions; this is a special case. See getExpressionAsToken. 2701 // 2702 bool 2703 AMDGPUAsmParser::isModifier() { 2704 2705 AsmToken Tok = getToken(); 2706 AsmToken NextToken[2]; 2707 peekTokens(NextToken); 2708 2709 return isOperandModifier(Tok, NextToken[0]) || 2710 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2711 isOpcodeModifierWithVal(Tok, NextToken[0]); 2712 } 2713 2714 // Check if the current token is an SP3 'neg' modifier. 2715 // Currently this modifier is allowed in the following context: 2716 // 2717 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2718 // 2. Before an 'abs' modifier: -abs(...) 2719 // 3. Before an SP3 'abs' modifier: -|...| 2720 // 2721 // In all other cases "-" is handled as a part 2722 // of an expression that follows the sign. 
2723 //
2724 // Note: When "-" is followed by an integer literal,
2725 // this is interpreted as integer negation rather
2726 // than a floating-point NEG modifier applied to the literal.
2727 // Besides being counter-intuitive, such use of a floating-point
2728 // NEG modifier would have resulted in different meanings
2729 // of integer literals used with VOP1/2/C and VOP3,
2730 // for example:
2731 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2732 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2733 // Negative fp literals with a preceding "-" are
2734 // handled likewise for uniformity.
2735 //
2736 bool
2737 AMDGPUAsmParser::parseSP3NegModifier() {
2738
2739 AsmToken NextToken[2];
2740 peekTokens(NextToken);
2741
2742 if (isToken(AsmToken::Minus) &&
2743 (isRegister(NextToken[0], NextToken[1]) ||
2744 NextToken[0].is(AsmToken::Pipe) ||
2745 isId(NextToken[0], "abs"))) {
2746 lex();
2747 return true;
2748 }
2749
2750 return false;
2751 }
2752
2753 OperandMatchResultTy
2754 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2755 bool AllowImm) {
2756 bool Neg, SP3Neg;
2757 bool Abs, SP3Abs;
2758 SMLoc Loc;
2759
2760 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2761 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2762 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2763 return MatchOperand_ParseFail;
2764 }
2765
2766 SP3Neg = parseSP3NegModifier();
2767
2768 Loc = getLoc();
2769 Neg = trySkipId("neg");
2770 if (Neg && SP3Neg) {
2771 Error(Loc, "expected register or immediate");
2772 return MatchOperand_ParseFail;
2773 }
2774 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2775 return MatchOperand_ParseFail;
2776
2777 Abs = trySkipId("abs");
2778 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2779 return MatchOperand_ParseFail;
2780
2781 Loc = getLoc();
2782 SP3Abs = trySkipToken(AsmToken::Pipe);
2783 if (Abs && SP3Abs) {
2784 Error(Loc, "expected register or immediate");
2785 return MatchOperand_ParseFail;
2786 }
2787
2788 OperandMatchResultTy Res;
2789 if (AllowImm) {
2790 Res = parseRegOrImm(Operands, SP3Abs);
2791 } else {
2792 Res = parseReg(Operands);
2793 }
2794 if (Res != MatchOperand_Success) {
2795 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2796 } 2797 2798 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2799 return MatchOperand_ParseFail; 2800 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2801 return MatchOperand_ParseFail; 2802 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2803 return MatchOperand_ParseFail; 2804 2805 AMDGPUOperand::Modifiers Mods; 2806 Mods.Abs = Abs || SP3Abs; 2807 Mods.Neg = Neg || SP3Neg; 2808 2809 if (Mods.hasFPModifiers()) { 2810 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2811 if (Op.isExpr()) { 2812 Error(Op.getStartLoc(), "expected an absolute expression"); 2813 return MatchOperand_ParseFail; 2814 } 2815 Op.setModifiers(Mods); 2816 } 2817 return MatchOperand_Success; 2818 } 2819 2820 OperandMatchResultTy 2821 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2822 bool AllowImm) { 2823 bool Sext = trySkipId("sext"); 2824 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2825 return MatchOperand_ParseFail; 2826 2827 OperandMatchResultTy Res; 2828 if (AllowImm) { 2829 Res = parseRegOrImm(Operands); 2830 } else { 2831 Res = parseReg(Operands); 2832 } 2833 if (Res != MatchOperand_Success) { 2834 return Sext? MatchOperand_ParseFail : Res; 2835 } 2836 2837 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2838 return MatchOperand_ParseFail; 2839 2840 AMDGPUOperand::Modifiers Mods; 2841 Mods.Sext = Sext; 2842 2843 if (Mods.hasIntModifiers()) { 2844 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2845 if (Op.isExpr()) { 2846 Error(Op.getStartLoc(), "expected an absolute expression"); 2847 return MatchOperand_ParseFail; 2848 } 2849 Op.setModifiers(Mods); 2850 } 2851 2852 return MatchOperand_Success; 2853 } 2854 2855 OperandMatchResultTy 2856 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2857 return parseRegOrImmWithFPInputMods(Operands, false); 2858 } 2859 2860 OperandMatchResultTy 2861 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2862 return parseRegOrImmWithIntInputMods(Operands, false); 2863 } 2864 2865 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2866 auto Loc = getLoc(); 2867 if (trySkipId("off")) { 2868 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2869 AMDGPUOperand::ImmTyOff, false)); 2870 return MatchOperand_Success; 2871 } 2872 2873 if (!isRegister()) 2874 return MatchOperand_NoMatch; 2875 2876 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2877 if (Reg) { 2878 Operands.push_back(std::move(Reg)); 2879 return MatchOperand_Success; 2880 } 2881 2882 return MatchOperand_ParseFail; 2883 2884 } 2885 2886 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2887 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2888 2889 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2890 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2891 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2892 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2893 return Match_InvalidOperand; 2894 2895 if ((TSFlags & SIInstrFlags::VOP3) && 2896 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2897 getForcedEncodingSize() != 64) 2898 return Match_PreferE32; 2899 2900 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2901 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2902 // v_mac_f32/16 allow only dst_sel == DWORD; 2903 auto OpNum = 2904 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2905 const auto &Op = Inst.getOperand(OpNum); 2906 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2907 return Match_InvalidOperand; 2908 } 2909 } 2910 2911 return Match_Success; 2912 } 2913 2914 static ArrayRef<unsigned> getAllVariants() { 2915 static const unsigned Variants[] = { 2916 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2917 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2918 }; 2919 2920 return makeArrayRef(Variants); 2921 } 2922 2923 // What asm variants we should check 2924 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2925 if (getForcedEncodingSize() == 32) { 2926 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2927 return makeArrayRef(Variants); 2928 } 2929 2930 if (isForcedVOP3()) { 2931 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2932 return makeArrayRef(Variants); 2933 } 2934 2935 if (isForcedSDWA()) { 2936 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2937 AMDGPUAsmVariants::SDWA9}; 2938 return makeArrayRef(Variants); 2939 } 2940 2941 if (isForcedDPP()) { 2942 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2943 return makeArrayRef(Variants); 2944 } 2945 2946 return getAllVariants(); 2947 } 2948 2949 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2950 if (getForcedEncodingSize() == 32) 2951 return "e32"; 2952 2953 if (isForcedVOP3()) 2954 return "e64"; 2955 2956 if (isForcedSDWA()) 2957 return "sdwa"; 2958 2959 if (isForcedDPP()) 2960 return "dpp"; 2961 2962 return ""; 2963 } 2964 2965 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2966 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2967 const unsigned Num = Desc.getNumImplicitUses(); 2968 for (unsigned i = 0; i < Num; ++i) { 2969 unsigned Reg = Desc.ImplicitUses[i]; 2970 switch (Reg) { 2971 case AMDGPU::FLAT_SCR: 2972 case AMDGPU::VCC: 2973 case AMDGPU::VCC_LO: 2974 case AMDGPU::VCC_HI: 2975 case AMDGPU::M0: 2976 return Reg; 2977 default: 2978 break; 2979 } 2980 } 2981 return AMDGPU::NoRegister; 2982 } 2983 2984 // NB: This code is correct only when used to check constant 2985 // bus limitations because GFX7 support no f16 inline constants. 2986 // Note that there are no cases when a GFX7 opcode violates 2987 // constant bus limitations due to the use of an f16 constant. 
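// Rough examples of the distinction drawn below (illustrative, not
// exhaustive): for a 32-bit operand, integers in [-16, 64] and values such
// as 0.5, -4.0, or 1/(2*pi) (when the subtarget supports the latter) are
// inline constants; any other immediate is a literal that occupies the
// constant bus.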
2988 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2989 unsigned OpIdx) const { 2990 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2991 2992 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2993 return false; 2994 } 2995 2996 const MCOperand &MO = Inst.getOperand(OpIdx); 2997 2998 int64_t Val = MO.getImm(); 2999 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3000 3001 switch (OpSize) { // expected operand size 3002 case 8: 3003 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3004 case 4: 3005 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3006 case 2: { 3007 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3008 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3009 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3010 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3011 return AMDGPU::isInlinableIntLiteral(Val); 3012 3013 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3014 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3015 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3016 return AMDGPU::isInlinableIntLiteralV216(Val); 3017 3018 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3019 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3020 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3021 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3022 3023 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3024 } 3025 default: 3026 llvm_unreachable("invalid operand size"); 3027 } 3028 } 3029 3030 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3031 if (!isGFX10Plus()) 3032 return 1; 3033 3034 switch (Opcode) { 3035 // 64-bit shift instructions can use only one scalar value input 3036 case AMDGPU::V_LSHLREV_B64: 3037 case AMDGPU::V_LSHLREV_B64_gfx10: 3038 case AMDGPU::V_LSHL_B64: 3039 case AMDGPU::V_LSHRREV_B64: 3040 case AMDGPU::V_LSHRREV_B64_gfx10: 3041 case AMDGPU::V_LSHR_B64: 3042 case AMDGPU::V_ASHRREV_I64: 3043 case AMDGPU::V_ASHRREV_I64_gfx10: 3044 case AMDGPU::V_ASHR_I64: 3045 return 1; 3046 default: 3047 return 2; 3048 } 3049 } 3050 3051 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3052 const MCOperand &MO = Inst.getOperand(OpIdx); 3053 if (MO.isImm()) { 3054 return !isInlineConstant(Inst, OpIdx); 3055 } else if (MO.isReg()) { 3056 auto Reg = MO.getReg(); 3057 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3058 auto PReg = mc2PseudoReg(Reg); 3059 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3060 } else { 3061 return true; 3062 } 3063 } 3064 3065 bool 3066 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3067 const OperandVector &Operands) { 3068 const unsigned Opcode = Inst.getOpcode(); 3069 const MCInstrDesc &Desc = MII.get(Opcode); 3070 unsigned LastSGPR = AMDGPU::NoRegister; 3071 unsigned ConstantBusUseCount = 0; 3072 unsigned NumLiterals = 0; 3073 unsigned LiteralSize; 3074 3075 if (Desc.TSFlags & 3076 (SIInstrFlags::VOPC | 3077 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3078 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3079 SIInstrFlags::SDWA)) { 3080 // Check special imm operands (used by madmk, etc) 3081 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3082 ++ConstantBusUseCount; 3083 } 3084 3085 SmallDenseSet<unsigned> SGPRsUsed; 3086 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3087 if (SGPRUsed != AMDGPU::NoRegister) { 3088 SGPRsUsed.insert(SGPRUsed); 3089 ++ConstantBusUseCount; 3090 } 3091 3092 const int Src0Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3093 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3094 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3095 3096 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3097 3098 for (int OpIdx : OpIndices) { 3099 if (OpIdx == -1) break; 3100 3101 const MCOperand &MO = Inst.getOperand(OpIdx); 3102 if (usesConstantBus(Inst, OpIdx)) { 3103 if (MO.isReg()) { 3104 LastSGPR = mc2PseudoReg(MO.getReg()); 3105 // Pairs of registers with a partial intersections like these 3106 // s0, s[0:1] 3107 // flat_scratch_lo, flat_scratch 3108 // flat_scratch_lo, flat_scratch_hi 3109 // are theoretically valid but they are disabled anyway. 3110 // Note that this code mimics SIInstrInfo::verifyInstruction 3111 if (!SGPRsUsed.count(LastSGPR)) { 3112 SGPRsUsed.insert(LastSGPR); 3113 ++ConstantBusUseCount; 3114 } 3115 } else { // Expression or a literal 3116 3117 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3118 continue; // special operand like VINTERP attr_chan 3119 3120 // An instruction may use only one literal. 3121 // This has been validated on the previous step. 3122 // See validateVOP3Literal. 3123 // This literal may be used as more than one operand. 3124 // If all these operands are of the same size, 3125 // this literal counts as one scalar value. 3126 // Otherwise it counts as 2 scalar values. 3127 // See "GFX10 Shader Programming", section 3.6.2.3. 3128 3129 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3130 if (Size < 4) Size = 4; 3131 3132 if (NumLiterals == 0) { 3133 NumLiterals = 1; 3134 LiteralSize = Size; 3135 } else if (LiteralSize != Size) { 3136 NumLiterals = 2; 3137 } 3138 } 3139 } 3140 } 3141 } 3142 ConstantBusUseCount += NumLiterals; 3143 3144 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3145 return true; 3146 3147 SMLoc LitLoc = getLitLoc(Operands); 3148 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3149 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3150 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3151 return false; 3152 } 3153 3154 bool 3155 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3156 const OperandVector &Operands) { 3157 const unsigned Opcode = Inst.getOpcode(); 3158 const MCInstrDesc &Desc = MII.get(Opcode); 3159 3160 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3161 if (DstIdx == -1 || 3162 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3163 return true; 3164 } 3165 3166 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3167 3168 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3169 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3170 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3171 3172 assert(DstIdx != -1); 3173 const MCOperand &Dst = Inst.getOperand(DstIdx); 3174 assert(Dst.isReg()); 3175 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3176 3177 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3178 3179 for (int SrcIdx : SrcIndices) { 3180 if (SrcIdx == -1) break; 3181 const MCOperand &Src = Inst.getOperand(SrcIdx); 3182 if (Src.isReg()) { 3183 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3184 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3185 Error(getRegLoc(SrcReg, Operands), 3186 "destination must be different than all sources"); 3187 return false; 3188 } 3189 } 3190 } 3191 3192 return true; 3193 } 3194 3195 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3196 3197 const unsigned Opc = Inst.getOpcode(); 3198 const MCInstrDesc &Desc = MII.get(Opc); 3199 3200 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3201 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3202 assert(ClampIdx != -1); 3203 return Inst.getOperand(ClampIdx).getImm() == 0; 3204 } 3205 3206 return true; 3207 } 3208 3209 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3210 3211 const unsigned Opc = Inst.getOpcode(); 3212 const MCInstrDesc &Desc = MII.get(Opc); 3213 3214 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3215 return true; 3216 3217 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3218 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3219 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3220 3221 assert(VDataIdx != -1); 3222 3223 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3224 return true; 3225 3226 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3227 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3228 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3229 if (DMask == 0) 3230 DMask = 1; 3231 3232 unsigned DataSize = 3233 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3234 if (hasPackedD16()) { 3235 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3236 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3237 DataSize = (DataSize + 1) / 2; 3238 } 3239 3240 return (VDataSize / 4) == DataSize + TFESize; 3241 } 3242 3243 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3244 const unsigned Opc = Inst.getOpcode(); 3245 const MCInstrDesc &Desc = MII.get(Opc); 3246 3247 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3248 return true; 3249 3250 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3251 3252 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3253 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3254 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3255 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3256 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3257 3258 assert(VAddr0Idx != -1); 3259 assert(SrsrcIdx != -1); 3260 assert(SrsrcIdx > VAddr0Idx); 3261 3262 if (DimIdx == -1) 3263 return true; // intersect_ray 3264 3265 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3266 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3267 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3268 unsigned VAddrSize = 3269 IsNSA ? SrsrcIdx - VAddr0Idx 3270 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3271 3272 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3273 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3274 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3275 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3276 if (!IsNSA) { 3277 if (AddrSize > 8) 3278 AddrSize = 16; 3279 else if (AddrSize > 4) 3280 AddrSize = 8; 3281 } 3282 3283 return VAddrSize == AddrSize; 3284 } 3285 3286 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3287 3288 const unsigned Opc = Inst.getOpcode(); 3289 const MCInstrDesc &Desc = MII.get(Opc); 3290 3291 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3292 return true; 3293 if (!Desc.mayLoad() || !Desc.mayStore()) 3294 return true; // Not atomic 3295 3296 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3297 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3298 3299 // This is an incomplete check because image_atomic_cmpswap 3300 // may only use 0x3 and 0xf while other atomic operations 3301 // may use 0x1 and 0x3. However these limitations are 3302 // verified when we check that dmask matches dst size. 3303 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3304 } 3305 3306 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3307 3308 const unsigned Opc = Inst.getOpcode(); 3309 const MCInstrDesc &Desc = MII.get(Opc); 3310 3311 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3312 return true; 3313 3314 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3315 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3316 3317 // GATHER4 instructions use dmask in a different fashion compared to 3318 // other MIMG instructions. The only useful DMASK values are 3319 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3320 // (red,red,red,red) etc.) The ISA document doesn't mention 3321 // this. 
3322 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3323 } 3324 3325 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3326 { 3327 switch (Opcode) { 3328 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3329 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3330 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3331 return true; 3332 default: 3333 return false; 3334 } 3335 } 3336 3337 // movrels* opcodes should only allow VGPRS as src0. 3338 // This is specified in .td description for vop1/vop3, 3339 // but sdwa is handled differently. See isSDWAOperand. 3340 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3341 const OperandVector &Operands) { 3342 3343 const unsigned Opc = Inst.getOpcode(); 3344 const MCInstrDesc &Desc = MII.get(Opc); 3345 3346 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3347 return true; 3348 3349 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3350 assert(Src0Idx != -1); 3351 3352 SMLoc ErrLoc; 3353 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3354 if (Src0.isReg()) { 3355 auto Reg = mc2PseudoReg(Src0.getReg()); 3356 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3357 if (!isSGPR(Reg, TRI)) 3358 return true; 3359 ErrLoc = getRegLoc(Reg, Operands); 3360 } else { 3361 ErrLoc = getConstLoc(Operands); 3362 } 3363 3364 Error(ErrLoc, "source operand must be a VGPR"); 3365 return false; 3366 } 3367 3368 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3369 const OperandVector &Operands) { 3370 3371 const unsigned Opc = Inst.getOpcode(); 3372 3373 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3374 return true; 3375 3376 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3377 assert(Src0Idx != -1); 3378 3379 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3380 if (!Src0.isReg()) 3381 return true; 3382 3383 auto Reg = mc2PseudoReg(Src0.getReg()); 3384 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3385 if (isSGPR(Reg, TRI)) { 3386 Error(getRegLoc(Reg, Operands), 3387 "source operand must be either a VGPR or an inline constant"); 3388 return false; 3389 } 3390 3391 return true; 3392 } 3393 3394 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3395 switch (Inst.getOpcode()) { 3396 default: 3397 return true; 3398 case V_DIV_SCALE_F32_gfx6_gfx7: 3399 case V_DIV_SCALE_F32_vi: 3400 case V_DIV_SCALE_F32_gfx10: 3401 case V_DIV_SCALE_F64_gfx6_gfx7: 3402 case V_DIV_SCALE_F64_vi: 3403 case V_DIV_SCALE_F64_gfx10: 3404 break; 3405 } 3406 3407 // TODO: Check that src0 = src1 or src2. 
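// Illustrative example of what the loop below rejects (assuming the usual
// v_div_scale source syntax): a source operand written as |v1|, i.e. with
// an ABS source modifier, causes validation to fail.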
3408
3409 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3410 AMDGPU::OpName::src1_modifiers,
3411 AMDGPU::OpName::src2_modifiers}) {
3412 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3413 .getImm() &
3414 SISrcMods::ABS) {
3415 return false;
3416 }
3417 }
3418
3419 return true;
3420 }
3421
3422 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3423
3424 const unsigned Opc = Inst.getOpcode();
3425 const MCInstrDesc &Desc = MII.get(Opc);
3426
3427 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3428 return true;
3429
3430 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3431 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3432 if (isCI() || isSI())
3433 return false;
3434 }
3435
3436 return true;
3437 }
3438
3439 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3440 const unsigned Opc = Inst.getOpcode();
3441 const MCInstrDesc &Desc = MII.get(Opc);
3442
3443 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3444 return true;
3445
3446 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3447 if (DimIdx < 0)
3448 return true;
3449
3450 long Imm = Inst.getOperand(DimIdx).getImm();
3451 if (Imm < 0 || Imm >= 8)
3452 return false;
3453
3454 return true;
3455 }
3456
3457 static bool IsRevOpcode(const unsigned Opcode)
3458 {
3459 switch (Opcode) {
3460 case AMDGPU::V_SUBREV_F32_e32:
3461 case AMDGPU::V_SUBREV_F32_e64:
3462 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3463 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3464 case AMDGPU::V_SUBREV_F32_e32_vi:
3465 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3466 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3467 case AMDGPU::V_SUBREV_F32_e64_vi:
3468
3469 case AMDGPU::V_SUBREV_CO_U32_e32:
3470 case AMDGPU::V_SUBREV_CO_U32_e64:
3471 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3472 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3473
3474 case AMDGPU::V_SUBBREV_U32_e32:
3475 case AMDGPU::V_SUBBREV_U32_e64:
3476 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3477 case AMDGPU::V_SUBBREV_U32_e32_vi:
3478 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3479 case AMDGPU::V_SUBBREV_U32_e64_vi:
3480
3481 case AMDGPU::V_SUBREV_U32_e32:
3482 case AMDGPU::V_SUBREV_U32_e64:
3483 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3484 case AMDGPU::V_SUBREV_U32_e32_vi:
3485 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3486 case AMDGPU::V_SUBREV_U32_e64_vi:
3487
3488 case AMDGPU::V_SUBREV_F16_e32:
3489 case AMDGPU::V_SUBREV_F16_e64:
3490 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3491 case AMDGPU::V_SUBREV_F16_e32_vi:
3492 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3493 case AMDGPU::V_SUBREV_F16_e64_vi:
3494
3495 case AMDGPU::V_SUBREV_U16_e32:
3496 case AMDGPU::V_SUBREV_U16_e64:
3497 case AMDGPU::V_SUBREV_U16_e32_vi:
3498 case AMDGPU::V_SUBREV_U16_e64_vi:
3499
3500 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3501 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3502 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3503
3504 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3505 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3506
3507 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3508 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3509
3510 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3511 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3512
3513 case AMDGPU::V_LSHRREV_B32_e32:
3514 case AMDGPU::V_LSHRREV_B32_e64:
3515 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3516 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3517 case AMDGPU::V_LSHRREV_B32_e32_vi:
3518 case AMDGPU::V_LSHRREV_B32_e64_vi:
3519 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3520 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3521
3522 case AMDGPU::V_ASHRREV_I32_e32:
3523 case
AMDGPU::V_ASHRREV_I32_e64: 3524 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3525 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3526 case AMDGPU::V_ASHRREV_I32_e32_vi: 3527 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3528 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3529 case AMDGPU::V_ASHRREV_I32_e64_vi: 3530 3531 case AMDGPU::V_LSHLREV_B32_e32: 3532 case AMDGPU::V_LSHLREV_B32_e64: 3533 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3534 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3535 case AMDGPU::V_LSHLREV_B32_e32_vi: 3536 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3537 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3538 case AMDGPU::V_LSHLREV_B32_e64_vi: 3539 3540 case AMDGPU::V_LSHLREV_B16_e32: 3541 case AMDGPU::V_LSHLREV_B16_e64: 3542 case AMDGPU::V_LSHLREV_B16_e32_vi: 3543 case AMDGPU::V_LSHLREV_B16_e64_vi: 3544 case AMDGPU::V_LSHLREV_B16_gfx10: 3545 3546 case AMDGPU::V_LSHRREV_B16_e32: 3547 case AMDGPU::V_LSHRREV_B16_e64: 3548 case AMDGPU::V_LSHRREV_B16_e32_vi: 3549 case AMDGPU::V_LSHRREV_B16_e64_vi: 3550 case AMDGPU::V_LSHRREV_B16_gfx10: 3551 3552 case AMDGPU::V_ASHRREV_I16_e32: 3553 case AMDGPU::V_ASHRREV_I16_e64: 3554 case AMDGPU::V_ASHRREV_I16_e32_vi: 3555 case AMDGPU::V_ASHRREV_I16_e64_vi: 3556 case AMDGPU::V_ASHRREV_I16_gfx10: 3557 3558 case AMDGPU::V_LSHLREV_B64: 3559 case AMDGPU::V_LSHLREV_B64_gfx10: 3560 case AMDGPU::V_LSHLREV_B64_vi: 3561 3562 case AMDGPU::V_LSHRREV_B64: 3563 case AMDGPU::V_LSHRREV_B64_gfx10: 3564 case AMDGPU::V_LSHRREV_B64_vi: 3565 3566 case AMDGPU::V_ASHRREV_I64: 3567 case AMDGPU::V_ASHRREV_I64_gfx10: 3568 case AMDGPU::V_ASHRREV_I64_vi: 3569 3570 case AMDGPU::V_PK_LSHLREV_B16: 3571 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3572 case AMDGPU::V_PK_LSHLREV_B16_vi: 3573 3574 case AMDGPU::V_PK_LSHRREV_B16: 3575 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3576 case AMDGPU::V_PK_LSHRREV_B16_vi: 3577 case AMDGPU::V_PK_ASHRREV_I16: 3578 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3579 case AMDGPU::V_PK_ASHRREV_I16_vi: 3580 return true; 3581 default: 3582 return false; 3583 } 3584 } 3585 3586 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3587 3588 using namespace SIInstrFlags; 3589 const unsigned Opcode = Inst.getOpcode(); 3590 const MCInstrDesc &Desc = MII.get(Opcode); 3591 3592 // lds_direct register is defined so that it can be used 3593 // with 9-bit operands only. Ignore encodings which do not accept these. 3594 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3595 return true; 3596 3597 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3598 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3599 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3600 3601 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3602 3603 // lds_direct cannot be specified as either src1 or src2. 3604 for (int SrcIdx : SrcIndices) { 3605 if (SrcIdx == -1) break; 3606 const MCOperand &Src = Inst.getOperand(SrcIdx); 3607 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3608 return false; 3609 } 3610 } 3611 3612 if (Src0Idx == -1) 3613 return true; 3614 3615 const MCOperand &Src = Inst.getOperand(Src0Idx); 3616 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3617 return true; 3618 3619 // lds_direct is specified as src0. Check additional limitations. 
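  // Specifically, lds_direct as src0 is also disallowed for SDWA variants and for *rev* opcodes.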
3620 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3621 } 3622 3623 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3624 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3625 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3626 if (Op.isFlatOffset()) 3627 return Op.getStartLoc(); 3628 } 3629 return getLoc(); 3630 } 3631 3632 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3633 const OperandVector &Operands) { 3634 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3635 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3636 return true; 3637 3638 auto Opcode = Inst.getOpcode(); 3639 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3640 assert(OpNum != -1); 3641 3642 const auto &Op = Inst.getOperand(OpNum); 3643 if (!hasFlatOffsets() && Op.getImm() != 0) { 3644 Error(getFlatOffsetLoc(Operands), 3645 "flat offset modifier is not supported on this GPU"); 3646 return false; 3647 } 3648 3649 // For FLAT segment the offset must be positive; 3650 // MSB is ignored and forced to zero. 3651 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3652 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3653 if (!isIntN(OffsetSize, Op.getImm())) { 3654 Error(getFlatOffsetLoc(Operands), 3655 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3656 return false; 3657 } 3658 } else { 3659 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3660 if (!isUIntN(OffsetSize, Op.getImm())) { 3661 Error(getFlatOffsetLoc(Operands), 3662 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3663 return false; 3664 } 3665 } 3666 3667 return true; 3668 } 3669 3670 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3671 // Start with second operand because SMEM Offset cannot be dst or src0. 3672 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3673 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3674 if (Op.isSMEMOffset()) 3675 return Op.getStartLoc(); 3676 } 3677 return getLoc(); 3678 } 3679 3680 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3681 const OperandVector &Operands) { 3682 if (isCI() || isSI()) 3683 return true; 3684 3685 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3686 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3687 return true; 3688 3689 auto Opcode = Inst.getOpcode(); 3690 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3691 if (OpNum == -1) 3692 return true; 3693 3694 const auto &Op = Inst.getOperand(OpNum); 3695 if (!Op.isImm()) 3696 return true; 3697 3698 uint64_t Offset = Op.getImm(); 3699 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3700 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3701 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3702 return true; 3703 3704 Error(getSMEMOffsetLoc(Operands), 3705 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3706 "expected a 21-bit signed offset"); 3707 3708 return false; 3709 } 3710 3711 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3712 unsigned Opcode = Inst.getOpcode(); 3713 const MCInstrDesc &Desc = MII.get(Opcode); 3714 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3715 return true; 3716 3717 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3718 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3719 3720 const int OpIndices[] = { Src0Idx, Src1Idx }; 3721 3722 unsigned NumExprs = 0; 3723 unsigned NumLiterals = 0; 3724 uint32_t LiteralValue; 3725 3726 for (int OpIdx : OpIndices) { 3727 if (OpIdx == -1) break; 3728 3729 const MCOperand &MO = Inst.getOperand(OpIdx); 3730 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3731 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3732 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3733 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3734 if (NumLiterals == 0 || LiteralValue != Value) { 3735 LiteralValue = Value; 3736 ++NumLiterals; 3737 } 3738 } else if (MO.isExpr()) { 3739 ++NumExprs; 3740 } 3741 } 3742 } 3743 3744 return NumLiterals + NumExprs <= 1; 3745 } 3746 3747 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3748 const unsigned Opc = Inst.getOpcode(); 3749 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3750 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3751 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3752 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3753 3754 if (OpSel & ~3) 3755 return false; 3756 } 3757 return true; 3758 } 3759 3760 // Check if VCC register matches wavefront size 3761 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3762 auto FB = getFeatureBits(); 3763 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3764 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3765 } 3766 3767 // VOP3 literal is only allowed in GFX10+ and only one can be used 3768 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3769 const OperandVector &Operands) { 3770 unsigned Opcode = Inst.getOpcode(); 3771 const MCInstrDesc &Desc = MII.get(Opcode); 3772 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3773 return true; 3774 3775 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3776 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3777 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3778 3779 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3780 3781 unsigned NumExprs = 0; 3782 unsigned NumLiterals = 0; 3783 uint32_t LiteralValue; 3784 3785 for (int OpIdx : OpIndices) { 3786 if (OpIdx == -1) break; 3787 3788 const MCOperand &MO = Inst.getOperand(OpIdx); 3789 if (!MO.isImm() && !MO.isExpr()) 3790 continue; 3791 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3792 continue; 3793 3794 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3795 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3796 Error(getConstLoc(Operands), 3797 "inline constants are not allowed for this operand"); 3798 return false; 3799 } 3800 3801 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3802 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3803 if (NumLiterals == 0 || LiteralValue != Value) { 3804 LiteralValue = Value; 3805 ++NumLiterals; 3806 } 3807 } else if (MO.isExpr()) { 3808 ++NumExprs; 3809 } 
3810 } 3811 NumLiterals += NumExprs; 3812 3813 if (!NumLiterals) 3814 return true; 3815 3816 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3817 Error(getLitLoc(Operands), "literal operands are not supported"); 3818 return false; 3819 } 3820 3821 if (NumLiterals > 1) { 3822 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3823 return false; 3824 } 3825 3826 return true; 3827 } 3828 3829 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3830 const OperandVector &Operands, 3831 const SMLoc &IDLoc) { 3832 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3833 AMDGPU::OpName::glc1); 3834 if (GLCPos != -1) { 3835 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3836 // in the asm string, and the default value means it is not present. 3837 if (Inst.getOperand(GLCPos).getImm() == -1) { 3838 Error(IDLoc, "instruction must use glc"); 3839 return false; 3840 } 3841 } 3842 3843 return true; 3844 } 3845 3846 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3847 const SMLoc &IDLoc, 3848 const OperandVector &Operands) { 3849 if (!validateLdsDirect(Inst)) { 3850 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 3851 "invalid use of lds_direct"); 3852 return false; 3853 } 3854 if (!validateSOPLiteral(Inst)) { 3855 Error(getLitLoc(Operands), 3856 "only one literal operand is allowed"); 3857 return false; 3858 } 3859 if (!validateVOP3Literal(Inst, Operands)) { 3860 return false; 3861 } 3862 if (!validateConstantBusLimitations(Inst, Operands)) { 3863 return false; 3864 } 3865 if (!validateEarlyClobberLimitations(Inst, Operands)) { 3866 return false; 3867 } 3868 if (!validateIntClampSupported(Inst)) { 3869 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 3870 "integer clamping is not supported on this GPU"); 3871 return false; 3872 } 3873 if (!validateOpSel(Inst)) { 3874 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 3875 "invalid op_sel operand"); 3876 return false; 3877 } 3878 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
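  // MIMG has an explicit d16 operand; validateMIMGD16 rejects it on targets (SI/CI) that do not support it.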
3879 if (!validateMIMGD16(Inst)) { 3880 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 3881 "d16 modifier is not supported on this GPU"); 3882 return false; 3883 } 3884 if (!validateMIMGDim(Inst)) { 3885 Error(IDLoc, "dim modifier is required on this GPU"); 3886 return false; 3887 } 3888 if (!validateMIMGDataSize(Inst)) { 3889 Error(IDLoc, 3890 "image data size does not match dmask and tfe"); 3891 return false; 3892 } 3893 if (!validateMIMGAddrSize(Inst)) { 3894 Error(IDLoc, 3895 "image address size does not match dim and a16"); 3896 return false; 3897 } 3898 if (!validateMIMGAtomicDMask(Inst)) { 3899 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3900 "invalid atomic image dmask"); 3901 return false; 3902 } 3903 if (!validateMIMGGatherDMask(Inst)) { 3904 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3905 "invalid image_gather dmask: only one bit must be set"); 3906 return false; 3907 } 3908 if (!validateMovrels(Inst, Operands)) { 3909 return false; 3910 } 3911 if (!validateFlatOffset(Inst, Operands)) { 3912 return false; 3913 } 3914 if (!validateSMEMOffset(Inst, Operands)) { 3915 return false; 3916 } 3917 if (!validateMAIAccWrite(Inst, Operands)) { 3918 return false; 3919 } 3920 if (!validateDivScale(Inst)) { 3921 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 3922 return false; 3923 } 3924 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3925 return false; 3926 } 3927 3928 return true; 3929 } 3930 3931 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3932 const FeatureBitset &FBS, 3933 unsigned VariantID = 0); 3934 3935 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3936 const FeatureBitset &AvailableFeatures, 3937 unsigned VariantID); 3938 3939 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3940 const FeatureBitset &FBS) { 3941 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3942 } 3943 3944 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3945 const FeatureBitset &FBS, 3946 ArrayRef<unsigned> Variants) { 3947 for (auto Variant : Variants) { 3948 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3949 return true; 3950 } 3951 3952 return false; 3953 } 3954 3955 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3956 const SMLoc &IDLoc) { 3957 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3958 3959 // Check if requested instruction variant is supported. 3960 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3961 return false; 3962 3963 // This instruction is not supported. 3964 // Clear any other pending errors because they are no longer relevant. 3965 getParser().clearPendingErrors(); 3966 3967 // Requested instruction variant is not supported. 3968 // Check if any other variants are supported. 3969 StringRef VariantName = getMatchedVariantName(); 3970 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3971 return Error(IDLoc, 3972 Twine(VariantName, 3973 " variant of this instruction is not supported")); 3974 } 3975 3976 // Finally check if this instruction is supported on any other GPU. 3977 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3978 return Error(IDLoc, "instruction not supported on this GPU"); 3979 } 3980 3981 // Instruction not supported on any GPU. Probably a typo. 
3982 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3983 return Error(IDLoc, "invalid instruction" + Suggestion); 3984 } 3985 3986 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3987 OperandVector &Operands, 3988 MCStreamer &Out, 3989 uint64_t &ErrorInfo, 3990 bool MatchingInlineAsm) { 3991 MCInst Inst; 3992 unsigned Result = Match_Success; 3993 for (auto Variant : getMatchedVariants()) { 3994 uint64_t EI; 3995 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3996 Variant); 3997 // We order match statuses from least to most specific and keep the most 3998 // specific status seen so far as the result: 3999 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4000 if ((R == Match_Success) || 4001 (R == Match_PreferE32) || 4002 (R == Match_MissingFeature && Result != Match_PreferE32) || 4003 (R == Match_InvalidOperand && Result != Match_MissingFeature 4004 && Result != Match_PreferE32) || 4005 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4006 && Result != Match_MissingFeature 4007 && Result != Match_PreferE32)) { 4008 Result = R; 4009 ErrorInfo = EI; 4010 } 4011 if (R == Match_Success) 4012 break; 4013 } 4014 4015 if (Result == Match_Success) { 4016 if (!validateInstruction(Inst, IDLoc, Operands)) { 4017 return true; 4018 } 4019 Inst.setLoc(IDLoc); 4020 Out.emitInstruction(Inst, getSTI()); 4021 return false; 4022 } 4023 4024 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4025 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4026 return true; 4027 } 4028 4029 switch (Result) { 4030 default: break; 4031 case Match_MissingFeature: 4032 // It has been verified that the specified instruction 4033 // mnemonic is valid. A match was found but it requires 4034 // features which are not supported on this GPU. 
4035 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4036 4037 case Match_InvalidOperand: { 4038 SMLoc ErrorLoc = IDLoc; 4039 if (ErrorInfo != ~0ULL) { 4040 if (ErrorInfo >= Operands.size()) { 4041 return Error(IDLoc, "too few operands for instruction"); 4042 } 4043 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4044 if (ErrorLoc == SMLoc()) 4045 ErrorLoc = IDLoc; 4046 } 4047 return Error(ErrorLoc, "invalid operand for instruction"); 4048 } 4049 4050 case Match_PreferE32: 4051 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4052 "should be encoded as e32"); 4053 case Match_MnemonicFail: 4054 llvm_unreachable("Invalid instructions should have been handled already"); 4055 } 4056 llvm_unreachable("Implement any new match types added!"); 4057 } 4058 4059 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4060 int64_t Tmp = -1; 4061 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 4062 return true; 4063 } 4064 if (getParser().parseAbsoluteExpression(Tmp)) { 4065 return true; 4066 } 4067 Ret = static_cast<uint32_t>(Tmp); 4068 return false; 4069 } 4070 4071 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4072 uint32_t &Minor) { 4073 if (ParseAsAbsoluteExpression(Major)) 4074 return TokError("invalid major version"); 4075 4076 if (!trySkipToken(AsmToken::Comma)) 4077 return TokError("minor version number required, comma expected"); 4078 4079 if (ParseAsAbsoluteExpression(Minor)) 4080 return TokError("invalid minor version"); 4081 4082 return false; 4083 } 4084 4085 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4086 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4087 return TokError("directive only supported for amdgcn architecture"); 4088 4089 std::string Target; 4090 4091 SMLoc TargetStart = getTok().getLoc(); 4092 if (getParser().parseEscapedString(Target)) 4093 return true; 4094 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4095 4096 std::string ExpectedTarget; 4097 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4098 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4099 4100 if (Target != ExpectedTargetOS.str()) 4101 return getParser().Error(TargetRange.Start, "target must match options", 4102 TargetRange); 4103 4104 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4105 return false; 4106 } 4107 4108 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4109 return getParser().Error(Range.Start, "value out of range", Range); 4110 } 4111 4112 bool AMDGPUAsmParser::calculateGPRBlocks( 4113 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4114 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4115 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4116 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4117 // TODO(scott.linder): These calculations are duplicated from 4118 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
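  // The hardware is programmed with register usage in allocation granules ("blocks"), not raw VGPR/SGPR counts, hence the conversion below.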
4119 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4120 4121 unsigned NumVGPRs = NextFreeVGPR; 4122 unsigned NumSGPRs = NextFreeSGPR; 4123 4124 if (Version.Major >= 10) 4125 NumSGPRs = 0; 4126 else { 4127 unsigned MaxAddressableNumSGPRs = 4128 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4129 4130 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4131 NumSGPRs > MaxAddressableNumSGPRs) 4132 return OutOfRangeError(SGPRRange); 4133 4134 NumSGPRs += 4135 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4136 4137 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4138 NumSGPRs > MaxAddressableNumSGPRs) 4139 return OutOfRangeError(SGPRRange); 4140 4141 if (Features.test(FeatureSGPRInitBug)) 4142 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4143 } 4144 4145 VGPRBlocks = 4146 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4147 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4148 4149 return false; 4150 } 4151 4152 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4153 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4154 return TokError("directive only supported for amdgcn architecture"); 4155 4156 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4157 return TokError("directive only supported for amdhsa OS"); 4158 4159 StringRef KernelName; 4160 if (getParser().parseIdentifier(KernelName)) 4161 return true; 4162 4163 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4164 4165 StringSet<> Seen; 4166 4167 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4168 4169 SMRange VGPRRange; 4170 uint64_t NextFreeVGPR = 0; 4171 SMRange SGPRRange; 4172 uint64_t NextFreeSGPR = 0; 4173 unsigned UserSGPRCount = 0; 4174 bool ReserveVCC = true; 4175 bool ReserveFlatScr = true; 4176 bool ReserveXNACK = hasXNACK(); 4177 Optional<bool> EnableWavefrontSize32; 4178 4179 while (true) { 4180 while (trySkipToken(AsmToken::EndOfStatement)); 4181 4182 StringRef ID; 4183 SMRange IDRange = getTok().getLocRange(); 4184 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4185 return true; 4186 4187 if (ID == ".end_amdhsa_kernel") 4188 break; 4189 4190 if (Seen.find(ID) != Seen.end()) 4191 return TokError(".amdhsa_ directives cannot be repeated"); 4192 Seen.insert(ID); 4193 4194 SMLoc ValStart = getTok().getLoc(); 4195 int64_t IVal; 4196 if (getParser().parseAbsoluteExpression(IVal)) 4197 return true; 4198 SMLoc ValEnd = getTok().getLoc(); 4199 SMRange ValRange = SMRange(ValStart, ValEnd); 4200 4201 if (IVal < 0) 4202 return OutOfRangeError(ValRange); 4203 4204 uint64_t Val = IVal; 4205 4206 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4207 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4208 return OutOfRangeError(RANGE); \ 4209 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4210 4211 if (ID == ".amdhsa_group_segment_fixed_size") { 4212 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4213 return OutOfRangeError(ValRange); 4214 KD.group_segment_fixed_size = Val; 4215 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4216 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4217 return OutOfRangeError(ValRange); 4218 KD.private_segment_fixed_size = Val; 4219 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4220 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4221 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4222 Val, ValRange); 4223 if (Val) 4224 UserSGPRCount += 4; 4225 } else if (ID == 
".amdhsa_user_sgpr_dispatch_ptr") { 4226 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4227 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4228 ValRange); 4229 if (Val) 4230 UserSGPRCount += 2; 4231 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4232 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4233 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4234 ValRange); 4235 if (Val) 4236 UserSGPRCount += 2; 4237 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4238 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4239 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4240 Val, ValRange); 4241 if (Val) 4242 UserSGPRCount += 2; 4243 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4244 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4245 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4246 ValRange); 4247 if (Val) 4248 UserSGPRCount += 2; 4249 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4250 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4251 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4252 ValRange); 4253 if (Val) 4254 UserSGPRCount += 2; 4255 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4256 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4257 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4258 Val, ValRange); 4259 if (Val) 4260 UserSGPRCount += 1; 4261 } else if (ID == ".amdhsa_wavefront_size32") { 4262 if (IVersion.Major < 10) 4263 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4264 IDRange); 4265 EnableWavefrontSize32 = Val; 4266 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4267 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4268 Val, ValRange); 4269 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4270 PARSE_BITS_ENTRY( 4271 KD.compute_pgm_rsrc2, 4272 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4273 ValRange); 4274 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4275 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4276 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4277 ValRange); 4278 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4279 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4280 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4281 ValRange); 4282 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4283 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4284 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4285 ValRange); 4286 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4287 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4288 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4289 ValRange); 4290 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4291 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4292 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4293 ValRange); 4294 } else if (ID == ".amdhsa_next_free_vgpr") { 4295 VGPRRange = ValRange; 4296 NextFreeVGPR = Val; 4297 } else if (ID == ".amdhsa_next_free_sgpr") { 4298 SGPRRange = ValRange; 4299 NextFreeSGPR = Val; 4300 } else if (ID == ".amdhsa_reserve_vcc") { 4301 if (!isUInt<1>(Val)) 4302 return OutOfRangeError(ValRange); 4303 ReserveVCC = Val; 4304 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4305 if (IVersion.Major < 7) 4306 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4307 IDRange); 4308 if (!isUInt<1>(Val)) 4309 return OutOfRangeError(ValRange); 4310 ReserveFlatScr = Val; 4311 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4312 if (IVersion.Major < 8) 4313 return getParser().Error(IDRange.Start, "directive requires gfx8+", 4314 IDRange); 4315 if (!isUInt<1>(Val)) 4316 return 
OutOfRangeError(ValRange); 4317 ReserveXNACK = Val; 4318 } else if (ID == ".amdhsa_float_round_mode_32") { 4319 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4320 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4321 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4322 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4323 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4324 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4325 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4326 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4327 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4328 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4329 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4330 ValRange); 4331 } else if (ID == ".amdhsa_dx10_clamp") { 4332 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4333 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4334 } else if (ID == ".amdhsa_ieee_mode") { 4335 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4336 Val, ValRange); 4337 } else if (ID == ".amdhsa_fp16_overflow") { 4338 if (IVersion.Major < 9) 4339 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4340 IDRange); 4341 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4342 ValRange); 4343 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4344 if (IVersion.Major < 10) 4345 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4346 IDRange); 4347 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4348 ValRange); 4349 } else if (ID == ".amdhsa_memory_ordered") { 4350 if (IVersion.Major < 10) 4351 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4352 IDRange); 4353 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4354 ValRange); 4355 } else if (ID == ".amdhsa_forward_progress") { 4356 if (IVersion.Major < 10) 4357 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4358 IDRange); 4359 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4360 ValRange); 4361 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4362 PARSE_BITS_ENTRY( 4363 KD.compute_pgm_rsrc2, 4364 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4365 ValRange); 4366 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4367 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4368 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4369 Val, ValRange); 4370 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4371 PARSE_BITS_ENTRY( 4372 KD.compute_pgm_rsrc2, 4373 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4374 ValRange); 4375 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4376 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4377 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4378 Val, ValRange); 4379 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4380 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4381 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4382 Val, ValRange); 4383 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4384 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4385 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4386 Val, ValRange); 4387 } else if (ID == ".amdhsa_exception_int_div_zero") { 4388 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4389 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4390 Val, ValRange); 4391 } else { 4392 return getParser().Error(IDRange.Start, 4393 "unknown .amdhsa_kernel directive", IDRange); 4394 } 4395 4396 #undef 
PARSE_BITS_ENTRY 4397 } 4398 4399 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4400 return TokError(".amdhsa_next_free_vgpr directive is required"); 4401 4402 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4403 return TokError(".amdhsa_next_free_sgpr directive is required"); 4404 4405 unsigned VGPRBlocks; 4406 unsigned SGPRBlocks; 4407 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4408 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4409 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4410 SGPRBlocks)) 4411 return true; 4412 4413 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4414 VGPRBlocks)) 4415 return OutOfRangeError(VGPRRange); 4416 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4417 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4418 4419 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4420 SGPRBlocks)) 4421 return OutOfRangeError(SGPRRange); 4422 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4423 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4424 SGPRBlocks); 4425 4426 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4427 return TokError("too many user SGPRs enabled"); 4428 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4429 UserSGPRCount); 4430 4431 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4432 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4433 ReserveFlatScr, ReserveXNACK); 4434 return false; 4435 } 4436 4437 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4438 uint32_t Major; 4439 uint32_t Minor; 4440 4441 if (ParseDirectiveMajorMinor(Major, Minor)) 4442 return true; 4443 4444 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4445 return false; 4446 } 4447 4448 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4449 uint32_t Major; 4450 uint32_t Minor; 4451 uint32_t Stepping; 4452 StringRef VendorName; 4453 StringRef ArchName; 4454 4455 // If this directive has no arguments, then use the ISA version for the 4456 // targeted GPU. 4457 if (getLexer().is(AsmToken::EndOfStatement)) { 4458 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4459 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4460 ISA.Stepping, 4461 "AMD", "AMDGPU"); 4462 return false; 4463 } 4464 4465 if (ParseDirectiveMajorMinor(Major, Minor)) 4466 return true; 4467 4468 if (!trySkipToken(AsmToken::Comma)) 4469 return TokError("stepping version number required, comma expected"); 4470 4471 if (ParseAsAbsoluteExpression(Stepping)) 4472 return TokError("invalid stepping version"); 4473 4474 if (!trySkipToken(AsmToken::Comma)) 4475 return TokError("vendor name required, comma expected"); 4476 4477 if (!parseString(VendorName, "invalid vendor name")) 4478 return true; 4479 4480 if (!trySkipToken(AsmToken::Comma)) 4481 return TokError("arch name required, comma expected"); 4482 4483 if (!parseString(ArchName, "invalid arch name")) 4484 return true; 4485 4486 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4487 VendorName, ArchName); 4488 return false; 4489 } 4490 4491 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4492 amd_kernel_code_t &Header) { 4493 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4494 // assembly for backwards compatibility. 
4495 if (ID == "max_scratch_backing_memory_byte_size") { 4496 Parser.eatToEndOfStatement(); 4497 return false; 4498 } 4499 4500 SmallString<40> ErrStr; 4501 raw_svector_ostream Err(ErrStr); 4502 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4503 return TokError(Err.str()); 4504 } 4505 Lex(); 4506 4507 if (ID == "enable_wavefront_size32") { 4508 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4509 if (!isGFX10Plus()) 4510 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4511 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4512 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4513 } else { 4514 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4515 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4516 } 4517 } 4518 4519 if (ID == "wavefront_size") { 4520 if (Header.wavefront_size == 5) { 4521 if (!isGFX10Plus()) 4522 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4523 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4524 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4525 } else if (Header.wavefront_size == 6) { 4526 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4527 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4528 } 4529 } 4530 4531 if (ID == "enable_wgp_mode") { 4532 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4533 !isGFX10Plus()) 4534 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4535 } 4536 4537 if (ID == "enable_mem_ordered") { 4538 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4539 !isGFX10Plus()) 4540 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4541 } 4542 4543 if (ID == "enable_fwd_progress") { 4544 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4545 !isGFX10Plus()) 4546 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4547 } 4548 4549 return false; 4550 } 4551 4552 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4553 amd_kernel_code_t Header; 4554 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4555 4556 while (true) { 4557 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4558 // will set the current token to EndOfStatement. 
4559 while(trySkipToken(AsmToken::EndOfStatement)); 4560 4561 StringRef ID; 4562 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4563 return true; 4564 4565 if (ID == ".end_amd_kernel_code_t") 4566 break; 4567 4568 if (ParseAMDKernelCodeTValue(ID, Header)) 4569 return true; 4570 } 4571 4572 getTargetStreamer().EmitAMDKernelCodeT(Header); 4573 4574 return false; 4575 } 4576 4577 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4578 if (getLexer().isNot(AsmToken::Identifier)) 4579 return TokError("expected symbol name"); 4580 4581 StringRef KernelName = Parser.getTok().getString(); 4582 4583 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4584 ELF::STT_AMDGPU_HSA_KERNEL); 4585 Lex(); 4586 4587 KernelScope.initialize(getContext()); 4588 return false; 4589 } 4590 4591 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4592 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4593 return Error(getParser().getTok().getLoc(), 4594 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4595 "architectures"); 4596 } 4597 4598 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4599 4600 std::string ISAVersionStringFromSTI; 4601 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4602 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4603 4604 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4605 return Error(getParser().getTok().getLoc(), 4606 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4607 "arguments specified through the command line"); 4608 } 4609 4610 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4611 Lex(); 4612 4613 return false; 4614 } 4615 4616 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4617 const char *AssemblerDirectiveBegin; 4618 const char *AssemblerDirectiveEnd; 4619 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4620 isHsaAbiVersion3(&getSTI()) 4621 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4622 HSAMD::V3::AssemblerDirectiveEnd) 4623 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4624 HSAMD::AssemblerDirectiveEnd); 4625 4626 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4627 return Error(getParser().getTok().getLoc(), 4628 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4629 "not available on non-amdhsa OSes")).str()); 4630 } 4631 4632 std::string HSAMetadataString; 4633 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4634 HSAMetadataString)) 4635 return true; 4636 4637 if (isHsaAbiVersion3(&getSTI())) { 4638 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4639 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4640 } else { 4641 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4642 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4643 } 4644 4645 return false; 4646 } 4647 4648 /// Common code to parse out a block of text (typically YAML) between start and 4649 /// end directives. 
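/// Returns true (an error) if the end directive is not found before the end of the file.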
4650 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4651 const char *AssemblerDirectiveEnd, 4652 std::string &CollectString) { 4653 4654 raw_string_ostream CollectStream(CollectString); 4655 4656 getLexer().setSkipSpace(false); 4657 4658 bool FoundEnd = false; 4659 while (!getLexer().is(AsmToken::Eof)) { 4660 while (getLexer().is(AsmToken::Space)) { 4661 CollectStream << getLexer().getTok().getString(); 4662 Lex(); 4663 } 4664 4665 if (trySkipId(AssemblerDirectiveEnd)) { 4666 FoundEnd = true; 4667 break; 4668 } 4669 4670 CollectStream << Parser.parseStringToEndOfStatement() 4671 << getContext().getAsmInfo()->getSeparatorString(); 4672 4673 Parser.eatToEndOfStatement(); 4674 } 4675 4676 getLexer().setSkipSpace(true); 4677 4678 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4679 return TokError(Twine("expected directive ") + 4680 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4681 } 4682 4683 CollectStream.flush(); 4684 return false; 4685 } 4686 4687 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4688 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4689 std::string String; 4690 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4691 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4692 return true; 4693 4694 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4695 if (!PALMetadata->setFromString(String)) 4696 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4697 return false; 4698 } 4699 4700 /// Parse the assembler directive for old linear-format PAL metadata. 4701 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4702 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4703 return Error(getParser().getTok().getLoc(), 4704 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4705 "not available on non-amdpal OSes")).str()); 4706 } 4707 4708 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4709 PALMetadata->setLegacy(); 4710 for (;;) { 4711 uint32_t Key, Value; 4712 if (ParseAsAbsoluteExpression(Key)) { 4713 return TokError(Twine("invalid value in ") + 4714 Twine(PALMD::AssemblerDirective)); 4715 } 4716 if (!trySkipToken(AsmToken::Comma)) { 4717 return TokError(Twine("expected an even number of values in ") + 4718 Twine(PALMD::AssemblerDirective)); 4719 } 4720 if (ParseAsAbsoluteExpression(Value)) { 4721 return TokError(Twine("invalid value in ") + 4722 Twine(PALMD::AssemblerDirective)); 4723 } 4724 PALMetadata->setRegister(Key, Value); 4725 if (!trySkipToken(AsmToken::Comma)) 4726 break; 4727 } 4728 return false; 4729 } 4730 4731 /// ParseDirectiveAMDGPULDS 4732 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4733 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4734 if (getParser().checkForValidSection()) 4735 return true; 4736 4737 StringRef Name; 4738 SMLoc NameLoc = getLexer().getLoc(); 4739 if (getParser().parseIdentifier(Name)) 4740 return TokError("expected identifier in directive"); 4741 4742 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4743 if (parseToken(AsmToken::Comma, "expected ','")) 4744 return true; 4745 4746 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4747 4748 int64_t Size; 4749 SMLoc SizeLoc = getLexer().getLoc(); 4750 if (getParser().parseAbsoluteExpression(Size)) 4751 return true; 4752 if (Size < 0) 4753 return Error(SizeLoc, "size must be non-negative"); 4754 if (Size > LocalMemorySize) 4755 return Error(SizeLoc, "size is too large"); 4756 4757 int64_t Alignment = 4; 
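  // The alignment operand is optional; default to a 4 byte alignment when it is omitted.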
4758 if (trySkipToken(AsmToken::Comma)) { 4759 SMLoc AlignLoc = getLexer().getLoc(); 4760 if (getParser().parseAbsoluteExpression(Alignment)) 4761 return true; 4762 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4763 return Error(AlignLoc, "alignment must be a power of two"); 4764 4765 // Alignment larger than the size of LDS is possible in theory, as long 4766 // as the linker manages to place the symbol at address 0, but we do want 4767 // to make sure the alignment fits nicely into a 32-bit integer. 4768 if (Alignment >= 1u << 31) 4769 return Error(AlignLoc, "alignment is too large"); 4770 } 4771 4772 if (parseToken(AsmToken::EndOfStatement, 4773 "unexpected token in '.amdgpu_lds' directive")) 4774 return true; 4775 4776 Symbol->redefineIfPossible(); 4777 if (!Symbol->isUndefined()) 4778 return Error(NameLoc, "invalid symbol redefinition"); 4779 4780 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4781 return false; 4782 } 4783 4784 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4785 StringRef IDVal = DirectiveID.getString(); 4786 4787 if (isHsaAbiVersion3(&getSTI())) { 4788 if (IDVal == ".amdgcn_target") 4789 return ParseDirectiveAMDGCNTarget(); 4790 4791 if (IDVal == ".amdhsa_kernel") 4792 return ParseDirectiveAMDHSAKernel(); 4793 4794 // TODO: Restructure/combine with PAL metadata directive. 4795 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4796 return ParseDirectiveHSAMetadata(); 4797 } else { 4798 if (IDVal == ".hsa_code_object_version") 4799 return ParseDirectiveHSACodeObjectVersion(); 4800 4801 if (IDVal == ".hsa_code_object_isa") 4802 return ParseDirectiveHSACodeObjectISA(); 4803 4804 if (IDVal == ".amd_kernel_code_t") 4805 return ParseDirectiveAMDKernelCodeT(); 4806 4807 if (IDVal == ".amdgpu_hsa_kernel") 4808 return ParseDirectiveAMDGPUHsaKernel(); 4809 4810 if (IDVal == ".amd_amdgpu_isa") 4811 return ParseDirectiveISAVersion(); 4812 4813 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4814 return ParseDirectiveHSAMetadata(); 4815 } 4816 4817 if (IDVal == ".amdgpu_lds") 4818 return ParseDirectiveAMDGPULDS(); 4819 4820 if (IDVal == PALMD::AssemblerDirectiveBegin) 4821 return ParseDirectivePALMetadataBegin(); 4822 4823 if (IDVal == PALMD::AssemblerDirective) 4824 return ParseDirectivePALMetadata(); 4825 4826 return true; 4827 } 4828 4829 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4830 unsigned RegNo) const { 4831 4832 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4833 R.isValid(); ++R) { 4834 if (*R == RegNo) 4835 return isGFX9Plus(); 4836 } 4837 4838 // GFX10 has 2 more SGPRs 104 and 105. 
4839 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4840 R.isValid(); ++R) { 4841 if (*R == RegNo) 4842 return hasSGPR104_SGPR105(); 4843 } 4844 4845 switch (RegNo) { 4846 case AMDGPU::SRC_SHARED_BASE: 4847 case AMDGPU::SRC_SHARED_LIMIT: 4848 case AMDGPU::SRC_PRIVATE_BASE: 4849 case AMDGPU::SRC_PRIVATE_LIMIT: 4850 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4851 return isGFX9Plus(); 4852 case AMDGPU::TBA: 4853 case AMDGPU::TBA_LO: 4854 case AMDGPU::TBA_HI: 4855 case AMDGPU::TMA: 4856 case AMDGPU::TMA_LO: 4857 case AMDGPU::TMA_HI: 4858 return !isGFX9Plus(); 4859 case AMDGPU::XNACK_MASK: 4860 case AMDGPU::XNACK_MASK_LO: 4861 case AMDGPU::XNACK_MASK_HI: 4862 return (isVI() || isGFX9()) && hasXNACK(); 4863 case AMDGPU::SGPR_NULL: 4864 return isGFX10Plus(); 4865 default: 4866 break; 4867 } 4868 4869 if (isCI()) 4870 return true; 4871 4872 if (isSI() || isGFX10Plus()) { 4873 // No flat_scr on SI. 4874 // On GFX10 flat scratch is not a valid register operand and can only be 4875 // accessed with s_setreg/s_getreg. 4876 switch (RegNo) { 4877 case AMDGPU::FLAT_SCR: 4878 case AMDGPU::FLAT_SCR_LO: 4879 case AMDGPU::FLAT_SCR_HI: 4880 return false; 4881 default: 4882 return true; 4883 } 4884 } 4885 4886 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4887 // SI/CI have. 4888 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4889 R.isValid(); ++R) { 4890 if (*R == RegNo) 4891 return hasSGPR102_SGPR103(); 4892 } 4893 4894 return true; 4895 } 4896 4897 OperandMatchResultTy 4898 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4899 OperandMode Mode) { 4900 // Try to parse with a custom parser 4901 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4902 4903 // If we successfully parsed the operand or if there as an error parsing, 4904 // we are done. 4905 // 4906 // If we are parsing after we reach EndOfStatement then this means we 4907 // are appending default values to the Operands list. This is only done 4908 // by custom parser, so we shouldn't continue on to the generic parsing. 4909 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4910 getLexer().is(AsmToken::EndOfStatement)) 4911 return ResTy; 4912 4913 SMLoc RBraceLoc; 4914 SMLoc LBraceLoc = getLoc(); 4915 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 4916 unsigned Prefix = Operands.size(); 4917 4918 for (;;) { 4919 ResTy = parseReg(Operands); 4920 if (ResTy != MatchOperand_Success) 4921 return ResTy; 4922 4923 RBraceLoc = getLoc(); 4924 if (trySkipToken(AsmToken::RBrac)) 4925 break; 4926 4927 if (!trySkipToken(AsmToken::Comma)) 4928 return MatchOperand_ParseFail; 4929 } 4930 4931 if (Operands.size() - Prefix > 1) { 4932 Operands.insert(Operands.begin() + Prefix, 4933 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4934 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 4935 } 4936 4937 return MatchOperand_Success; 4938 } 4939 4940 return parseRegOrImm(Operands); 4941 } 4942 4943 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4944 // Clear any forced encodings from the previous instruction. 
4945 setForcedEncodingSize(0); 4946 setForcedDPP(false); 4947 setForcedSDWA(false); 4948 4949 if (Name.endswith("_e64")) { 4950 setForcedEncodingSize(64); 4951 return Name.substr(0, Name.size() - 4); 4952 } else if (Name.endswith("_e32")) { 4953 setForcedEncodingSize(32); 4954 return Name.substr(0, Name.size() - 4); 4955 } else if (Name.endswith("_dpp")) { 4956 setForcedDPP(true); 4957 return Name.substr(0, Name.size() - 4); 4958 } else if (Name.endswith("_sdwa")) { 4959 setForcedSDWA(true); 4960 return Name.substr(0, Name.size() - 5); 4961 } 4962 return Name; 4963 } 4964 4965 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4966 StringRef Name, 4967 SMLoc NameLoc, OperandVector &Operands) { 4968 // Add the instruction mnemonic 4969 Name = parseMnemonicSuffix(Name); 4970 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4971 4972 bool IsMIMG = Name.startswith("image_"); 4973 4974 while (!trySkipToken(AsmToken::EndOfStatement)) { 4975 OperandMode Mode = OperandMode_Default; 4976 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 4977 Mode = OperandMode_NSA; 4978 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4979 4980 // Eat the comma or space if there is one. 4981 trySkipToken(AsmToken::Comma); 4982 4983 if (Res != MatchOperand_Success) { 4984 checkUnsupportedInstruction(Name, NameLoc); 4985 if (!Parser.hasPendingError()) { 4986 // FIXME: use real operand location rather than the current location. 4987 StringRef Msg = 4988 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4989 "not a valid operand."; 4990 Error(getLexer().getLoc(), Msg); 4991 } 4992 while (!trySkipToken(AsmToken::EndOfStatement)) { 4993 Parser.Lex(); 4994 } 4995 return true; 4996 } 4997 } 4998 4999 return false; 5000 } 5001 5002 //===----------------------------------------------------------------------===// 5003 // Utility functions 5004 //===----------------------------------------------------------------------===// 5005 5006 OperandMatchResultTy 5007 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5008 5009 if (!trySkipId(Prefix, AsmToken::Colon)) 5010 return MatchOperand_NoMatch; 5011 5012 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5013 } 5014 5015 OperandMatchResultTy 5016 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5017 AMDGPUOperand::ImmTy ImmTy, 5018 bool (*ConvertResult)(int64_t&)) { 5019 SMLoc S = getLoc(); 5020 int64_t Value = 0; 5021 5022 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5023 if (Res != MatchOperand_Success) 5024 return Res; 5025 5026 if (ConvertResult && !ConvertResult(Value)) { 5027 Error(S, "invalid " + StringRef(Prefix) + " value."); 5028 } 5029 5030 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5031 return MatchOperand_Success; 5032 } 5033 5034 OperandMatchResultTy 5035 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5036 OperandVector &Operands, 5037 AMDGPUOperand::ImmTy ImmTy, 5038 bool (*ConvertResult)(int64_t&)) { 5039 SMLoc S = getLoc(); 5040 if (!trySkipId(Prefix, AsmToken::Colon)) 5041 return MatchOperand_NoMatch; 5042 5043 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5044 return MatchOperand_ParseFail; 5045 5046 unsigned Val = 0; 5047 const unsigned MaxSize = 4; 5048 5049 // FIXME: How to verify the number of elements matches the number of src 5050 // operands? 
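  // Each array element must be 0 or 1; up to MaxSize comma-separated values are packed into a single bitmask immediate.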
5051 for (int I = 0; ; ++I) { 5052 int64_t Op; 5053 SMLoc Loc = getLoc(); 5054 if (!parseExpr(Op)) 5055 return MatchOperand_ParseFail; 5056 5057 if (Op != 0 && Op != 1) { 5058 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5059 return MatchOperand_ParseFail; 5060 } 5061 5062 Val |= (Op << I); 5063 5064 if (trySkipToken(AsmToken::RBrac)) 5065 break; 5066 5067 if (I + 1 == MaxSize) { 5068 Error(getLoc(), "expected a closing square bracket"); 5069 return MatchOperand_ParseFail; 5070 } 5071 5072 if (!skipToken(AsmToken::Comma, "expected a comma")) 5073 return MatchOperand_ParseFail; 5074 } 5075 5076 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5077 return MatchOperand_Success; 5078 } 5079 5080 OperandMatchResultTy 5081 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 5082 AMDGPUOperand::ImmTy ImmTy) { 5083 int64_t Bit = 0; 5084 SMLoc S = Parser.getTok().getLoc(); 5085 5086 // We are at the end of the statement, and this is a default argument, so 5087 // use a default value. 5088 if (getLexer().isNot(AsmToken::EndOfStatement)) { 5089 switch(getLexer().getKind()) { 5090 case AsmToken::Identifier: { 5091 StringRef Tok = Parser.getTok().getString(); 5092 if (Tok == Name) { 5093 if (Tok == "r128" && !hasMIMG_R128()) 5094 Error(S, "r128 modifier is not supported on this GPU"); 5095 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5096 Error(S, "a16 modifier is not supported on this GPU"); 5097 Bit = 1; 5098 Parser.Lex(); 5099 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5100 Bit = 0; 5101 Parser.Lex(); 5102 } else { 5103 return MatchOperand_NoMatch; 5104 } 5105 break; 5106 } 5107 default: 5108 return MatchOperand_NoMatch; 5109 } 5110 } 5111 5112 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) 5113 return MatchOperand_ParseFail; 5114 5115 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5116 ImmTy = AMDGPUOperand::ImmTyR128A16; 5117 5118 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5119 return MatchOperand_Success; 5120 } 5121 5122 static void addOptionalImmOperand( 5123 MCInst& Inst, const OperandVector& Operands, 5124 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5125 AMDGPUOperand::ImmTy ImmT, 5126 int64_t Default = 0) { 5127 auto i = OptionalIdx.find(ImmT); 5128 if (i != OptionalIdx.end()) { 5129 unsigned Idx = i->second; 5130 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5131 } else { 5132 Inst.addOperand(MCOperand::createImm(Default)); 5133 } 5134 } 5135 5136 OperandMatchResultTy 5137 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5138 if (!trySkipId(Prefix)) 5139 return MatchOperand_NoMatch; 5140 5141 if (!trySkipToken(AsmToken::Colon)) 5142 return MatchOperand_ParseFail; 5143 5144 return parseId(Value) ? 
MatchOperand_Success : MatchOperand_ParseFail; 5145 } 5146 5147 //===----------------------------------------------------------------------===// 5148 // MTBUF format 5149 //===----------------------------------------------------------------------===// 5150 5151 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5152 int64_t MaxVal, 5153 int64_t &Fmt) { 5154 int64_t Val; 5155 SMLoc Loc = getLoc(); 5156 5157 auto Res = parseIntWithPrefix(Pref, Val); 5158 if (Res == MatchOperand_ParseFail) 5159 return false; 5160 if (Res == MatchOperand_NoMatch) 5161 return true; 5162 5163 if (Val < 0 || Val > MaxVal) { 5164 Error(Loc, Twine("out of range ", StringRef(Pref))); 5165 return false; 5166 } 5167 5168 Fmt = Val; 5169 return true; 5170 } 5171 5172 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5173 // values to live in a joint format operand in the MCInst encoding. 5174 OperandMatchResultTy 5175 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5176 using namespace llvm::AMDGPU::MTBUFFormat; 5177 5178 int64_t Dfmt = DFMT_UNDEF; 5179 int64_t Nfmt = NFMT_UNDEF; 5180 5181 // dfmt and nfmt can appear in either order, and each is optional. 5182 for (int I = 0; I < 2; ++I) { 5183 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5184 return MatchOperand_ParseFail; 5185 5186 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5187 return MatchOperand_ParseFail; 5188 } 5189 // Skip optional comma between dfmt/nfmt 5190 // but guard against 2 commas following each other. 5191 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5192 !peekToken().is(AsmToken::Comma)) { 5193 trySkipToken(AsmToken::Comma); 5194 } 5195 } 5196 5197 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5198 return MatchOperand_NoMatch; 5199 5200 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5201 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5202 5203 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5204 return MatchOperand_Success; 5205 } 5206 5207 OperandMatchResultTy 5208 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5209 using namespace llvm::AMDGPU::MTBUFFormat; 5210 5211 int64_t Fmt = UFMT_UNDEF; 5212 5213 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5214 return MatchOperand_ParseFail; 5215 5216 if (Fmt == UFMT_UNDEF) 5217 return MatchOperand_NoMatch; 5218 5219 Format = Fmt; 5220 return MatchOperand_Success; 5221 } 5222 5223 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5224 int64_t &Nfmt, 5225 StringRef FormatStr, 5226 SMLoc Loc) { 5227 using namespace llvm::AMDGPU::MTBUFFormat; 5228 int64_t Format; 5229 5230 Format = getDfmt(FormatStr); 5231 if (Format != DFMT_UNDEF) { 5232 Dfmt = Format; 5233 return true; 5234 } 5235 5236 Format = getNfmt(FormatStr, getSTI()); 5237 if (Format != NFMT_UNDEF) { 5238 Nfmt = Format; 5239 return true; 5240 } 5241 5242 Error(Loc, "unsupported format"); 5243 return false; 5244 } 5245 5246 OperandMatchResultTy 5247 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5248 SMLoc FormatLoc, 5249 int64_t &Format) { 5250 using namespace llvm::AMDGPU::MTBUFFormat; 5251 5252 int64_t Dfmt = DFMT_UNDEF; 5253 int64_t Nfmt = NFMT_UNDEF; 5254 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5255 return MatchOperand_ParseFail; 5256 5257 if (trySkipToken(AsmToken::Comma)) { 5258 StringRef Str; 5259 SMLoc Loc = getLoc(); 5260 if (!parseId(Str, "expected a format string") || 5261 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5262 return MatchOperand_ParseFail; 5263 } 5264 if (Dfmt == DFMT_UNDEF) { 5265 Error(Loc, "duplicate numeric format"); 5266 return MatchOperand_ParseFail; 5267 } else if (Nfmt == NFMT_UNDEF) { 5268 Error(Loc, "duplicate data format"); 5269 return MatchOperand_ParseFail; 5270 } 5271 } 5272 5273 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5274 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5275 5276 if (isGFX10Plus()) { 5277 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5278 if (Ufmt == UFMT_UNDEF) { 5279 Error(FormatLoc, "unsupported format"); 5280 return MatchOperand_ParseFail; 5281 } 5282 Format = Ufmt; 5283 } else { 5284 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5285 } 5286 5287 return MatchOperand_Success; 5288 } 5289 5290 OperandMatchResultTy 5291 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5292 SMLoc Loc, 5293 int64_t &Format) { 5294 using namespace llvm::AMDGPU::MTBUFFormat; 5295 5296 auto Id = getUnifiedFormat(FormatStr); 5297 if (Id == UFMT_UNDEF) 5298 return MatchOperand_NoMatch; 5299 5300 if (!isGFX10Plus()) { 5301 Error(Loc, "unified format is not supported on this GPU"); 5302 return MatchOperand_ParseFail; 5303 } 5304 5305 Format = Id; 5306 return MatchOperand_Success; 5307 } 5308 5309 OperandMatchResultTy 5310 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5311 using namespace llvm::AMDGPU::MTBUFFormat; 5312 SMLoc Loc = getLoc(); 5313 5314 if (!parseExpr(Format)) 5315 return MatchOperand_ParseFail; 5316 if (!isValidFormatEncoding(Format, getSTI())) { 5317 Error(Loc, "out of range format"); 5318 return MatchOperand_ParseFail; 5319 } 5320 5321 return MatchOperand_Success; 5322 } 5323 5324 OperandMatchResultTy 5325 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5326 using namespace llvm::AMDGPU::MTBUFFormat; 5327 5328 if (!trySkipId("format", AsmToken::Colon)) 5329 return MatchOperand_NoMatch; 5330 5331 if (trySkipToken(AsmToken::LBrac)) { 5332 StringRef FormatStr; 5333 SMLoc Loc = getLoc(); 5334 if (!parseId(FormatStr, "expected a format string")) 5335 return MatchOperand_ParseFail; 5336 5337 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5338 if (Res == MatchOperand_NoMatch) 5339 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5340 if (Res != MatchOperand_Success) 5341 return Res; 5342 5343 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5344 return MatchOperand_ParseFail; 5345 5346 return MatchOperand_Success; 5347 } 5348 5349 return parseNumericFormat(Format); 5350 } 5351 5352 OperandMatchResultTy 5353 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5354 using namespace llvm::AMDGPU::MTBUFFormat; 5355 5356 int64_t Format = getDefaultFormatEncoding(getSTI()); 5357 OperandMatchResultTy Res; 5358 SMLoc Loc = getLoc(); 5359 5360 // Parse legacy format syntax. 5361 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5362 if (Res == MatchOperand_ParseFail) 5363 return Res; 5364 5365 bool FormatFound = (Res == MatchOperand_Success); 5366 5367 Operands.push_back( 5368 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5369 5370 if (FormatFound) 5371 trySkipToken(AsmToken::Comma); 5372 5373 if (isToken(AsmToken::EndOfStatement)) { 5374 // We are expecting an soffset operand, 5375 // but let matcher handle the error. 5376 return MatchOperand_Success; 5377 } 5378 5379 // Parse soffset. 
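  // (Illustrative note, not taken from this file's tests: a typical MTBUF
  //  instruction using the legacy split syntax looks roughly like
  //    tbuffer_load_format_x v0, off, s[0:3], dfmt:15, nfmt:2, s1
  //  where dfmt/nfmt form the format operand parsed above and s1 is the
  //  soffset operand parsed next; on GFX10+ a symbolic format such as
  //  format:[BUF_FMT_32_FLOAT] may instead follow soffset and is handled by
  //  parseSymbolicOrNumericFormat() below.)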
5380 Res = parseRegOrImm(Operands); 5381 if (Res != MatchOperand_Success) 5382 return Res; 5383 5384 trySkipToken(AsmToken::Comma); 5385 5386 if (!FormatFound) { 5387 Res = parseSymbolicOrNumericFormat(Format); 5388 if (Res == MatchOperand_ParseFail) 5389 return Res; 5390 if (Res == MatchOperand_Success) { 5391 auto Size = Operands.size(); 5392 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5393 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5394 Op.setImm(Format); 5395 } 5396 return MatchOperand_Success; 5397 } 5398 5399 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5400 Error(getLoc(), "duplicate format"); 5401 return MatchOperand_ParseFail; 5402 } 5403 return MatchOperand_Success; 5404 } 5405 5406 //===----------------------------------------------------------------------===// 5407 // ds 5408 //===----------------------------------------------------------------------===// 5409 5410 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5411 const OperandVector &Operands) { 5412 OptionalImmIndexMap OptionalIdx; 5413 5414 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5415 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5416 5417 // Add the register arguments 5418 if (Op.isReg()) { 5419 Op.addRegOperands(Inst, 1); 5420 continue; 5421 } 5422 5423 // Handle optional arguments 5424 OptionalIdx[Op.getImmTy()] = i; 5425 } 5426 5427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5430 5431 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5432 } 5433 5434 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5435 bool IsGdsHardcoded) { 5436 OptionalImmIndexMap OptionalIdx; 5437 5438 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5439 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5440 5441 // Add the register arguments 5442 if (Op.isReg()) { 5443 Op.addRegOperands(Inst, 1); 5444 continue; 5445 } 5446 5447 if (Op.isToken() && Op.getToken() == "gds") { 5448 IsGdsHardcoded = true; 5449 continue; 5450 } 5451 5452 // Handle optional arguments 5453 OptionalIdx[Op.getImmTy()] = i; 5454 } 5455 5456 AMDGPUOperand::ImmTy OffsetType = 5457 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5458 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5459 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5460 AMDGPUOperand::ImmTyOffset; 5461 5462 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5463 5464 if (!IsGdsHardcoded) { 5465 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5466 } 5467 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5468 } 5469 5470 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5471 OptionalImmIndexMap OptionalIdx; 5472 5473 unsigned OperandIdx[4]; 5474 unsigned EnMask = 0; 5475 int SrcIdx = 0; 5476 5477 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5478 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5479 5480 // Add the register arguments 5481 if (Op.isReg()) { 5482 assert(SrcIdx < 4); 5483 OperandIdx[SrcIdx] = Inst.size(); 5484 Op.addRegOperands(Inst, 1); 5485 ++SrcIdx; 5486 continue; 5487 } 5488 5489 if (Op.isOff()) { 5490 assert(SrcIdx < 4); 5491 OperandIdx[SrcIdx] = Inst.size(); 5492 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5493 ++SrcIdx; 5494 continue; 5495 } 5496 5497 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5498 Op.addImmOperands(Inst, 1); 5499 continue; 5500 } 5501 5502 if (Op.isToken() && Op.getToken() == "done") 5503 continue; 5504 5505 // Handle optional arguments 5506 OptionalIdx[Op.getImmTy()] = i; 5507 } 5508 5509 assert(SrcIdx == 4); 5510 5511 bool Compr = false; 5512 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5513 Compr = true; 5514 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5515 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5516 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5517 } 5518 5519 for (auto i = 0; i < SrcIdx; ++i) { 5520 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5521 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5522 } 5523 } 5524 5525 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5527 5528 Inst.addOperand(MCOperand::createImm(EnMask)); 5529 } 5530 5531 //===----------------------------------------------------------------------===// 5532 // s_waitcnt 5533 //===----------------------------------------------------------------------===// 5534 5535 static bool 5536 encodeCnt( 5537 const AMDGPU::IsaVersion ISA, 5538 int64_t &IntVal, 5539 int64_t CntVal, 5540 bool Saturate, 5541 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5542 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5543 { 5544 bool Failed = false; 5545 5546 IntVal = encode(ISA, IntVal, CntVal); 5547 if (CntVal != decode(ISA, IntVal)) { 5548 if (Saturate) { 5549 IntVal = encode(ISA, IntVal, -1); 5550 } else { 5551 Failed = true; 5552 } 5553 } 5554 return Failed; 5555 } 5556 5557 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5558 5559 SMLoc CntLoc = getLoc(); 5560 StringRef CntName = getTokenStr(); 5561 5562 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5563 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5564 return false; 5565 5566 int64_t CntVal; 5567 SMLoc ValLoc = getLoc(); 5568 if (!parseExpr(CntVal)) 5569 return false; 5570 5571 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5572 5573 bool Failed = true; 5574 bool Sat = CntName.endswith("_sat"); 5575 5576 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5577 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5578 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5579 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5580 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5581 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5582 } else { 5583 Error(CntLoc, "invalid counter name " + CntName); 5584 return false; 5585 } 5586 5587 if (Failed) { 5588 Error(ValLoc, "too large value for " + CntName); 5589 return false; 5590 } 5591 5592 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5593 return false; 5594 5595 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5596 if (isToken(AsmToken::EndOfStatement)) { 5597 Error(getLoc(), "expected a counter name"); 5598 return false; 5599 } 5600 } 5601 5602 return true; 5603 } 5604 5605 OperandMatchResultTy 5606 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5607 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5608 int64_t Waitcnt = getWaitcntBitMask(ISA); 5609 SMLoc S = getLoc(); 5610 5611 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5612 while (!isToken(AsmToken::EndOfStatement)) { 5613 if (!parseCnt(Waitcnt)) 5614 return MatchOperand_ParseFail; 5615 } 5616 } else { 5617 if (!parseExpr(Waitcnt)) 5618 return MatchOperand_ParseFail; 5619 } 5620 5621 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5622 return MatchOperand_Success; 5623 } 5624 5625 bool 5626 AMDGPUOperand::isSWaitCnt() const { 5627 return isImm(); 5628 } 5629 5630 //===----------------------------------------------------------------------===// 5631 // hwreg 5632 //===----------------------------------------------------------------------===// 5633 5634 bool 5635 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5636 OperandInfoTy &Offset, 5637 
OperandInfoTy &Width) { 5638 using namespace llvm::AMDGPU::Hwreg; 5639 5640 // The register may be specified by name or using a numeric code 5641 HwReg.Loc = getLoc(); 5642 if (isToken(AsmToken::Identifier) && 5643 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5644 HwReg.IsSymbolic = true; 5645 lex(); // skip register name 5646 } else if (!parseExpr(HwReg.Id, "a register name")) { 5647 return false; 5648 } 5649 5650 if (trySkipToken(AsmToken::RParen)) 5651 return true; 5652 5653 // parse optional params 5654 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5655 return false; 5656 5657 Offset.Loc = getLoc(); 5658 if (!parseExpr(Offset.Id)) 5659 return false; 5660 5661 if (!skipToken(AsmToken::Comma, "expected a comma")) 5662 return false; 5663 5664 Width.Loc = getLoc(); 5665 return parseExpr(Width.Id) && 5666 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5667 } 5668 5669 bool 5670 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5671 const OperandInfoTy &Offset, 5672 const OperandInfoTy &Width) { 5673 5674 using namespace llvm::AMDGPU::Hwreg; 5675 5676 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5677 Error(HwReg.Loc, 5678 "specified hardware register is not supported on this GPU"); 5679 return false; 5680 } 5681 if (!isValidHwreg(HwReg.Id)) { 5682 Error(HwReg.Loc, 5683 "invalid code of hardware register: only 6-bit values are legal"); 5684 return false; 5685 } 5686 if (!isValidHwregOffset(Offset.Id)) { 5687 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5688 return false; 5689 } 5690 if (!isValidHwregWidth(Width.Id)) { 5691 Error(Width.Loc, 5692 "invalid bitfield width: only values from 1 to 32 are legal"); 5693 return false; 5694 } 5695 return true; 5696 } 5697 5698 OperandMatchResultTy 5699 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5700 using namespace llvm::AMDGPU::Hwreg; 5701 5702 int64_t ImmVal = 0; 5703 SMLoc Loc = getLoc(); 5704 5705 if (trySkipId("hwreg", AsmToken::LParen)) { 5706 OperandInfoTy HwReg(ID_UNKNOWN_); 5707 OperandInfoTy Offset(OFFSET_DEFAULT_); 5708 OperandInfoTy Width(WIDTH_DEFAULT_); 5709 if (parseHwregBody(HwReg, Offset, Width) && 5710 validateHwreg(HwReg, Offset, Width)) { 5711 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 5712 } else { 5713 return MatchOperand_ParseFail; 5714 } 5715 } else if (parseExpr(ImmVal, "a hwreg macro")) { 5716 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5717 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5718 return MatchOperand_ParseFail; 5719 } 5720 } else { 5721 return MatchOperand_ParseFail; 5722 } 5723 5724 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5725 return MatchOperand_Success; 5726 } 5727 5728 bool AMDGPUOperand::isHwreg() const { 5729 return isImmTy(ImmTyHwreg); 5730 } 5731 5732 //===----------------------------------------------------------------------===// 5733 // sendmsg 5734 //===----------------------------------------------------------------------===// 5735 5736 bool 5737 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5738 OperandInfoTy &Op, 5739 OperandInfoTy &Stream) { 5740 using namespace llvm::AMDGPU::SendMsg; 5741 5742 Msg.Loc = getLoc(); 5743 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5744 Msg.IsSymbolic = true; 5745 lex(); // skip message name 5746 } else if (!parseExpr(Msg.Id, "a message name")) { 5747 return false; 5748 } 5749 5750 if (trySkipToken(AsmToken::Comma)) { 5751 Op.IsDefined = true; 
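      // (Hedged illustration, assuming the usual sendmsg syntax: in
      //  "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)" the operation name
      //  GS_OP_EMIT is parsed next, and the optional trailing stream id is
      //  handled by the nested block below.)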
5752 Op.Loc = getLoc(); 5753 if (isToken(AsmToken::Identifier) && 5754 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5755 lex(); // skip operation name 5756 } else if (!parseExpr(Op.Id, "an operation name")) { 5757 return false; 5758 } 5759 5760 if (trySkipToken(AsmToken::Comma)) { 5761 Stream.IsDefined = true; 5762 Stream.Loc = getLoc(); 5763 if (!parseExpr(Stream.Id)) 5764 return false; 5765 } 5766 } 5767 5768 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5769 } 5770 5771 bool 5772 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5773 const OperandInfoTy &Op, 5774 const OperandInfoTy &Stream) { 5775 using namespace llvm::AMDGPU::SendMsg; 5776 5777 // Validation strictness depends on whether message is specified 5778 // in a symbolc or in a numeric form. In the latter case 5779 // only encoding possibility is checked. 5780 bool Strict = Msg.IsSymbolic; 5781 5782 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5783 Error(Msg.Loc, "invalid message id"); 5784 return false; 5785 } 5786 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5787 if (Op.IsDefined) { 5788 Error(Op.Loc, "message does not support operations"); 5789 } else { 5790 Error(Msg.Loc, "missing message operation"); 5791 } 5792 return false; 5793 } 5794 if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5795 Error(Op.Loc, "invalid operation id"); 5796 return false; 5797 } 5798 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5799 Error(Stream.Loc, "message operation does not support streams"); 5800 return false; 5801 } 5802 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5803 Error(Stream.Loc, "invalid message stream id"); 5804 return false; 5805 } 5806 return true; 5807 } 5808 5809 OperandMatchResultTy 5810 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5811 using namespace llvm::AMDGPU::SendMsg; 5812 5813 int64_t ImmVal = 0; 5814 SMLoc Loc = getLoc(); 5815 5816 if (trySkipId("sendmsg", AsmToken::LParen)) { 5817 OperandInfoTy Msg(ID_UNKNOWN_); 5818 OperandInfoTy Op(OP_NONE_); 5819 OperandInfoTy Stream(STREAM_ID_NONE_); 5820 if (parseSendMsgBody(Msg, Op, Stream) && 5821 validateSendMsg(Msg, Op, Stream)) { 5822 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5823 } else { 5824 return MatchOperand_ParseFail; 5825 } 5826 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 5827 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5828 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5829 return MatchOperand_ParseFail; 5830 } 5831 } else { 5832 return MatchOperand_ParseFail; 5833 } 5834 5835 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5836 return MatchOperand_Success; 5837 } 5838 5839 bool AMDGPUOperand::isSendMsg() const { 5840 return isImmTy(ImmTySendMsg); 5841 } 5842 5843 //===----------------------------------------------------------------------===// 5844 // v_interp 5845 //===----------------------------------------------------------------------===// 5846 5847 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5848 if (getLexer().getKind() != AsmToken::Identifier) 5849 return MatchOperand_NoMatch; 5850 5851 StringRef Str = Parser.getTok().getString(); 5852 int Slot = StringSwitch<int>(Str) 5853 .Case("p10", 0) 5854 .Case("p20", 1) 5855 .Case("p0", 2) 5856 .Default(-1); 5857 5858 SMLoc S = Parser.getTok().getLoc(); 5859 if (Slot == -1) 5860 return MatchOperand_ParseFail; 5861 5862 Parser.Lex(); 5863 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5864 
AMDGPUOperand::ImmTyInterpSlot)); 5865 return MatchOperand_Success; 5866 } 5867 5868 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5869 if (getLexer().getKind() != AsmToken::Identifier) 5870 return MatchOperand_NoMatch; 5871 5872 StringRef Str = Parser.getTok().getString(); 5873 if (!Str.startswith("attr")) 5874 return MatchOperand_NoMatch; 5875 5876 StringRef Chan = Str.take_back(2); 5877 int AttrChan = StringSwitch<int>(Chan) 5878 .Case(".x", 0) 5879 .Case(".y", 1) 5880 .Case(".z", 2) 5881 .Case(".w", 3) 5882 .Default(-1); 5883 if (AttrChan == -1) 5884 return MatchOperand_ParseFail; 5885 5886 Str = Str.drop_back(2).drop_front(4); 5887 5888 uint8_t Attr; 5889 if (Str.getAsInteger(10, Attr)) 5890 return MatchOperand_ParseFail; 5891 5892 SMLoc S = Parser.getTok().getLoc(); 5893 Parser.Lex(); 5894 if (Attr > 63) { 5895 Error(S, "out of bounds attr"); 5896 return MatchOperand_ParseFail; 5897 } 5898 5899 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5900 5901 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5902 AMDGPUOperand::ImmTyInterpAttr)); 5903 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5904 AMDGPUOperand::ImmTyAttrChan)); 5905 return MatchOperand_Success; 5906 } 5907 5908 //===----------------------------------------------------------------------===// 5909 // exp 5910 //===----------------------------------------------------------------------===// 5911 5912 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5913 uint8_t &Val) { 5914 if (Str == "null") { 5915 Val = Exp::ET_NULL; 5916 return MatchOperand_Success; 5917 } 5918 5919 if (Str.startswith("mrt")) { 5920 Str = Str.drop_front(3); 5921 if (Str == "z") { // == mrtz 5922 Val = Exp::ET_MRTZ; 5923 return MatchOperand_Success; 5924 } 5925 5926 if (Str.getAsInteger(10, Val)) 5927 return MatchOperand_ParseFail; 5928 5929 if (Val > Exp::ET_MRT7) 5930 return MatchOperand_ParseFail; 5931 5932 return MatchOperand_Success; 5933 } 5934 5935 if (Str.startswith("pos")) { 5936 Str = Str.drop_front(3); 5937 if (Str.getAsInteger(10, Val)) 5938 return MatchOperand_ParseFail; 5939 5940 if (Val > (isGFX10Plus() ? 
4 : 3)) 5941 return MatchOperand_ParseFail; 5942 5943 Val += Exp::ET_POS0; 5944 return MatchOperand_Success; 5945 } 5946 5947 if (isGFX10Plus() && Str == "prim") { 5948 Val = Exp::ET_PRIM; 5949 return MatchOperand_Success; 5950 } 5951 5952 if (Str.startswith("param")) { 5953 Str = Str.drop_front(5); 5954 if (Str.getAsInteger(10, Val)) 5955 return MatchOperand_ParseFail; 5956 5957 if (Val >= 32) 5958 return MatchOperand_ParseFail; 5959 5960 Val += Exp::ET_PARAM0; 5961 return MatchOperand_Success; 5962 } 5963 5964 return MatchOperand_ParseFail; 5965 } 5966 5967 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5968 if (!isToken(AsmToken::Identifier)) 5969 return MatchOperand_NoMatch; 5970 5971 SMLoc S = getLoc(); 5972 5973 uint8_t Val; 5974 auto Res = parseExpTgtImpl(getTokenStr(), Val); 5975 if (Res != MatchOperand_Success) { 5976 Error(S, "invalid exp target"); 5977 return Res; 5978 } 5979 5980 Parser.Lex(); 5981 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5982 AMDGPUOperand::ImmTyExpTgt)); 5983 return MatchOperand_Success; 5984 } 5985 5986 //===----------------------------------------------------------------------===// 5987 // parser helpers 5988 //===----------------------------------------------------------------------===// 5989 5990 bool 5991 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5992 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5993 } 5994 5995 bool 5996 AMDGPUAsmParser::isId(const StringRef Id) const { 5997 return isId(getToken(), Id); 5998 } 5999 6000 bool 6001 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6002 return getTokenKind() == Kind; 6003 } 6004 6005 bool 6006 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6007 if (isId(Id)) { 6008 lex(); 6009 return true; 6010 } 6011 return false; 6012 } 6013 6014 bool 6015 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6016 if (isId(Id) && peekToken().is(Kind)) { 6017 lex(); 6018 lex(); 6019 return true; 6020 } 6021 return false; 6022 } 6023 6024 bool 6025 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6026 if (isToken(Kind)) { 6027 lex(); 6028 return true; 6029 } 6030 return false; 6031 } 6032 6033 bool 6034 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6035 const StringRef ErrMsg) { 6036 if (!trySkipToken(Kind)) { 6037 Error(getLoc(), ErrMsg); 6038 return false; 6039 } 6040 return true; 6041 } 6042 6043 bool 6044 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6045 SMLoc S = getLoc(); 6046 6047 const MCExpr *Expr; 6048 if (Parser.parseExpression(Expr)) 6049 return false; 6050 6051 if (Expr->evaluateAsAbsolute(Imm)) 6052 return true; 6053 6054 if (Expected.empty()) { 6055 Error(S, "expected absolute expression"); 6056 } else { 6057 Error(S, Twine("expected ", Expected) + 6058 Twine(" or an absolute expression")); 6059 } 6060 return false; 6061 } 6062 6063 bool 6064 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6065 SMLoc S = getLoc(); 6066 6067 const MCExpr *Expr; 6068 if (Parser.parseExpression(Expr)) 6069 return false; 6070 6071 int64_t IntVal; 6072 if (Expr->evaluateAsAbsolute(IntVal)) { 6073 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6074 } else { 6075 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6076 } 6077 return true; 6078 } 6079 6080 bool 6081 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6082 if (isToken(AsmToken::String)) { 6083 Val = getToken().getStringContents(); 
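    // Note: getStringContents() yields the token text without the enclosing
    // quotation marks.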
6084 lex(); 6085 return true; 6086 } else { 6087 Error(getLoc(), ErrMsg); 6088 return false; 6089 } 6090 } 6091 6092 bool 6093 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6094 if (isToken(AsmToken::Identifier)) { 6095 Val = getTokenStr(); 6096 lex(); 6097 return true; 6098 } else { 6099 if (!ErrMsg.empty()) 6100 Error(getLoc(), ErrMsg); 6101 return false; 6102 } 6103 } 6104 6105 AsmToken 6106 AMDGPUAsmParser::getToken() const { 6107 return Parser.getTok(); 6108 } 6109 6110 AsmToken 6111 AMDGPUAsmParser::peekToken() { 6112 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6113 } 6114 6115 void 6116 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6117 auto TokCount = getLexer().peekTokens(Tokens); 6118 6119 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6120 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6121 } 6122 6123 AsmToken::TokenKind 6124 AMDGPUAsmParser::getTokenKind() const { 6125 return getLexer().getKind(); 6126 } 6127 6128 SMLoc 6129 AMDGPUAsmParser::getLoc() const { 6130 return getToken().getLoc(); 6131 } 6132 6133 StringRef 6134 AMDGPUAsmParser::getTokenStr() const { 6135 return getToken().getString(); 6136 } 6137 6138 void 6139 AMDGPUAsmParser::lex() { 6140 Parser.Lex(); 6141 } 6142 6143 SMLoc 6144 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6145 const OperandVector &Operands) const { 6146 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6147 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6148 if (Test(Op)) 6149 return Op.getStartLoc(); 6150 } 6151 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6152 } 6153 6154 SMLoc 6155 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6156 const OperandVector &Operands) const { 6157 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6158 return getOperandLoc(Test, Operands); 6159 } 6160 6161 SMLoc 6162 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6163 const OperandVector &Operands) const { 6164 auto Test = [=](const AMDGPUOperand& Op) { 6165 return Op.isRegKind() && Op.getReg() == Reg; 6166 }; 6167 return getOperandLoc(Test, Operands); 6168 } 6169 6170 SMLoc 6171 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6172 auto Test = [](const AMDGPUOperand& Op) { 6173 return Op.IsImmKindLiteral() || Op.isExpr(); 6174 }; 6175 return getOperandLoc(Test, Operands); 6176 } 6177 6178 SMLoc 6179 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6180 auto Test = [](const AMDGPUOperand& Op) { 6181 return Op.isImmKindConst(); 6182 }; 6183 return getOperandLoc(Test, Operands); 6184 } 6185 6186 //===----------------------------------------------------------------------===// 6187 // swizzle 6188 //===----------------------------------------------------------------------===// 6189 6190 LLVM_READNONE 6191 static unsigned 6192 encodeBitmaskPerm(const unsigned AndMask, 6193 const unsigned OrMask, 6194 const unsigned XorMask) { 6195 using namespace llvm::AMDGPU::Swizzle; 6196 6197 return BITMASK_PERM_ENC | 6198 (AndMask << BITMASK_AND_SHIFT) | 6199 (OrMask << BITMASK_OR_SHIFT) | 6200 (XorMask << BITMASK_XOR_SHIFT); 6201 } 6202 6203 bool 6204 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6205 const unsigned MinVal, 6206 const unsigned MaxVal, 6207 const StringRef ErrMsg, 6208 SMLoc &Loc) { 6209 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6210 return false; 6211 } 6212 Loc = Parser.getTok().getLoc(); 6213 if (!parseExpr(Op)) { 6214 return false; 6215 } 6216 if (Op < MinVal 
|| Op > MaxVal) { 6217 Error(Loc, ErrMsg); 6218 return false; 6219 } 6220 6221 return true; 6222 } 6223 6224 bool 6225 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6226 const unsigned MinVal, 6227 const unsigned MaxVal, 6228 const StringRef ErrMsg) { 6229 SMLoc Loc; 6230 for (unsigned i = 0; i < OpNum; ++i) { 6231 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6232 return false; 6233 } 6234 6235 return true; 6236 } 6237 6238 bool 6239 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6240 using namespace llvm::AMDGPU::Swizzle; 6241 6242 int64_t Lane[LANE_NUM]; 6243 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6244 "expected a 2-bit lane id")) { 6245 Imm = QUAD_PERM_ENC; 6246 for (unsigned I = 0; I < LANE_NUM; ++I) { 6247 Imm |= Lane[I] << (LANE_SHIFT * I); 6248 } 6249 return true; 6250 } 6251 return false; 6252 } 6253 6254 bool 6255 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6256 using namespace llvm::AMDGPU::Swizzle; 6257 6258 SMLoc Loc; 6259 int64_t GroupSize; 6260 int64_t LaneIdx; 6261 6262 if (!parseSwizzleOperand(GroupSize, 6263 2, 32, 6264 "group size must be in the interval [2,32]", 6265 Loc)) { 6266 return false; 6267 } 6268 if (!isPowerOf2_64(GroupSize)) { 6269 Error(Loc, "group size must be a power of two"); 6270 return false; 6271 } 6272 if (parseSwizzleOperand(LaneIdx, 6273 0, GroupSize - 1, 6274 "lane id must be in the interval [0,group size - 1]", 6275 Loc)) { 6276 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6277 return true; 6278 } 6279 return false; 6280 } 6281 6282 bool 6283 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6284 using namespace llvm::AMDGPU::Swizzle; 6285 6286 SMLoc Loc; 6287 int64_t GroupSize; 6288 6289 if (!parseSwizzleOperand(GroupSize, 6290 2, 32, 6291 "group size must be in the interval [2,32]", 6292 Loc)) { 6293 return false; 6294 } 6295 if (!isPowerOf2_64(GroupSize)) { 6296 Error(Loc, "group size must be a power of two"); 6297 return false; 6298 } 6299 6300 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6301 return true; 6302 } 6303 6304 bool 6305 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6306 using namespace llvm::AMDGPU::Swizzle; 6307 6308 SMLoc Loc; 6309 int64_t GroupSize; 6310 6311 if (!parseSwizzleOperand(GroupSize, 6312 1, 16, 6313 "group size must be in the interval [1,16]", 6314 Loc)) { 6315 return false; 6316 } 6317 if (!isPowerOf2_64(GroupSize)) { 6318 Error(Loc, "group size must be a power of two"); 6319 return false; 6320 } 6321 6322 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6323 return true; 6324 } 6325 6326 bool 6327 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6328 using namespace llvm::AMDGPU::Swizzle; 6329 6330 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6331 return false; 6332 } 6333 6334 StringRef Ctl; 6335 SMLoc StrLoc = Parser.getTok().getLoc(); 6336 if (!parseString(Ctl)) { 6337 return false; 6338 } 6339 if (Ctl.size() != BITMASK_WIDTH) { 6340 Error(StrLoc, "expected a 5-character mask"); 6341 return false; 6342 } 6343 6344 unsigned AndMask = 0; 6345 unsigned OrMask = 0; 6346 unsigned XorMask = 0; 6347 6348 for (size_t i = 0; i < Ctl.size(); ++i) { 6349 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6350 switch(Ctl[i]) { 6351 default: 6352 Error(StrLoc, "invalid mask"); 6353 return false; 6354 case '0': 6355 break; 6356 case '1': 6357 OrMask |= Mask; 6358 break; 6359 case 'p': 6360 AndMask |= Mask; 6361 break; 6362 case 'i': 6363 AndMask |= Mask; 6364 XorMask |= Mask; 6365 break; 6366 } 
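    // (Summary of the cases above: '0' forces the corresponding lane-id bit
    //  to 0, '1' forces it to 1, 'p' preserves it, and 'i' inverts it; the
    //  leftmost character of the control string maps to the most significant
    //  of the BITMASK_WIDTH bits. For example, the control "p0i10" yields
    //  AndMask=0b10100, OrMask=0b00010, XorMask=0b00100.)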
6367 } 6368 6369 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6370 return true; 6371 } 6372 6373 bool 6374 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6375 6376 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6377 6378 if (!parseExpr(Imm, "a swizzle macro")) { 6379 return false; 6380 } 6381 if (!isUInt<16>(Imm)) { 6382 Error(OffsetLoc, "expected a 16-bit offset"); 6383 return false; 6384 } 6385 return true; 6386 } 6387 6388 bool 6389 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6390 using namespace llvm::AMDGPU::Swizzle; 6391 6392 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6393 6394 SMLoc ModeLoc = Parser.getTok().getLoc(); 6395 bool Ok = false; 6396 6397 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6398 Ok = parseSwizzleQuadPerm(Imm); 6399 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6400 Ok = parseSwizzleBitmaskPerm(Imm); 6401 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6402 Ok = parseSwizzleBroadcast(Imm); 6403 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6404 Ok = parseSwizzleSwap(Imm); 6405 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6406 Ok = parseSwizzleReverse(Imm); 6407 } else { 6408 Error(ModeLoc, "expected a swizzle mode"); 6409 } 6410 6411 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6412 } 6413 6414 return false; 6415 } 6416 6417 OperandMatchResultTy 6418 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6419 SMLoc S = Parser.getTok().getLoc(); 6420 int64_t Imm = 0; 6421 6422 if (trySkipId("offset")) { 6423 6424 bool Ok = false; 6425 if (skipToken(AsmToken::Colon, "expected a colon")) { 6426 if (trySkipId("swizzle")) { 6427 Ok = parseSwizzleMacro(Imm); 6428 } else { 6429 Ok = parseSwizzleOffset(Imm); 6430 } 6431 } 6432 6433 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6434 6435 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6436 } else { 6437 // Swizzle "offset" operand is optional. 6438 // If it is omitted, try parsing other optional operands. 6439 return parseOptionalOpr(Operands); 6440 } 6441 } 6442 6443 bool 6444 AMDGPUOperand::isSwizzle() const { 6445 return isImmTy(ImmTySwizzle); 6446 } 6447 6448 //===----------------------------------------------------------------------===// 6449 // VGPR Index Mode 6450 //===----------------------------------------------------------------------===// 6451 6452 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6453 6454 using namespace llvm::AMDGPU::VGPRIndexMode; 6455 6456 if (trySkipToken(AsmToken::RParen)) { 6457 return OFF; 6458 } 6459 6460 int64_t Imm = 0; 6461 6462 while (true) { 6463 unsigned Mode = 0; 6464 SMLoc S = Parser.getTok().getLoc(); 6465 6466 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6467 if (trySkipId(IdSymbolic[ModeId])) { 6468 Mode = 1 << ModeId; 6469 break; 6470 } 6471 } 6472 6473 if (Mode == 0) { 6474 Error(S, (Imm == 0)? 
6475 "expected a VGPR index mode or a closing parenthesis" : 6476 "expected a VGPR index mode"); 6477 return UNDEF; 6478 } 6479 6480 if (Imm & Mode) { 6481 Error(S, "duplicate VGPR index mode"); 6482 return UNDEF; 6483 } 6484 Imm |= Mode; 6485 6486 if (trySkipToken(AsmToken::RParen)) 6487 break; 6488 if (!skipToken(AsmToken::Comma, 6489 "expected a comma or a closing parenthesis")) 6490 return UNDEF; 6491 } 6492 6493 return Imm; 6494 } 6495 6496 OperandMatchResultTy 6497 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6498 6499 using namespace llvm::AMDGPU::VGPRIndexMode; 6500 6501 int64_t Imm = 0; 6502 SMLoc S = Parser.getTok().getLoc(); 6503 6504 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6505 Imm = parseGPRIdxMacro(); 6506 if (Imm == UNDEF) 6507 return MatchOperand_ParseFail; 6508 } else { 6509 if (getParser().parseAbsoluteExpression(Imm)) 6510 return MatchOperand_ParseFail; 6511 if (Imm < 0 || !isUInt<4>(Imm)) { 6512 Error(S, "invalid immediate: only 4-bit values are legal"); 6513 return MatchOperand_ParseFail; 6514 } 6515 } 6516 6517 Operands.push_back( 6518 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6519 return MatchOperand_Success; 6520 } 6521 6522 bool AMDGPUOperand::isGPRIdxMode() const { 6523 return isImmTy(ImmTyGprIdxMode); 6524 } 6525 6526 //===----------------------------------------------------------------------===// 6527 // sopp branch targets 6528 //===----------------------------------------------------------------------===// 6529 6530 OperandMatchResultTy 6531 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6532 6533 // Make sure we are not parsing something 6534 // that looks like a label or an expression but is not. 6535 // This will improve error messages. 6536 if (isRegister() || isModifier()) 6537 return MatchOperand_NoMatch; 6538 6539 if (!parseExpr(Operands)) 6540 return MatchOperand_ParseFail; 6541 6542 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6543 assert(Opr.isImm() || Opr.isExpr()); 6544 SMLoc Loc = Opr.getStartLoc(); 6545 6546 // Currently we do not support arbitrary expressions as branch targets. 6547 // Only labels and absolute expressions are accepted. 
6548 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6549 Error(Loc, "expected an absolute expression or a label"); 6550 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6551 Error(Loc, "expected a 16-bit signed jump offset"); 6552 } 6553 6554 return MatchOperand_Success; 6555 } 6556 6557 //===----------------------------------------------------------------------===// 6558 // Boolean holding registers 6559 //===----------------------------------------------------------------------===// 6560 6561 OperandMatchResultTy 6562 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6563 return parseReg(Operands); 6564 } 6565 6566 //===----------------------------------------------------------------------===// 6567 // mubuf 6568 //===----------------------------------------------------------------------===// 6569 6570 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6571 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6572 } 6573 6574 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6575 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6576 } 6577 6578 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6579 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6580 } 6581 6582 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6583 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6584 } 6585 6586 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6587 const OperandVector &Operands, 6588 bool IsAtomic, 6589 bool IsAtomicReturn, 6590 bool IsLds) { 6591 bool IsLdsOpcode = IsLds; 6592 bool HasLdsModifier = false; 6593 OptionalImmIndexMap OptionalIdx; 6594 assert(IsAtomicReturn ? IsAtomic : true); 6595 unsigned FirstOperandIdx = 1; 6596 6597 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6598 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6599 6600 // Add the register arguments 6601 if (Op.isReg()) { 6602 Op.addRegOperands(Inst, 1); 6603 // Insert a tied src for atomic return dst. 6604 // This cannot be postponed as subsequent calls to 6605 // addImmOperands rely on correct number of MC operands. 6606 if (IsAtomicReturn && i == FirstOperandIdx) 6607 Op.addRegOperands(Inst, 1); 6608 continue; 6609 } 6610 6611 // Handle the case where soffset is an immediate 6612 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6613 Op.addImmOperands(Inst, 1); 6614 continue; 6615 } 6616 6617 HasLdsModifier |= Op.isLDS(); 6618 6619 // Handle tokens like 'offen' which are sometimes hard-coded into the 6620 // asm string. There are no MCInst operands for these. 6621 if (Op.isToken()) { 6622 continue; 6623 } 6624 assert(Op.isImm()); 6625 6626 // Handle optional arguments 6627 OptionalIdx[Op.getImmTy()] = i; 6628 } 6629 6630 // This is a workaround for an llvm quirk which may result in an 6631 // incorrect instruction selection. Lds and non-lds versions of 6632 // MUBUF instructions are identical except that lds versions 6633 // have mandatory 'lds' modifier. However this modifier follows 6634 // optional modifiers and llvm asm matcher regards this 'lds' 6635 // modifier as an optional one. As a result, an lds version 6636 // of opcode may be selected even if it has no 'lds' modifier. 6637 if (IsLdsOpcode && !HasLdsModifier) { 6638 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6639 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6640 Inst.setOpcode(NoLdsOpcode); 6641 IsLdsOpcode = false; 6642 } 6643 } 6644 6645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6646 if (!IsAtomic || IsAtomicReturn) { 6647 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, 6648 IsAtomicReturn ? -1 : 0); 6649 } 6650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6651 6652 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6653 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6654 } 6655 6656 if (isGFX10Plus()) 6657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6658 } 6659 6660 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6661 OptionalImmIndexMap OptionalIdx; 6662 6663 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6664 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6665 6666 // Add the register arguments 6667 if (Op.isReg()) { 6668 Op.addRegOperands(Inst, 1); 6669 continue; 6670 } 6671 6672 // Handle the case where soffset is an immediate 6673 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6674 Op.addImmOperands(Inst, 1); 6675 continue; 6676 } 6677 6678 // Handle tokens like 'offen' which are sometimes hard-coded into the 6679 // asm string. There are no MCInst operands for these. 6680 if (Op.isToken()) { 6681 continue; 6682 } 6683 assert(Op.isImm()); 6684 6685 // Handle optional arguments 6686 OptionalIdx[Op.getImmTy()] = i; 6687 } 6688 6689 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6690 AMDGPUOperand::ImmTyOffset); 6691 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6693 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6695 6696 if (isGFX10Plus()) 6697 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6698 } 6699 6700 //===----------------------------------------------------------------------===// 6701 // mimg 6702 //===----------------------------------------------------------------------===// 6703 6704 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6705 bool IsAtomic) { 6706 unsigned I = 1; 6707 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6708 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6709 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6710 } 6711 6712 if (IsAtomic) { 6713 // Add src, same as dst 6714 assert(Desc.getNumDefs() == 1); 6715 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6716 } 6717 6718 OptionalImmIndexMap OptionalIdx; 6719 6720 for (unsigned E = Operands.size(); I != E; ++I) { 6721 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6722 6723 // Add the register arguments 6724 if (Op.isReg()) { 6725 Op.addRegOperands(Inst, 1); 6726 } else if (Op.isImmModifier()) { 6727 OptionalIdx[Op.getImmTy()] = I; 6728 } else if (!Op.isToken()) { 6729 llvm_unreachable("unexpected operand type"); 6730 } 6731 } 6732 6733 bool IsGFX10Plus = isGFX10Plus(); 6734 6735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6736 if (IsGFX10Plus) 6737 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6738 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6739 if (IsGFX10Plus) 6740 addOptionalImmOperand(Inst, 
Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6741 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6743 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6744 if (IsGFX10Plus) 6745 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6746 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6747 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6748 if (!IsGFX10Plus) 6749 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6750 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6751 } 6752 6753 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6754 cvtMIMG(Inst, Operands, true); 6755 } 6756 6757 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6758 const OperandVector &Operands) { 6759 for (unsigned I = 1; I < Operands.size(); ++I) { 6760 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6761 if (Operand.isReg()) 6762 Operand.addRegOperands(Inst, 1); 6763 } 6764 6765 Inst.addOperand(MCOperand::createImm(1)); // a16 6766 } 6767 6768 //===----------------------------------------------------------------------===// 6769 // smrd 6770 //===----------------------------------------------------------------------===// 6771 6772 bool AMDGPUOperand::isSMRDOffset8() const { 6773 return isImm() && isUInt<8>(getImm()); 6774 } 6775 6776 bool AMDGPUOperand::isSMEMOffset() const { 6777 return isImm(); // Offset range is checked later by validator. 6778 } 6779 6780 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6781 // 32-bit literals are only supported on CI and we only want to use them 6782 // when the offset is > 8-bits. 6783 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6784 } 6785 6786 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6787 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6788 } 6789 6790 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6791 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6792 } 6793 6794 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6795 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6796 } 6797 6798 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6799 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6800 } 6801 6802 //===----------------------------------------------------------------------===// 6803 // vop3 6804 //===----------------------------------------------------------------------===// 6805 6806 static bool ConvertOmodMul(int64_t &Mul) { 6807 if (Mul != 1 && Mul != 2 && Mul != 4) 6808 return false; 6809 6810 Mul >>= 1; 6811 return true; 6812 } 6813 6814 static bool ConvertOmodDiv(int64_t &Div) { 6815 if (Div == 1) { 6816 Div = 0; 6817 return true; 6818 } 6819 6820 if (Div == 2) { 6821 Div = 3; 6822 return true; 6823 } 6824 6825 return false; 6826 } 6827 6828 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6829 if (BoundCtrl == 0) { 6830 BoundCtrl = 1; 6831 return true; 6832 } 6833 6834 if (BoundCtrl == -1) { 6835 BoundCtrl = 0; 6836 return true; 6837 } 6838 6839 return false; 6840 } 6841 6842 // Note: the order in this table matches the order of operands in AsmString. 
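// (Reader's note on the entry layout, inferred from how the table is consumed
//  in parseOptionalOpr(): each row is {Name, Type, IsBit, ConvertResult}, i.e.
//  the textual modifier, the ImmTy operand it produces, whether it is parsed
//  as a bare flag such as "glc" rather than a "name:value" pair, and an
//  optional callback that rewrites the parsed value, e.g. ConvertOmodMul for
//  "omod".)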
6843 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6844 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6845 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6846 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6847 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6848 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6849 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6850 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6851 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6852 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6853 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6854 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6855 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6856 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6857 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6858 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6859 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6860 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6861 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6862 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6863 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6864 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6865 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6866 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6867 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6868 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6869 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6870 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6871 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6872 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6873 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6874 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6875 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6876 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6877 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6878 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6879 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6880 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6881 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6882 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6883 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6884 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6885 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6886 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6887 }; 6888 6889 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6890 6891 OperandMatchResultTy res = parseOptionalOpr(Operands); 6892 6893 // This is a hack to enable hardcoded mandatory operands which follow 6894 // optional operands. 6895 // 6896 // Current design assumes that all operands after the first optional operand 6897 // are also optional. However implementation of some instructions violates 6898 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6899 // 6900 // To alleviate this problem, we have to (implicitly) parse extra operands 6901 // to make sure autogenerated parser of custom operands never hit hardcoded 6902 // mandatory operands. 
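  // The loop below therefore keeps calling parseOptionalOpr(), skipping the
  // optional separating commas, until it stops matching, the statement ends,
  // or MAX_OPR_LOOKAHEAD extra operands have been consumed. (As a hypothetical
  // illustration: a returning global atomic written as
  // "global_atomic_add v0, v[1:2], v3, off glc" ends in a hardcoded glc token
  // that would otherwise be reached by the autogenerated optional-operand
  // parser.)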
6903 6904 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6905 if (res != MatchOperand_Success || 6906 isToken(AsmToken::EndOfStatement)) 6907 break; 6908 6909 trySkipToken(AsmToken::Comma); 6910 res = parseOptionalOpr(Operands); 6911 } 6912 6913 return res; 6914 } 6915 6916 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6917 OperandMatchResultTy res; 6918 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6919 // try to parse any optional operand here 6920 if (Op.IsBit) { 6921 res = parseNamedBit(Op.Name, Operands, Op.Type); 6922 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6923 res = parseOModOperand(Operands); 6924 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6925 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6926 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6927 res = parseSDWASel(Operands, Op.Name, Op.Type); 6928 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6929 res = parseSDWADstUnused(Operands); 6930 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6931 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6932 Op.Type == AMDGPUOperand::ImmTyNegLo || 6933 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6934 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6935 Op.ConvertResult); 6936 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6937 res = parseDim(Operands); 6938 } else { 6939 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6940 } 6941 if (res != MatchOperand_NoMatch) { 6942 return res; 6943 } 6944 } 6945 return MatchOperand_NoMatch; 6946 } 6947 6948 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6949 StringRef Name = Parser.getTok().getString(); 6950 if (Name == "mul") { 6951 return parseIntWithPrefix("mul", Operands, 6952 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6953 } 6954 6955 if (Name == "div") { 6956 return parseIntWithPrefix("div", Operands, 6957 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6958 } 6959 6960 return MatchOperand_NoMatch; 6961 } 6962 6963 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6964 cvtVOP3P(Inst, Operands); 6965 6966 int Opc = Inst.getOpcode(); 6967 6968 int SrcNum; 6969 const int Ops[] = { AMDGPU::OpName::src0, 6970 AMDGPU::OpName::src1, 6971 AMDGPU::OpName::src2 }; 6972 for (SrcNum = 0; 6973 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6974 ++SrcNum); 6975 assert(SrcNum > 0); 6976 6977 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6978 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6979 6980 if ((OpSel & (1 << SrcNum)) != 0) { 6981 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6982 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6983 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6984 } 6985 } 6986 6987 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6988 // 1. This operand is input modifiers 6989 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6990 // 2. This is not last operand 6991 && Desc.NumOperands > (OpNum + 1) 6992 // 3. Next operand is register class 6993 && Desc.OpInfo[OpNum + 1].RegClass != -1 6994 // 4. 
Next register is not tied to any other operand 6995 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6996 } 6997 6998 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6999 { 7000 OptionalImmIndexMap OptionalIdx; 7001 unsigned Opc = Inst.getOpcode(); 7002 7003 unsigned I = 1; 7004 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7005 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7006 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7007 } 7008 7009 for (unsigned E = Operands.size(); I != E; ++I) { 7010 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7011 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7012 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7013 } else if (Op.isInterpSlot() || 7014 Op.isInterpAttr() || 7015 Op.isAttrChan()) { 7016 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7017 } else if (Op.isImmModifier()) { 7018 OptionalIdx[Op.getImmTy()] = I; 7019 } else { 7020 llvm_unreachable("unhandled operand type"); 7021 } 7022 } 7023 7024 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7025 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7026 } 7027 7028 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7029 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7030 } 7031 7032 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7033 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7034 } 7035 } 7036 7037 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7038 OptionalImmIndexMap &OptionalIdx) { 7039 unsigned Opc = Inst.getOpcode(); 7040 7041 unsigned I = 1; 7042 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7043 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7044 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7045 } 7046 7047 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7048 // This instruction has src modifiers 7049 for (unsigned E = Operands.size(); I != E; ++I) { 7050 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7051 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7052 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7053 } else if (Op.isImmModifier()) { 7054 OptionalIdx[Op.getImmTy()] = I; 7055 } else if (Op.isRegOrImm()) { 7056 Op.addRegOrImmOperands(Inst, 1); 7057 } else { 7058 llvm_unreachable("unhandled operand type"); 7059 } 7060 } 7061 } else { 7062 // No src modifiers 7063 for (unsigned E = Operands.size(); I != E; ++I) { 7064 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7065 if (Op.isMod()) { 7066 OptionalIdx[Op.getImmTy()] = I; 7067 } else { 7068 Op.addRegOrImmOperands(Inst, 1); 7069 } 7070 } 7071 } 7072 7073 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7075 } 7076 7077 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7079 } 7080 7081 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7082 // it has src2 register operand that is tied to dst operand 7083 // we don't allow modifiers for this operand in assembler so src2_modifiers 7084 // should be 0. 
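  // (Descriptive note: the block below handles this by inserting an explicit
  //  zero src2_modifiers operand followed by a copy of the dst operand as
  //  src2, so the tied source is materialized even though it is not written
  //  in the assembly, e.g. in "v_mac_f32_e64 v0, v1, v2".)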
7085 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7086 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7087 Opc == AMDGPU::V_MAC_F32_e64_vi || 7088 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7089 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7090 Opc == AMDGPU::V_MAC_F16_e64_vi || 7091 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7092 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7093 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7094 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7095 auto it = Inst.begin(); 7096 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7097 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7098 ++it; 7099 // Copy the operand to ensure it's not invalidated when Inst grows. 7100 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7101 } 7102 } 7103 7104 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7105 OptionalImmIndexMap OptionalIdx; 7106 cvtVOP3(Inst, Operands, OptionalIdx); 7107 } 7108 7109 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7110 const OperandVector &Operands) { 7111 OptionalImmIndexMap OptIdx; 7112 const int Opc = Inst.getOpcode(); 7113 const MCInstrDesc &Desc = MII.get(Opc); 7114 7115 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7116 7117 cvtVOP3(Inst, Operands, OptIdx); 7118 7119 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7120 assert(!IsPacked); 7121 Inst.addOperand(Inst.getOperand(0)); 7122 } 7123 7124 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7125 // instruction, and then figure out where to actually put the modifiers 7126 7127 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7128 7129 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7130 if (OpSelHiIdx != -1) { 7131 int DefaultVal = IsPacked ? 
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = std::string(getLexer().getTok().getString());
    Parser.Lex();
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!trySkipToken(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (!trySkipToken(AsmToken::Comma))
      return MatchOperand_ParseFail;

    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (!trySkipToken(AsmToken::RBrac))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10Plus() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (!trySkipToken(AsmToken::LBrac))
        return MatchOperand_ParseFail;

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (!trySkipToken(AsmToken::Comma))
          return MatchOperand_ParseFail;

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (!trySkipToken(AsmToken::RBrac))
        return MatchOperand_ParseFail;
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
                             bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;
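  // Added note (illustrative, assumed syntax): this parses the selector as
  // written in, e.g.,
  //   v_mov_b32_sdwa v0, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE
  // and maps the UNUSED_* keyword onto the DstUnused encoding below.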

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
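      // Added worked example (illustrative, derived from the checks below):
      // for "v_add_u32_sdwa v1, vcc, v2, v3" the dst-side vcc is reached
      // while Inst still holds only vdst (1 operand); for
      // "v_addc_u32_sdwa v1, vcc, v2, v3, vcc" the src-side vcc is reached
      // after vdst, src0_modifiers, src0, src1_modifiers and src1 have been
      // added (5 operands).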
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand where
  // the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
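
// Added note (illustrative, assumed syntax): parseEndpgmOp() above accepts
// both "s_endpgm" and "s_endpgm <imm>"; a missing immediate defaults to 0,
// and any explicit value must fit in 16 bits.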