//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ?
                 SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
232 return isSymbolRefExpr(); 233 } 234 235 bool isSymbolRefExpr() const { 236 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 237 } 238 239 bool isImm() const override { 240 return Kind == Immediate; 241 } 242 243 void setImmKindNone() const { 244 assert(isImm()); 245 Imm.Kind = ImmKindTyNone; 246 } 247 248 void setImmKindLiteral() const { 249 assert(isImm()); 250 Imm.Kind = ImmKindTyLiteral; 251 } 252 253 void setImmKindConst() const { 254 assert(isImm()); 255 Imm.Kind = ImmKindTyConst; 256 } 257 258 bool IsImmKindLiteral() const { 259 return isImm() && Imm.Kind == ImmKindTyLiteral; 260 } 261 262 bool isImmKindConst() const { 263 return isImm() && Imm.Kind == ImmKindTyConst; 264 } 265 266 bool isInlinableImm(MVT type) const; 267 bool isLiteralImm(MVT type) const; 268 269 bool isRegKind() const { 270 return Kind == Register; 271 } 272 273 bool isReg() const override { 274 return isRegKind() && !hasModifiers(); 275 } 276 277 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 278 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 279 } 280 281 bool isRegOrImmWithInt16InputMods() const { 282 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 283 } 284 285 bool isRegOrImmWithInt32InputMods() const { 286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 287 } 288 289 bool isRegOrImmWithInt64InputMods() const { 290 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 291 } 292 293 bool isRegOrImmWithFP16InputMods() const { 294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 295 } 296 297 bool isRegOrImmWithFP32InputMods() const { 298 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 299 } 300 301 bool isRegOrImmWithFP64InputMods() const { 302 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 303 } 304 305 bool isVReg() const { 306 return isRegClass(AMDGPU::VGPR_32RegClassID) || 307 isRegClass(AMDGPU::VReg_64RegClassID) || 308 isRegClass(AMDGPU::VReg_96RegClassID) || 309 isRegClass(AMDGPU::VReg_128RegClassID) || 310 isRegClass(AMDGPU::VReg_160RegClassID) || 311 isRegClass(AMDGPU::VReg_192RegClassID) || 312 isRegClass(AMDGPU::VReg_256RegClassID) || 313 isRegClass(AMDGPU::VReg_512RegClassID) || 314 isRegClass(AMDGPU::VReg_1024RegClassID); 315 } 316 317 bool isVReg32() const { 318 return isRegClass(AMDGPU::VGPR_32RegClassID); 319 } 320 321 bool isVReg32OrOff() const { 322 return isOff() || isVReg32(); 323 } 324 325 bool isNull() const { 326 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 327 } 328 329 bool isSDWAOperand(MVT type) const; 330 bool isSDWAFP16Operand() const; 331 bool isSDWAFP32Operand() const; 332 bool isSDWAInt16Operand() const; 333 bool isSDWAInt32Operand() const; 334 335 bool isImmTy(ImmTy ImmT) const { 336 return isImm() && Imm.Type == ImmT; 337 } 338 339 bool isImmModifier() const { 340 return isImm() && Imm.Type != ImmTyNone; 341 } 342 343 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 344 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 345 bool isDMask() const { return isImmTy(ImmTyDMask); } 346 bool isDim() const { return isImmTy(ImmTyDim); } 347 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 348 bool isDA() const { return isImmTy(ImmTyDA); } 349 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 350 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 351 bool isLWE() const { return isImmTy(ImmTyLWE); } 352 bool isOff() const { return isImmTy(ImmTyOff); } 353 bool isExpTgt() const { return 
isImmTy(ImmTyExpTgt); } 354 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 355 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 356 bool isOffen() const { return isImmTy(ImmTyOffen); } 357 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 358 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 359 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 360 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 361 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 362 363 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 364 bool isGDS() const { return isImmTy(ImmTyGDS); } 365 bool isLDS() const { return isImmTy(ImmTyLDS); } 366 bool isDLC() const { return isImmTy(ImmTyDLC); } 367 bool isGLC() const { return isImmTy(ImmTyGLC); } 368 // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced 369 // value of the GLC operand. 370 bool isGLC_1() const { return isImmTy(ImmTyGLC); } 371 bool isSLC() const { return isImmTy(ImmTySLC); } 372 bool isSWZ() const { return isImmTy(ImmTySWZ); } 373 bool isTFE() const { return isImmTy(ImmTyTFE); } 374 bool isD16() const { return isImmTy(ImmTyD16); } 375 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 376 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 377 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 378 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 379 bool isFI() const { return isImmTy(ImmTyDppFi); } 380 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 381 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 382 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 383 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 384 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 385 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 386 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 387 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 388 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 389 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 390 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 391 bool isHigh() const { return isImmTy(ImmTyHigh); } 392 393 bool isMod() const { 394 return isClampSI() || isOModSI(); 395 } 396 397 bool isRegOrImm() const { 398 return isReg() || isImm(); 399 } 400 401 bool isRegClass(unsigned RCID) const; 402 403 bool isInlineValue() const; 404 405 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 406 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 407 } 408 409 bool isSCSrcB16() const { 410 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 411 } 412 413 bool isSCSrcV2B16() const { 414 return isSCSrcB16(); 415 } 416 417 bool isSCSrcB32() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 419 } 420 421 bool isSCSrcB64() const { 422 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 423 } 424 425 bool isBoolReg() const; 426 427 bool isSCSrcF16() const { 428 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 429 } 430 431 bool isSCSrcV2F16() const { 432 return isSCSrcF16(); 433 } 434 435 bool isSCSrcF32() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 437 } 438 439 bool isSCSrcF64() const { 440 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 441 } 442 443 
bool isSSrcB32() const { 444 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 445 } 446 447 bool isSSrcB16() const { 448 return isSCSrcB16() || isLiteralImm(MVT::i16); 449 } 450 451 bool isSSrcV2B16() const { 452 llvm_unreachable("cannot happen"); 453 return isSSrcB16(); 454 } 455 456 bool isSSrcB64() const { 457 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 458 // See isVSrc64(). 459 return isSCSrcB64() || isLiteralImm(MVT::i64); 460 } 461 462 bool isSSrcF32() const { 463 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 464 } 465 466 bool isSSrcF64() const { 467 return isSCSrcB64() || isLiteralImm(MVT::f64); 468 } 469 470 bool isSSrcF16() const { 471 return isSCSrcB16() || isLiteralImm(MVT::f16); 472 } 473 474 bool isSSrcV2F16() const { 475 llvm_unreachable("cannot happen"); 476 return isSSrcF16(); 477 } 478 479 bool isSSrcOrLdsB32() const { 480 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 481 isLiteralImm(MVT::i32) || isExpr(); 482 } 483 484 bool isVCSrcB32() const { 485 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 486 } 487 488 bool isVCSrcB64() const { 489 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 490 } 491 492 bool isVCSrcB16() const { 493 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 494 } 495 496 bool isVCSrcV2B16() const { 497 return isVCSrcB16(); 498 } 499 500 bool isVCSrcF32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 502 } 503 504 bool isVCSrcF64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 506 } 507 508 bool isVCSrcF16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 510 } 511 512 bool isVCSrcV2F16() const { 513 return isVCSrcF16(); 514 } 515 516 bool isVSrcB32() const { 517 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 518 } 519 520 bool isVSrcB64() const { 521 return isVCSrcF64() || isLiteralImm(MVT::i64); 522 } 523 524 bool isVSrcB16() const { 525 return isVCSrcB16() || isLiteralImm(MVT::i16); 526 } 527 528 bool isVSrcV2B16() const { 529 return isVSrcB16() || isLiteralImm(MVT::v2i16); 530 } 531 532 bool isVSrcF32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 534 } 535 536 bool isVSrcF64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::f64); 538 } 539 540 bool isVSrcF16() const { 541 return isVCSrcF16() || isLiteralImm(MVT::f16); 542 } 543 544 bool isVSrcV2F16() const { 545 return isVSrcF16() || isLiteralImm(MVT::v2f16); 546 } 547 548 bool isVISrcB32() const { 549 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 550 } 551 552 bool isVISrcB16() const { 553 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 554 } 555 556 bool isVISrcV2B16() const { 557 return isVISrcB16(); 558 } 559 560 bool isVISrcF32() const { 561 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 562 } 563 564 bool isVISrcF16() const { 565 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 566 } 567 568 bool isVISrcV2F16() const { 569 return isVISrcF16() || isVISrcB32(); 570 } 571 572 bool isAISrcB32() const { 573 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 574 } 575 576 bool isAISrcB16() const { 577 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 578 } 579 580 bool isAISrcV2B16() const { 581 return isAISrcB16(); 582 } 583 584 bool isAISrcF32() const { 585 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 586 } 587 588 bool 
isAISrcF16() const { 589 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 590 } 591 592 bool isAISrcV2F16() const { 593 return isAISrcF16() || isAISrcB32(); 594 } 595 596 bool isAISrc_128B32() const { 597 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 598 } 599 600 bool isAISrc_128B16() const { 601 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 602 } 603 604 bool isAISrc_128V2B16() const { 605 return isAISrc_128B16(); 606 } 607 608 bool isAISrc_128F32() const { 609 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 610 } 611 612 bool isAISrc_128F16() const { 613 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 614 } 615 616 bool isAISrc_128V2F16() const { 617 return isAISrc_128F16() || isAISrc_128B32(); 618 } 619 620 bool isAISrc_512B32() const { 621 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 622 } 623 624 bool isAISrc_512B16() const { 625 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 626 } 627 628 bool isAISrc_512V2B16() const { 629 return isAISrc_512B16(); 630 } 631 632 bool isAISrc_512F32() const { 633 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 634 } 635 636 bool isAISrc_512F16() const { 637 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 638 } 639 640 bool isAISrc_512V2F16() const { 641 return isAISrc_512F16() || isAISrc_512B32(); 642 } 643 644 bool isAISrc_1024B32() const { 645 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 646 } 647 648 bool isAISrc_1024B16() const { 649 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 650 } 651 652 bool isAISrc_1024V2B16() const { 653 return isAISrc_1024B16(); 654 } 655 656 bool isAISrc_1024F32() const { 657 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 658 } 659 660 bool isAISrc_1024F16() const { 661 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 662 } 663 664 bool isAISrc_1024V2F16() const { 665 return isAISrc_1024F16() || isAISrc_1024B32(); 666 } 667 668 bool isKImmFP32() const { 669 return isLiteralImm(MVT::f32); 670 } 671 672 bool isKImmFP16() const { 673 return isLiteralImm(MVT::f16); 674 } 675 676 bool isMem() const override { 677 return false; 678 } 679 680 bool isExpr() const { 681 return Kind == Expression; 682 } 683 684 bool isSoppBrTarget() const { 685 return isExpr() || isImm(); 686 } 687 688 bool isSWaitCnt() const; 689 bool isHwreg() const; 690 bool isSendMsg() const; 691 bool isSwizzle() const; 692 bool isSMRDOffset8() const; 693 bool isSMEMOffset() const; 694 bool isSMRDLiteralOffset() const; 695 bool isDPP8() const; 696 bool isDPPCtrl() const; 697 bool isBLGP() const; 698 bool isCBSZ() const; 699 bool isABID() const; 700 bool isGPRIdxMode() const; 701 bool isS16Imm() const; 702 bool isU16Imm() const; 703 bool isEndpgm() const; 704 705 StringRef getExpressionAsToken() const { 706 assert(isExpr()); 707 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 708 return S->getSymbol().getName(); 709 } 710 711 StringRef getToken() const { 712 assert(isToken()); 713 714 if (Kind == Expression) 715 return getExpressionAsToken(); 716 717 return StringRef(Tok.Data, Tok.Length); 718 } 719 720 int64_t getImm() const { 721 assert(isImm()); 722 return Imm.Val; 723 } 724 725 void setImm(int64_t Val) { 726 assert(isImm()); 727 Imm.Val = Val; 728 } 729 730 ImmTy getImmTy() const { 731 assert(isImm()); 732 return Imm.Type; 733 } 734 735 unsigned getReg() const override { 736 assert(isRegKind()); 737 
return Reg.RegNo; 738 } 739 740 SMLoc getStartLoc() const override { 741 return StartLoc; 742 } 743 744 SMLoc getEndLoc() const override { 745 return EndLoc; 746 } 747 748 SMRange getLocRange() const { 749 return SMRange(StartLoc, EndLoc); 750 } 751 752 Modifiers getModifiers() const { 753 assert(isRegKind() || isImmTy(ImmTyNone)); 754 return isRegKind() ? Reg.Mods : Imm.Mods; 755 } 756 757 void setModifiers(Modifiers Mods) { 758 assert(isRegKind() || isImmTy(ImmTyNone)); 759 if (isRegKind()) 760 Reg.Mods = Mods; 761 else 762 Imm.Mods = Mods; 763 } 764 765 bool hasModifiers() const { 766 return getModifiers().hasModifiers(); 767 } 768 769 bool hasFPModifiers() const { 770 return getModifiers().hasFPModifiers(); 771 } 772 773 bool hasIntModifiers() const { 774 return getModifiers().hasIntModifiers(); 775 } 776 777 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 778 779 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 780 781 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 782 783 template <unsigned Bitwidth> 784 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 785 786 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 787 addKImmFPOperands<16>(Inst, N); 788 } 789 790 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 791 addKImmFPOperands<32>(Inst, N); 792 } 793 794 void addRegOperands(MCInst &Inst, unsigned N) const; 795 796 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 797 addRegOperands(Inst, N); 798 } 799 800 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 801 if (isRegKind()) 802 addRegOperands(Inst, N); 803 else if (isExpr()) 804 Inst.addOperand(MCOperand::createExpr(Expr)); 805 else 806 addImmOperands(Inst, N); 807 } 808 809 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 810 Modifiers Mods = getModifiers(); 811 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 812 if (isRegKind()) { 813 addRegOperands(Inst, N); 814 } else { 815 addImmOperands(Inst, N, false); 816 } 817 } 818 819 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 820 assert(!hasIntModifiers()); 821 addRegOrImmWithInputModsOperands(Inst, N); 822 } 823 824 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 825 assert(!hasFPModifiers()); 826 addRegOrImmWithInputModsOperands(Inst, N); 827 } 828 829 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 830 Modifiers Mods = getModifiers(); 831 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 832 assert(isRegKind()); 833 addRegOperands(Inst, N); 834 } 835 836 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 837 assert(!hasIntModifiers()); 838 addRegWithInputModsOperands(Inst, N); 839 } 840 841 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 842 assert(!hasFPModifiers()); 843 addRegWithInputModsOperands(Inst, N); 844 } 845 846 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 847 if (isImm()) 848 addImmOperands(Inst, N); 849 else { 850 assert(isExpr()); 851 Inst.addOperand(MCOperand::createExpr(Expr)); 852 } 853 } 854 855 static void printImmTy(raw_ostream& OS, ImmTy Type) { 856 switch (Type) { 857 case ImmTyNone: OS << "None"; break; 858 case ImmTyGDS: OS << "GDS"; break; 859 case ImmTyLDS: OS << "LDS"; break; 860 case ImmTyOffen: OS << "Offen"; break; 861 case ImmTyIdxen: OS << "Idxen"; break; 862 case ImmTyAddr64: OS << "Addr64"; break; 863 case ImmTyOffset: OS << "Offset"; 
break; 864 case ImmTyInstOffset: OS << "InstOffset"; break; 865 case ImmTyOffset0: OS << "Offset0"; break; 866 case ImmTyOffset1: OS << "Offset1"; break; 867 case ImmTyDLC: OS << "DLC"; break; 868 case ImmTyGLC: OS << "GLC"; break; 869 case ImmTySLC: OS << "SLC"; break; 870 case ImmTySWZ: OS << "SWZ"; break; 871 case ImmTyTFE: OS << "TFE"; break; 872 case ImmTyD16: OS << "D16"; break; 873 case ImmTyFORMAT: OS << "FORMAT"; break; 874 case ImmTyClampSI: OS << "ClampSI"; break; 875 case ImmTyOModSI: OS << "OModSI"; break; 876 case ImmTyDPP8: OS << "DPP8"; break; 877 case ImmTyDppCtrl: OS << "DppCtrl"; break; 878 case ImmTyDppRowMask: OS << "DppRowMask"; break; 879 case ImmTyDppBankMask: OS << "DppBankMask"; break; 880 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 881 case ImmTyDppFi: OS << "FI"; break; 882 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 883 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 884 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 885 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 886 case ImmTyDMask: OS << "DMask"; break; 887 case ImmTyDim: OS << "Dim"; break; 888 case ImmTyUNorm: OS << "UNorm"; break; 889 case ImmTyDA: OS << "DA"; break; 890 case ImmTyR128A16: OS << "R128A16"; break; 891 case ImmTyA16: OS << "A16"; break; 892 case ImmTyLWE: OS << "LWE"; break; 893 case ImmTyOff: OS << "Off"; break; 894 case ImmTyExpTgt: OS << "ExpTgt"; break; 895 case ImmTyExpCompr: OS << "ExpCompr"; break; 896 case ImmTyExpVM: OS << "ExpVM"; break; 897 case ImmTyHwreg: OS << "Hwreg"; break; 898 case ImmTySendMsg: OS << "SendMsg"; break; 899 case ImmTyInterpSlot: OS << "InterpSlot"; break; 900 case ImmTyInterpAttr: OS << "InterpAttr"; break; 901 case ImmTyAttrChan: OS << "AttrChan"; break; 902 case ImmTyOpSel: OS << "OpSel"; break; 903 case ImmTyOpSelHi: OS << "OpSelHi"; break; 904 case ImmTyNegLo: OS << "NegLo"; break; 905 case ImmTyNegHi: OS << "NegHi"; break; 906 case ImmTySwizzle: OS << "Swizzle"; break; 907 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 908 case ImmTyHigh: OS << "High"; break; 909 case ImmTyBLGP: OS << "BLGP"; break; 910 case ImmTyCBSZ: OS << "CBSZ"; break; 911 case ImmTyABID: OS << "ABID"; break; 912 case ImmTyEndpgm: OS << "Endpgm"; break; 913 } 914 } 915 916 void print(raw_ostream &OS) const override { 917 switch (Kind) { 918 case Register: 919 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 920 break; 921 case Immediate: 922 OS << '<' << getImm(); 923 if (getImmTy() != ImmTyNone) { 924 OS << " type: "; printImmTy(OS, getImmTy()); 925 } 926 OS << " mods: " << Imm.Mods << '>'; 927 break; 928 case Token: 929 OS << '\'' << getToken() << '\''; 930 break; 931 case Expression: 932 OS << "<expr " << *Expr << '>'; 933 break; 934 } 935 } 936 937 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 938 int64_t Val, SMLoc Loc, 939 ImmTy Type = ImmTyNone, 940 bool IsFPImm = false) { 941 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 942 Op->Imm.Val = Val; 943 Op->Imm.IsFPImm = IsFPImm; 944 Op->Imm.Kind = ImmKindTyNone; 945 Op->Imm.Type = Type; 946 Op->Imm.Mods = Modifiers(); 947 Op->StartLoc = Loc; 948 Op->EndLoc = Loc; 949 return Op; 950 } 951 952 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 953 StringRef Str, SMLoc Loc, 954 bool HasExplicitEncodingSize = true) { 955 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 956 Res->Tok.Data = Str.data(); 957 Res->Tok.Length = Str.size(); 958 Res->StartLoc = Loc; 959 Res->EndLoc = Loc; 960 return Res; 961 } 962 963 
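  // NOTE (editorial sketch, not part of the original source): parser routines
  // typically materialize operands through the Create* factory helpers above
  // and append them to the operand list. For example, a hypothetical offset
  // parser might do:
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, Loc,
  //                                               AMDGPUOperand::ImmTyOffset));
  //   Operands.push_back(AMDGPUOperand::CreateToken(this, "offen", Loc));
  // Here `this` is the enclosing AMDGPUAsmParser and `Val`/`Loc` come from the
  // lexer; only the factory signatures themselves are taken from this file.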
static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 964 unsigned RegNo, SMLoc S, 965 SMLoc E) { 966 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 967 Op->Reg.RegNo = RegNo; 968 Op->Reg.Mods = Modifiers(); 969 Op->StartLoc = S; 970 Op->EndLoc = E; 971 return Op; 972 } 973 974 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 975 const class MCExpr *Expr, SMLoc S) { 976 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 977 Op->Expr = Expr; 978 Op->StartLoc = S; 979 Op->EndLoc = S; 980 return Op; 981 } 982 }; 983 984 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 985 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 986 return OS; 987 } 988 989 //===----------------------------------------------------------------------===// 990 // AsmParser 991 //===----------------------------------------------------------------------===// 992 993 // Holds info related to the current kernel, e.g. count of SGPRs used. 994 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 995 // .amdgpu_hsa_kernel or at EOF. 996 class KernelScopeInfo { 997 int SgprIndexUnusedMin = -1; 998 int VgprIndexUnusedMin = -1; 999 MCContext *Ctx = nullptr; 1000 1001 void usesSgprAt(int i) { 1002 if (i >= SgprIndexUnusedMin) { 1003 SgprIndexUnusedMin = ++i; 1004 if (Ctx) { 1005 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1006 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1007 } 1008 } 1009 } 1010 1011 void usesVgprAt(int i) { 1012 if (i >= VgprIndexUnusedMin) { 1013 VgprIndexUnusedMin = ++i; 1014 if (Ctx) { 1015 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1016 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1017 } 1018 } 1019 } 1020 1021 public: 1022 KernelScopeInfo() = default; 1023 1024 void initialize(MCContext &Context) { 1025 Ctx = &Context; 1026 usesSgprAt(SgprIndexUnusedMin = -1); 1027 usesVgprAt(VgprIndexUnusedMin = -1); 1028 } 1029 1030 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1031 switch (RegKind) { 1032 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1033 case IS_AGPR: // fall through 1034 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1035 default: break; 1036 } 1037 } 1038 }; 1039 1040 class AMDGPUAsmParser : public MCTargetAsmParser { 1041 MCAsmParser &Parser; 1042 1043 // Number of extra operands parsed after the first optional operand. 1044 // This may be necessary to skip hardcoded mandatory operands. 1045 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1046 1047 unsigned ForcedEncodingSize = 0; 1048 bool ForcedDPP = false; 1049 bool ForcedSDWA = false; 1050 KernelScopeInfo KernelScope; 1051 1052 /// @name Auto-generated Match Functions 1053 /// { 1054 1055 #define GET_ASSEMBLER_HEADER 1056 #include "AMDGPUGenAsmMatcher.inc" 1057 1058 /// } 1059 1060 private: 1061 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1062 bool OutOfRangeError(SMRange Range); 1063 /// Calculate VGPR/SGPR blocks required for given target, reserved 1064 /// registers, and user-specified NextFreeXGPR values. 1065 /// 1066 /// \param Features [in] Target features, used for bug corrections. 1067 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1068 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1069 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 
1070 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1071 /// descriptor field, if valid. 1072 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1073 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1074 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1075 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1076 /// \param VGPRBlocks [out] Result VGPR block count. 1077 /// \param SGPRBlocks [out] Result SGPR block count. 1078 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1079 bool FlatScrUsed, bool XNACKUsed, 1080 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1081 SMRange VGPRRange, unsigned NextFreeSGPR, 1082 SMRange SGPRRange, unsigned &VGPRBlocks, 1083 unsigned &SGPRBlocks); 1084 bool ParseDirectiveAMDGCNTarget(); 1085 bool ParseDirectiveAMDHSAKernel(); 1086 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1087 bool ParseDirectiveHSACodeObjectVersion(); 1088 bool ParseDirectiveHSACodeObjectISA(); 1089 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1090 bool ParseDirectiveAMDKernelCodeT(); 1091 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1092 bool ParseDirectiveAMDGPUHsaKernel(); 1093 1094 bool ParseDirectiveISAVersion(); 1095 bool ParseDirectiveHSAMetadata(); 1096 bool ParseDirectivePALMetadataBegin(); 1097 bool ParseDirectivePALMetadata(); 1098 bool ParseDirectiveAMDGPULDS(); 1099 1100 /// Common code to parse out a block of text (typically YAML) between start and 1101 /// end directives. 1102 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1103 const char *AssemblerDirectiveEnd, 1104 std::string &CollectString); 1105 1106 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1107 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1108 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1109 unsigned &RegNum, unsigned &RegWidth, 1110 bool RestoreOnFailure = false); 1111 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1112 unsigned &RegNum, unsigned &RegWidth, 1113 SmallVectorImpl<AsmToken> &Tokens); 1114 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1115 unsigned &RegWidth, 1116 SmallVectorImpl<AsmToken> &Tokens); 1117 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1118 unsigned &RegWidth, 1119 SmallVectorImpl<AsmToken> &Tokens); 1120 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1121 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1122 bool ParseRegRange(unsigned& Num, unsigned& Width); 1123 unsigned getRegularReg(RegisterKind RegKind, 1124 unsigned RegNum, 1125 unsigned RegWidth, 1126 SMLoc Loc); 1127 1128 bool isRegister(); 1129 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1130 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1131 void initializeGprCountSymbol(RegisterKind RegKind); 1132 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1133 unsigned RegWidth); 1134 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1135 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1136 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1137 bool IsGdsHardcoded); 1138 1139 public: 1140 enum AMDGPUMatchResultTy { 1141 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1142 }; 1143 enum OperandMode { 1144 OperandMode_Default, 1145 OperandMode_NSA, 1146 }; 1147 1148 using OptionalImmIndexMap = 
      std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

AMDGPUTargetStreamer &getTargetStreamer() { 1260 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1261 return static_cast<AMDGPUTargetStreamer &>(TS); 1262 } 1263 1264 const MCRegisterInfo *getMRI() const { 1265 // We need this const_cast because for some reason getContext() is not const 1266 // in MCAsmParser. 1267 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1268 } 1269 1270 const MCInstrInfo *getMII() const { 1271 return &MII; 1272 } 1273 1274 const FeatureBitset &getFeatureBits() const { 1275 return getSTI().getFeatureBits(); 1276 } 1277 1278 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1279 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1280 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1281 1282 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1283 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1284 bool isForcedDPP() const { return ForcedDPP; } 1285 bool isForcedSDWA() const { return ForcedSDWA; } 1286 ArrayRef<unsigned> getMatchedVariants() const; 1287 StringRef getMatchedVariantName() const; 1288 1289 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1290 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1291 bool RestoreOnFailure); 1292 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1293 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1294 SMLoc &EndLoc) override; 1295 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1296 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1297 unsigned Kind) override; 1298 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1299 OperandVector &Operands, MCStreamer &Out, 1300 uint64_t &ErrorInfo, 1301 bool MatchingInlineAsm) override; 1302 bool ParseDirective(AsmToken DirectiveID) override; 1303 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1304 OperandMode Mode = OperandMode_Default); 1305 StringRef parseMnemonicSuffix(StringRef Name); 1306 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1307 SMLoc NameLoc, OperandVector &Operands) override; 1308 //bool ProcessInstruction(MCInst &Inst); 1309 1310 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1311 1312 OperandMatchResultTy 1313 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1314 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1315 bool (*ConvertResult)(int64_t &) = nullptr); 1316 1317 OperandMatchResultTy 1318 parseOperandArrayWithPrefix(const char *Prefix, 1319 OperandVector &Operands, 1320 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1321 bool (*ConvertResult)(int64_t&) = nullptr); 1322 1323 OperandMatchResultTy 1324 parseNamedBit(const char *Name, OperandVector &Operands, 1325 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1326 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1327 StringRef &Value); 1328 1329 bool isModifier(); 1330 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1331 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1332 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1333 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1334 bool parseSP3NegModifier(); 1335 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier 
= false); 1336 OperandMatchResultTy parseReg(OperandVector &Operands); 1337 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1338 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1339 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1340 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1341 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1342 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1343 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1344 OperandMatchResultTy parseUfmt(int64_t &Format); 1345 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1346 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1347 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1348 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1349 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1350 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1351 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1352 1353 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1354 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1355 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1356 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1357 1358 bool parseCnt(int64_t &IntVal); 1359 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1360 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1361 1362 private: 1363 struct OperandInfoTy { 1364 SMLoc Loc; 1365 int64_t Id; 1366 bool IsSymbolic = false; 1367 bool IsDefined = false; 1368 1369 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1370 }; 1371 1372 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1373 bool validateSendMsg(const OperandInfoTy &Msg, 1374 const OperandInfoTy &Op, 1375 const OperandInfoTy &Stream); 1376 1377 bool parseHwregBody(OperandInfoTy &HwReg, 1378 OperandInfoTy &Offset, 1379 OperandInfoTy &Width); 1380 bool validateHwreg(const OperandInfoTy &HwReg, 1381 const OperandInfoTy &Offset, 1382 const OperandInfoTy &Width); 1383 1384 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1385 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1386 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1387 1388 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1389 const OperandVector &Operands) const; 1390 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1391 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1392 SMLoc getLitLoc(const OperandVector &Operands) const; 1393 SMLoc getConstLoc(const OperandVector &Operands) const; 1394 1395 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1396 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1397 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1398 bool validateSOPLiteral(const MCInst &Inst) const; 1399 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1400 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1401 bool 
validateIntClampSupported(const MCInst &Inst); 1402 bool validateMIMGAtomicDMask(const MCInst &Inst); 1403 bool validateMIMGGatherDMask(const MCInst &Inst); 1404 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1405 bool validateMIMGDataSize(const MCInst &Inst); 1406 bool validateMIMGAddrSize(const MCInst &Inst); 1407 bool validateMIMGD16(const MCInst &Inst); 1408 bool validateMIMGDim(const MCInst &Inst); 1409 bool validateLdsDirect(const MCInst &Inst); 1410 bool validateOpSel(const MCInst &Inst); 1411 bool validateVccOperand(unsigned Reg) const; 1412 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1413 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1414 bool validateDivScale(const MCInst &Inst); 1415 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1416 const SMLoc &IDLoc); 1417 unsigned getConstantBusLimit(unsigned Opcode) const; 1418 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1419 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1420 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1421 1422 bool isSupportedMnemo(StringRef Mnemo, 1423 const FeatureBitset &FBS); 1424 bool isSupportedMnemo(StringRef Mnemo, 1425 const FeatureBitset &FBS, 1426 ArrayRef<unsigned> Variants); 1427 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1428 1429 bool isId(const StringRef Id) const; 1430 bool isId(const AsmToken &Token, const StringRef Id) const; 1431 bool isToken(const AsmToken::TokenKind Kind) const; 1432 bool trySkipId(const StringRef Id); 1433 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1434 bool trySkipToken(const AsmToken::TokenKind Kind); 1435 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1436 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1437 bool parseId(StringRef &Val, const StringRef ErrMsg); 1438 1439 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1440 AsmToken::TokenKind getTokenKind() const; 1441 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1442 bool parseExpr(OperandVector &Operands); 1443 StringRef getTokenStr() const; 1444 AsmToken peekToken(); 1445 AsmToken getToken() const; 1446 SMLoc getLoc() const; 1447 void lex(); 1448 1449 public: 1450 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1451 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1452 1453 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1454 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1455 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1456 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1457 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1458 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1459 1460 bool parseSwizzleOperand(int64_t &Op, 1461 const unsigned MinVal, 1462 const unsigned MaxVal, 1463 const StringRef ErrMsg, 1464 SMLoc &Loc); 1465 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1466 const unsigned MinVal, 1467 const unsigned MaxVal, 1468 const StringRef ErrMsg); 1469 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1470 bool parseSwizzleOffset(int64_t &Imm); 1471 bool parseSwizzleMacro(int64_t &Imm); 1472 bool parseSwizzleQuadPerm(int64_t &Imm); 1473 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1474 bool parseSwizzleBroadcast(int64_t &Imm); 1475 bool parseSwizzleSwap(int64_t &Imm); 1476 bool 
parseSwizzleReverse(int64_t &Imm); 1477 1478 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1479 int64_t parseGPRIdxMacro(); 1480 1481 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1482 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1483 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1484 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1485 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1486 1487 AMDGPUOperand::Ptr defaultDLC() const; 1488 AMDGPUOperand::Ptr defaultGLC() const; 1489 AMDGPUOperand::Ptr defaultGLC_1() const; 1490 AMDGPUOperand::Ptr defaultSLC() const; 1491 1492 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1493 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1494 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1495 AMDGPUOperand::Ptr defaultFlatOffset() const; 1496 1497 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1498 1499 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1500 OptionalImmIndexMap &OptionalIdx); 1501 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1502 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1503 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1504 1505 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1506 1507 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1508 bool IsAtomic = false); 1509 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1510 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1511 1512 OperandMatchResultTy parseDim(OperandVector &Operands); 1513 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1514 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1515 AMDGPUOperand::Ptr defaultRowMask() const; 1516 AMDGPUOperand::Ptr defaultBankMask() const; 1517 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1518 AMDGPUOperand::Ptr defaultFI() const; 1519 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1520 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1521 1522 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1523 AMDGPUOperand::ImmTy Type); 1524 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1525 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1526 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1527 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1528 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1529 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1530 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1531 uint64_t BasicInstType, 1532 bool SkipDstVcc = false, 1533 bool SkipSrcVcc = false); 1534 1535 AMDGPUOperand::Ptr defaultBLGP() const; 1536 AMDGPUOperand::Ptr defaultCBSZ() const; 1537 AMDGPUOperand::Ptr defaultABID() const; 1538 1539 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1540 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1541 }; 1542 1543 struct OptionalOperand { 1544 const char *Name; 1545 AMDGPUOperand::ImmTy Type; 1546 bool IsBit; 1547 bool (*ConvertResult)(int64_t&); 1548 }; 1549 1550 } // end anonymous namespace 1551 1552 // May be called with integer type with 
equivalent bitwidth. 1553 static const fltSemantics *getFltSemantics(unsigned Size) { 1554 switch (Size) { 1555 case 4: 1556 return &APFloat::IEEEsingle(); 1557 case 8: 1558 return &APFloat::IEEEdouble(); 1559 case 2: 1560 return &APFloat::IEEEhalf(); 1561 default: 1562 llvm_unreachable("unsupported fp type"); 1563 } 1564 } 1565 1566 static const fltSemantics *getFltSemantics(MVT VT) { 1567 return getFltSemantics(VT.getSizeInBits() / 8); 1568 } 1569 1570 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1571 switch (OperandType) { 1572 case AMDGPU::OPERAND_REG_IMM_INT32: 1573 case AMDGPU::OPERAND_REG_IMM_FP32: 1574 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1575 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1576 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1577 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1578 return &APFloat::IEEEsingle(); 1579 case AMDGPU::OPERAND_REG_IMM_INT64: 1580 case AMDGPU::OPERAND_REG_IMM_FP64: 1581 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1582 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1583 return &APFloat::IEEEdouble(); 1584 case AMDGPU::OPERAND_REG_IMM_INT16: 1585 case AMDGPU::OPERAND_REG_IMM_FP16: 1586 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1587 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1588 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1589 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1590 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1591 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1592 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1593 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1594 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1595 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1596 return &APFloat::IEEEhalf(); 1597 default: 1598 llvm_unreachable("unsupported fp type"); 1599 } 1600 } 1601 1602 //===----------------------------------------------------------------------===// 1603 // Operand 1604 //===----------------------------------------------------------------------===// 1605 1606 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1607 bool Lost; 1608 1609 // Convert literal to single precision 1610 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1611 APFloat::rmNearestTiesToEven, 1612 &Lost); 1613 // We allow precision lost but not overflow or underflow 1614 if (Status != APFloat::opOK && 1615 Lost && 1616 ((Status & APFloat::opOverflow) != 0 || 1617 (Status & APFloat::opUnderflow) != 0)) { 1618 return false; 1619 } 1620 1621 return true; 1622 } 1623 1624 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1625 return isUIntN(Size, Val) || isIntN(Size, Val); 1626 } 1627 1628 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1629 if (VT.getScalarType() == MVT::i16) { 1630 // FP immediate values are broken. 1631 return isInlinableIntLiteral(Val); 1632 } 1633 1634 // f16/v2f16 operands work correctly for all values. 1635 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1636 } 1637 1638 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1639 1640 // This is a hack to enable named inline values like 1641 // shared_base with both 32-bit and 64-bit operands. 1642 // Note that these values are defined as 1643 // 32-bit operands only. 1644 if (isInlineValue()) { 1645 return true; 1646 } 1647 1648 if (!isImmTy(ImmTyNone)) { 1649 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1650 return false; 1651 } 1652 // TODO: We should avoid using host float here. It would be better to 1653 // check the float bit values which is what a few other places do. 
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1743 1744 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1745 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1746 } 1747 1748 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1749 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1750 } 1751 1752 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1753 if (AsmParser->isVI()) 1754 return isVReg32(); 1755 else if (AsmParser->isGFX9Plus()) 1756 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1757 else 1758 return false; 1759 } 1760 1761 bool AMDGPUOperand::isSDWAFP16Operand() const { 1762 return isSDWAOperand(MVT::f16); 1763 } 1764 1765 bool AMDGPUOperand::isSDWAFP32Operand() const { 1766 return isSDWAOperand(MVT::f32); 1767 } 1768 1769 bool AMDGPUOperand::isSDWAInt16Operand() const { 1770 return isSDWAOperand(MVT::i16); 1771 } 1772 1773 bool AMDGPUOperand::isSDWAInt32Operand() const { 1774 return isSDWAOperand(MVT::i32); 1775 } 1776 1777 bool AMDGPUOperand::isBoolReg() const { 1778 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1779 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1780 } 1781 1782 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1783 { 1784 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1785 assert(Size == 2 || Size == 4 || Size == 8); 1786 1787 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1788 1789 if (Imm.Mods.Abs) { 1790 Val &= ~FpSignMask; 1791 } 1792 if (Imm.Mods.Neg) { 1793 Val ^= FpSignMask; 1794 } 1795 1796 return Val; 1797 } 1798 1799 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1800 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1801 Inst.getNumOperands())) { 1802 addLiteralImmOperand(Inst, Imm.Val, 1803 ApplyModifiers & 1804 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1805 } else { 1806 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1807 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1808 setImmKindNone(); 1809 } 1810 } 1811 1812 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1813 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1814 auto OpNum = Inst.getNumOperands(); 1815 // Check that this operand accepts literals 1816 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1817 1818 if (ApplyModifiers) { 1819 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1820 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1821 Val = applyInputFPModifiers(Val, Size); 1822 } 1823 1824 APInt Literal(64, Val); 1825 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1826 1827 if (Imm.IsFPImm) { // We got fp literal token 1828 switch (OpTy) { 1829 case AMDGPU::OPERAND_REG_IMM_INT64: 1830 case AMDGPU::OPERAND_REG_IMM_FP64: 1831 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1832 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1833 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1834 AsmParser->hasInv2PiInlineImm())) { 1835 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1836 setImmKindConst(); 1837 return; 1838 } 1839 1840 // Non-inlineable 1841 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1842 // For fp operands we check if low 32 bits are zeros 1843 if (Literal.getLoBits(32) != 0) { 1844 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1845 "Can't encode literal as exact 64-bit floating-point operand. " 1846 "Low 32-bits will be set to zero"); 1847 } 1848 1849 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1850 setImmKindLiteral(); 1851 return; 1852 } 1853 1854 // We don't allow fp literals in 64-bit integer instructions. It is 1855 // unclear how we should encode them. This case should be checked earlier 1856 // in predicate methods (isLiteralImm()) 1857 llvm_unreachable("fp literal in 64-bit integer instruction."); 1858 1859 case AMDGPU::OPERAND_REG_IMM_INT32: 1860 case AMDGPU::OPERAND_REG_IMM_FP32: 1861 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1862 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1863 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1865 case AMDGPU::OPERAND_REG_IMM_INT16: 1866 case AMDGPU::OPERAND_REG_IMM_FP16: 1867 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1868 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1869 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1870 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1871 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1872 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1873 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1874 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1875 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1876 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1877 bool lost; 1878 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1879 // Convert literal to single precision 1880 FPLiteral.convert(*getOpFltSemantics(OpTy), 1881 APFloat::rmNearestTiesToEven, &lost); 1882 // We allow precision lost but not overflow or underflow. This should be 1883 // checked earlier in isLiteralImm() 1884 1885 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1886 Inst.addOperand(MCOperand::createImm(ImmVal)); 1887 setImmKindLiteral(); 1888 return; 1889 } 1890 default: 1891 llvm_unreachable("invalid operand size"); 1892 } 1893 1894 return; 1895 } 1896 1897 // We got int literal token. 1898 // Only sign extend inline immediates. 
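// For illustration of the paths below (assuming the usual inline range of
// -16..64 for integer constants):
//   v_add_f32 v0, 64, v1   // 64 is inlinable and stays an inline constant
//   v_add_f32 v0, 65, v1   // 65 is not inlinable and is emitted as a 32-bit literal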
1899 switch (OpTy) { 1900 case AMDGPU::OPERAND_REG_IMM_INT32: 1901 case AMDGPU::OPERAND_REG_IMM_FP32: 1902 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1903 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1904 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1905 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1906 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1907 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1908 if (isSafeTruncation(Val, 32) && 1909 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1910 AsmParser->hasInv2PiInlineImm())) { 1911 Inst.addOperand(MCOperand::createImm(Val)); 1912 setImmKindConst(); 1913 return; 1914 } 1915 1916 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1917 setImmKindLiteral(); 1918 return; 1919 1920 case AMDGPU::OPERAND_REG_IMM_INT64: 1921 case AMDGPU::OPERAND_REG_IMM_FP64: 1922 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1923 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1924 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1925 Inst.addOperand(MCOperand::createImm(Val)); 1926 setImmKindConst(); 1927 return; 1928 } 1929 1930 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1931 setImmKindLiteral(); 1932 return; 1933 1934 case AMDGPU::OPERAND_REG_IMM_INT16: 1935 case AMDGPU::OPERAND_REG_IMM_FP16: 1936 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1937 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1938 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1939 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1940 if (isSafeTruncation(Val, 16) && 1941 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1942 AsmParser->hasInv2PiInlineImm())) { 1943 Inst.addOperand(MCOperand::createImm(Val)); 1944 setImmKindConst(); 1945 return; 1946 } 1947 1948 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1949 setImmKindLiteral(); 1950 return; 1951 1952 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1953 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1954 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1955 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1956 assert(isSafeTruncation(Val, 16)); 1957 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1958 AsmParser->hasInv2PiInlineImm())); 1959 1960 Inst.addOperand(MCOperand::createImm(Val)); 1961 return; 1962 } 1963 default: 1964 llvm_unreachable("invalid operand size"); 1965 } 1966 } 1967 1968 template <unsigned Bitwidth> 1969 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1970 APInt Literal(64, Imm.Val); 1971 setImmKindNone(); 1972 1973 if (!Imm.IsFPImm) { 1974 // We got int literal token. 
1975 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1976 return; 1977 } 1978 1979 bool Lost; 1980 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1981 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1982 APFloat::rmNearestTiesToEven, &Lost); 1983 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1984 } 1985 1986 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1987 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1988 } 1989 1990 static bool isInlineValue(unsigned Reg) { 1991 switch (Reg) { 1992 case AMDGPU::SRC_SHARED_BASE: 1993 case AMDGPU::SRC_SHARED_LIMIT: 1994 case AMDGPU::SRC_PRIVATE_BASE: 1995 case AMDGPU::SRC_PRIVATE_LIMIT: 1996 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1997 return true; 1998 case AMDGPU::SRC_VCCZ: 1999 case AMDGPU::SRC_EXECZ: 2000 case AMDGPU::SRC_SCC: 2001 return true; 2002 case AMDGPU::SGPR_NULL: 2003 return true; 2004 default: 2005 return false; 2006 } 2007 } 2008 2009 bool AMDGPUOperand::isInlineValue() const { 2010 return isRegKind() && ::isInlineValue(getReg()); 2011 } 2012 2013 //===----------------------------------------------------------------------===// 2014 // AsmParser 2015 //===----------------------------------------------------------------------===// 2016 2017 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2018 if (Is == IS_VGPR) { 2019 switch (RegWidth) { 2020 default: return -1; 2021 case 1: return AMDGPU::VGPR_32RegClassID; 2022 case 2: return AMDGPU::VReg_64RegClassID; 2023 case 3: return AMDGPU::VReg_96RegClassID; 2024 case 4: return AMDGPU::VReg_128RegClassID; 2025 case 5: return AMDGPU::VReg_160RegClassID; 2026 case 6: return AMDGPU::VReg_192RegClassID; 2027 case 8: return AMDGPU::VReg_256RegClassID; 2028 case 16: return AMDGPU::VReg_512RegClassID; 2029 case 32: return AMDGPU::VReg_1024RegClassID; 2030 } 2031 } else if (Is == IS_TTMP) { 2032 switch (RegWidth) { 2033 default: return -1; 2034 case 1: return AMDGPU::TTMP_32RegClassID; 2035 case 2: return AMDGPU::TTMP_64RegClassID; 2036 case 4: return AMDGPU::TTMP_128RegClassID; 2037 case 8: return AMDGPU::TTMP_256RegClassID; 2038 case 16: return AMDGPU::TTMP_512RegClassID; 2039 } 2040 } else if (Is == IS_SGPR) { 2041 switch (RegWidth) { 2042 default: return -1; 2043 case 1: return AMDGPU::SGPR_32RegClassID; 2044 case 2: return AMDGPU::SGPR_64RegClassID; 2045 case 3: return AMDGPU::SGPR_96RegClassID; 2046 case 4: return AMDGPU::SGPR_128RegClassID; 2047 case 5: return AMDGPU::SGPR_160RegClassID; 2048 case 6: return AMDGPU::SGPR_192RegClassID; 2049 case 8: return AMDGPU::SGPR_256RegClassID; 2050 case 16: return AMDGPU::SGPR_512RegClassID; 2051 } 2052 } else if (Is == IS_AGPR) { 2053 switch (RegWidth) { 2054 default: return -1; 2055 case 1: return AMDGPU::AGPR_32RegClassID; 2056 case 2: return AMDGPU::AReg_64RegClassID; 2057 case 3: return AMDGPU::AReg_96RegClassID; 2058 case 4: return AMDGPU::AReg_128RegClassID; 2059 case 5: return AMDGPU::AReg_160RegClassID; 2060 case 6: return AMDGPU::AReg_192RegClassID; 2061 case 8: return AMDGPU::AReg_256RegClassID; 2062 case 16: return AMDGPU::AReg_512RegClassID; 2063 case 32: return AMDGPU::AReg_1024RegClassID; 2064 } 2065 } 2066 return -1; 2067 } 2068 2069 static unsigned getSpecialRegForName(StringRef RegName) { 2070 return StringSwitch<unsigned>(RegName) 2071 .Case("exec", AMDGPU::EXEC) 2072 .Case("vcc", AMDGPU::VCC) 2073 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2074 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2075 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2076 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2077 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2078 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2079 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2080 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2081 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2082 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2083 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2084 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2085 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2086 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2087 .Case("m0", AMDGPU::M0) 2088 .Case("vccz", AMDGPU::SRC_VCCZ) 2089 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2090 .Case("execz", AMDGPU::SRC_EXECZ) 2091 .Case("src_execz", AMDGPU::SRC_EXECZ) 2092 .Case("scc", AMDGPU::SRC_SCC) 2093 .Case("src_scc", AMDGPU::SRC_SCC) 2094 .Case("tba", AMDGPU::TBA) 2095 .Case("tma", AMDGPU::TMA) 2096 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2097 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2098 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2099 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2100 .Case("vcc_lo", AMDGPU::VCC_LO) 2101 .Case("vcc_hi", AMDGPU::VCC_HI) 2102 .Case("exec_lo", AMDGPU::EXEC_LO) 2103 .Case("exec_hi", AMDGPU::EXEC_HI) 2104 .Case("tma_lo", AMDGPU::TMA_LO) 2105 .Case("tma_hi", AMDGPU::TMA_HI) 2106 .Case("tba_lo", AMDGPU::TBA_LO) 2107 .Case("tba_hi", AMDGPU::TBA_HI) 2108 .Case("pc", AMDGPU::PC_REG) 2109 .Case("null", AMDGPU::SGPR_NULL) 2110 .Default(AMDGPU::NoRegister); 2111 } 2112 2113 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2114 SMLoc &EndLoc, bool RestoreOnFailure) { 2115 auto R = parseRegister(); 2116 if (!R) return true; 2117 assert(R->isReg()); 2118 RegNo = R->getReg(); 2119 StartLoc = R->getStartLoc(); 2120 EndLoc = R->getEndLoc(); 2121 return false; 2122 } 2123 2124 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2125 SMLoc &EndLoc) { 2126 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2127 } 2128 2129 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2130 SMLoc &StartLoc, 2131 SMLoc &EndLoc) { 2132 bool Result = 2133 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2134 bool PendingErrors = getParser().hasPendingError(); 2135 getParser().clearPendingErrors(); 2136 if (PendingErrors) 2137 return MatchOperand_ParseFail; 2138 if (Result) 2139 return MatchOperand_NoMatch; 2140 return MatchOperand_Success; 2141 } 2142 2143 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2144 RegisterKind RegKind, unsigned Reg1, 2145 SMLoc Loc) { 2146 switch (RegKind) { 2147 case IS_SPECIAL: 2148 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2149 Reg = AMDGPU::EXEC; 2150 RegWidth = 2; 2151 return true; 2152 } 2153 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2154 Reg = AMDGPU::FLAT_SCR; 2155 RegWidth = 2; 2156 return true; 2157 } 2158 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2159 Reg = AMDGPU::XNACK_MASK; 2160 RegWidth = 2; 2161 return true; 2162 } 2163 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2164 Reg = AMDGPU::VCC; 2165 RegWidth = 2; 2166 return true; 2167 } 2168 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2169 Reg = AMDGPU::TBA; 2170 RegWidth = 2; 2171 return true; 2172 } 2173 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2174 Reg = AMDGPU::TMA; 2175 
RegWidth = 2; 2176 return true; 2177 } 2178 Error(Loc, "register does not fit in the list"); 2179 return false; 2180 case IS_VGPR: 2181 case IS_SGPR: 2182 case IS_AGPR: 2183 case IS_TTMP: 2184 if (Reg1 != Reg + RegWidth) { 2185 Error(Loc, "registers in a list must have consecutive indices"); 2186 return false; 2187 } 2188 RegWidth++; 2189 return true; 2190 default: 2191 llvm_unreachable("unexpected register kind"); 2192 } 2193 } 2194 2195 struct RegInfo { 2196 StringLiteral Name; 2197 RegisterKind Kind; 2198 }; 2199 2200 static constexpr RegInfo RegularRegisters[] = { 2201 {{"v"}, IS_VGPR}, 2202 {{"s"}, IS_SGPR}, 2203 {{"ttmp"}, IS_TTMP}, 2204 {{"acc"}, IS_AGPR}, 2205 {{"a"}, IS_AGPR}, 2206 }; 2207 2208 static bool isRegularReg(RegisterKind Kind) { 2209 return Kind == IS_VGPR || 2210 Kind == IS_SGPR || 2211 Kind == IS_TTMP || 2212 Kind == IS_AGPR; 2213 } 2214 2215 static const RegInfo* getRegularRegInfo(StringRef Str) { 2216 for (const RegInfo &Reg : RegularRegisters) 2217 if (Str.startswith(Reg.Name)) 2218 return &Reg; 2219 return nullptr; 2220 } 2221 2222 static bool getRegNum(StringRef Str, unsigned& Num) { 2223 return !Str.getAsInteger(10, Num); 2224 } 2225 2226 bool 2227 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2228 const AsmToken &NextToken) const { 2229 2230 // A list of consecutive registers: [s0,s1,s2,s3] 2231 if (Token.is(AsmToken::LBrac)) 2232 return true; 2233 2234 if (!Token.is(AsmToken::Identifier)) 2235 return false; 2236 2237 // A single register like s0 or a range of registers like s[0:1] 2238 2239 StringRef Str = Token.getString(); 2240 const RegInfo *Reg = getRegularRegInfo(Str); 2241 if (Reg) { 2242 StringRef RegName = Reg->Name; 2243 StringRef RegSuffix = Str.substr(RegName.size()); 2244 if (!RegSuffix.empty()) { 2245 unsigned Num; 2246 // A single register with an index: rXX 2247 if (getRegNum(RegSuffix, Num)) 2248 return true; 2249 } else { 2250 // A range of registers: r[XX:YY]. 2251 if (NextToken.is(AsmToken::LBrac)) 2252 return true; 2253 } 2254 } 2255 2256 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2257 } 2258 2259 bool 2260 AMDGPUAsmParser::isRegister() 2261 { 2262 return isRegister(getToken(), peekToken()); 2263 } 2264 2265 unsigned 2266 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2267 unsigned RegNum, 2268 unsigned RegWidth, 2269 SMLoc Loc) { 2270 2271 assert(isRegularReg(RegKind)); 2272 2273 unsigned AlignSize = 1; 2274 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2275 // SGPR and TTMP registers must be aligned. 2276 // Max required alignment is 4 dwords. 
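// For example (derived from the check below): s[0:1] and s[2:3] are accepted,
// while s[1:2] triggers "invalid register alignment"; tuples of four or more
// dwords such as s[4:7] must start at a multiple of 4.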
2277 AlignSize = std::min(RegWidth, 4u); 2278 } 2279 2280 if (RegNum % AlignSize != 0) { 2281 Error(Loc, "invalid register alignment"); 2282 return AMDGPU::NoRegister; 2283 } 2284 2285 unsigned RegIdx = RegNum / AlignSize; 2286 int RCID = getRegClass(RegKind, RegWidth); 2287 if (RCID == -1) { 2288 Error(Loc, "invalid or unsupported register size"); 2289 return AMDGPU::NoRegister; 2290 } 2291 2292 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2293 const MCRegisterClass RC = TRI->getRegClass(RCID); 2294 if (RegIdx >= RC.getNumRegs()) { 2295 Error(Loc, "register index is out of range"); 2296 return AMDGPU::NoRegister; 2297 } 2298 2299 return RC.getRegister(RegIdx); 2300 } 2301 2302 bool 2303 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2304 int64_t RegLo, RegHi; 2305 if (!skipToken(AsmToken::LBrac, "missing register index")) 2306 return false; 2307 2308 SMLoc FirstIdxLoc = getLoc(); 2309 SMLoc SecondIdxLoc; 2310 2311 if (!parseExpr(RegLo)) 2312 return false; 2313 2314 if (trySkipToken(AsmToken::Colon)) { 2315 SecondIdxLoc = getLoc(); 2316 if (!parseExpr(RegHi)) 2317 return false; 2318 } else { 2319 RegHi = RegLo; 2320 } 2321 2322 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2323 return false; 2324 2325 if (!isUInt<32>(RegLo)) { 2326 Error(FirstIdxLoc, "invalid register index"); 2327 return false; 2328 } 2329 2330 if (!isUInt<32>(RegHi)) { 2331 Error(SecondIdxLoc, "invalid register index"); 2332 return false; 2333 } 2334 2335 if (RegLo > RegHi) { 2336 Error(FirstIdxLoc, "first register index should not exceed second index"); 2337 return false; 2338 } 2339 2340 Num = static_cast<unsigned>(RegLo); 2341 Width = (RegHi - RegLo) + 1; 2342 return true; 2343 } 2344 2345 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2346 unsigned &RegNum, unsigned &RegWidth, 2347 SmallVectorImpl<AsmToken> &Tokens) { 2348 assert(isToken(AsmToken::Identifier)); 2349 unsigned Reg = getSpecialRegForName(getTokenStr()); 2350 if (Reg) { 2351 RegNum = 0; 2352 RegWidth = 1; 2353 RegKind = IS_SPECIAL; 2354 Tokens.push_back(getToken()); 2355 lex(); // skip register name 2356 } 2357 return Reg; 2358 } 2359 2360 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2361 unsigned &RegNum, unsigned &RegWidth, 2362 SmallVectorImpl<AsmToken> &Tokens) { 2363 assert(isToken(AsmToken::Identifier)); 2364 StringRef RegName = getTokenStr(); 2365 auto Loc = getLoc(); 2366 2367 const RegInfo *RI = getRegularRegInfo(RegName); 2368 if (!RI) { 2369 Error(Loc, "invalid register name"); 2370 return AMDGPU::NoRegister; 2371 } 2372 2373 Tokens.push_back(getToken()); 2374 lex(); // skip register name 2375 2376 RegKind = RI->Kind; 2377 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2378 if (!RegSuffix.empty()) { 2379 // Single 32-bit register: vXX. 2380 if (!getRegNum(RegSuffix, RegNum)) { 2381 Error(Loc, "invalid register index"); 2382 return AMDGPU::NoRegister; 2383 } 2384 RegWidth = 1; 2385 } else { 2386 // Range of registers: v[XX:YY]. ":YY" is optional. 
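// For example, v[0:1] names a 64-bit pair, v[4:7] a 128-bit tuple, and v[5]
// (with the optional ":YY" omitted) a single 32-bit register.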
2387 if (!ParseRegRange(RegNum, RegWidth)) 2388 return AMDGPU::NoRegister; 2389 } 2390 2391 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2392 } 2393 2394 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2395 unsigned &RegWidth, 2396 SmallVectorImpl<AsmToken> &Tokens) { 2397 unsigned Reg = AMDGPU::NoRegister; 2398 auto ListLoc = getLoc(); 2399 2400 if (!skipToken(AsmToken::LBrac, 2401 "expected a register or a list of registers")) { 2402 return AMDGPU::NoRegister; 2403 } 2404 2405 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2406 2407 auto Loc = getLoc(); 2408 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2409 return AMDGPU::NoRegister; 2410 if (RegWidth != 1) { 2411 Error(Loc, "expected a single 32-bit register"); 2412 return AMDGPU::NoRegister; 2413 } 2414 2415 for (; trySkipToken(AsmToken::Comma); ) { 2416 RegisterKind NextRegKind; 2417 unsigned NextReg, NextRegNum, NextRegWidth; 2418 Loc = getLoc(); 2419 2420 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2421 NextRegNum, NextRegWidth, 2422 Tokens)) { 2423 return AMDGPU::NoRegister; 2424 } 2425 if (NextRegWidth != 1) { 2426 Error(Loc, "expected a single 32-bit register"); 2427 return AMDGPU::NoRegister; 2428 } 2429 if (NextRegKind != RegKind) { 2430 Error(Loc, "registers in a list must be of the same kind"); 2431 return AMDGPU::NoRegister; 2432 } 2433 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2434 return AMDGPU::NoRegister; 2435 } 2436 2437 if (!skipToken(AsmToken::RBrac, 2438 "expected a comma or a closing square bracket")) { 2439 return AMDGPU::NoRegister; 2440 } 2441 2442 if (isRegularReg(RegKind)) 2443 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2444 2445 return Reg; 2446 } 2447 2448 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2449 unsigned &RegNum, unsigned &RegWidth, 2450 SmallVectorImpl<AsmToken> &Tokens) { 2451 auto Loc = getLoc(); 2452 Reg = AMDGPU::NoRegister; 2453 2454 if (isToken(AsmToken::Identifier)) { 2455 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2456 if (Reg == AMDGPU::NoRegister) 2457 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2458 } else { 2459 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2460 } 2461 2462 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2463 if (Reg == AMDGPU::NoRegister) { 2464 assert(Parser.hasPendingError()); 2465 return false; 2466 } 2467 2468 if (!subtargetHasRegister(*TRI, Reg)) { 2469 if (Reg == AMDGPU::SGPR_NULL) { 2470 Error(Loc, "'null' operand is not supported on this GPU"); 2471 } else { 2472 Error(Loc, "register not available on this GPU"); 2473 } 2474 return false; 2475 } 2476 2477 return true; 2478 } 2479 2480 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2481 unsigned &RegNum, unsigned &RegWidth, 2482 bool RestoreOnFailure /*=false*/) { 2483 Reg = AMDGPU::NoRegister; 2484 2485 SmallVector<AsmToken, 1> Tokens; 2486 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2487 if (RestoreOnFailure) { 2488 while (!Tokens.empty()) { 2489 getLexer().UnLex(Tokens.pop_back_val()); 2490 } 2491 } 2492 return true; 2493 } 2494 return false; 2495 } 2496 2497 Optional<StringRef> 2498 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2499 switch (RegKind) { 2500 case IS_VGPR: 2501 return StringRef(".amdgcn.next_free_vgpr"); 2502 case IS_SGPR: 2503 return StringRef(".amdgcn.next_free_sgpr"); 2504 default: 2505 return None; 2506 } 2507 } 2508 2509 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2510 auto SymbolName = getGprCountSymbolName(RegKind); 2511 assert(SymbolName && "initializing invalid register kind"); 2512 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2513 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2514 } 2515 2516 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2517 unsigned DwordRegIndex, 2518 unsigned RegWidth) { 2519 // Symbols are only defined for GCN targets 2520 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2521 return true; 2522 2523 auto SymbolName = getGprCountSymbolName(RegKind); 2524 if (!SymbolName) 2525 return true; 2526 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2527 2528 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2529 int64_t OldCount; 2530 2531 if (!Sym->isVariable()) 2532 return !Error(getParser().getTok().getLoc(), 2533 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2534 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2535 return !Error( 2536 getParser().getTok().getLoc(), 2537 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2538 2539 if (OldCount <= NewMax) 2540 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2541 2542 return true; 2543 } 2544 2545 std::unique_ptr<AMDGPUOperand> 2546 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2547 const auto &Tok = Parser.getTok(); 2548 SMLoc StartLoc = Tok.getLoc(); 2549 SMLoc EndLoc = Tok.getEndLoc(); 2550 RegisterKind RegKind; 2551 unsigned Reg, RegNum, RegWidth; 2552 2553 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2554 return nullptr; 2555 } 2556 if (isHsaAbiVersion3(&getSTI())) { 2557 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2558 return nullptr; 2559 } else 2560 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2561 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2562 } 2563 2564 OperandMatchResultTy 2565 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2566 // TODO: add syntactic sugar for 1/(2*PI) 2567 2568 assert(!isRegister()); 2569 assert(!isModifier()); 2570 2571 const auto& Tok = getToken(); 2572 const auto& NextTok = peekToken(); 2573 bool IsReal = Tok.is(AsmToken::Real); 2574 SMLoc S = getLoc(); 2575 bool Negate = false; 2576 2577 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2578 lex(); 2579 IsReal = true; 2580 Negate = true; 2581 } 2582 2583 if (IsReal) { 2584 // Floating-point expressions are not supported. 2585 // Can only allow floating-point literals with an 2586 // optional sign. 2587 2588 StringRef Num = getTokenStr(); 2589 lex(); 2590 2591 APFloat RealVal(APFloat::IEEEdouble()); 2592 auto roundMode = APFloat::rmNearestTiesToEven; 2593 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2594 return MatchOperand_ParseFail; 2595 } 2596 if (Negate) 2597 RealVal.changeSign(); 2598 2599 Operands.push_back( 2600 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2601 AMDGPUOperand::ImmTyNone, true)); 2602 2603 return MatchOperand_Success; 2604 2605 } else { 2606 int64_t IntVal; 2607 const MCExpr *Expr; 2608 SMLoc S = getLoc(); 2609 2610 if (HasSP3AbsModifier) { 2611 // This is a workaround for handling expressions 2612 // as arguments of SP3 'abs' modifier, for example: 2613 // |1.0| 2614 // |-1| 2615 // |1+x| 2616 // This syntax is not compatible with syntax of standard 2617 // MC expressions (due to the trailing '|'). 
2618 SMLoc EndLoc; 2619 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2620 return MatchOperand_ParseFail; 2621 } else { 2622 if (Parser.parseExpression(Expr)) 2623 return MatchOperand_ParseFail; 2624 } 2625 2626 if (Expr->evaluateAsAbsolute(IntVal)) { 2627 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2628 } else { 2629 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2630 } 2631 2632 return MatchOperand_Success; 2633 } 2634 2635 return MatchOperand_NoMatch; 2636 } 2637 2638 OperandMatchResultTy 2639 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2640 if (!isRegister()) 2641 return MatchOperand_NoMatch; 2642 2643 if (auto R = parseRegister()) { 2644 assert(R->isReg()); 2645 Operands.push_back(std::move(R)); 2646 return MatchOperand_Success; 2647 } 2648 return MatchOperand_ParseFail; 2649 } 2650 2651 OperandMatchResultTy 2652 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2653 auto res = parseReg(Operands); 2654 if (res != MatchOperand_NoMatch) { 2655 return res; 2656 } else if (isModifier()) { 2657 return MatchOperand_NoMatch; 2658 } else { 2659 return parseImm(Operands, HasSP3AbsMod); 2660 } 2661 } 2662 2663 bool 2664 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2665 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2666 const auto &str = Token.getString(); 2667 return str == "abs" || str == "neg" || str == "sext"; 2668 } 2669 return false; 2670 } 2671 2672 bool 2673 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2674 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2675 } 2676 2677 bool 2678 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2679 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2680 } 2681 2682 bool 2683 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2684 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2685 } 2686 2687 // Check if this is an operand modifier or an opcode modifier 2688 // which may look like an expression but it is not. We should 2689 // avoid parsing these modifiers as expressions. Currently 2690 // recognized sequences are: 2691 // |...| 2692 // abs(...) 2693 // neg(...) 2694 // sext(...) 2695 // -reg 2696 // -|...| 2697 // -abs(...) 2698 // name:... 2699 // Note that simple opcode modifiers like 'gds' may be parsed as 2700 // expressions; this is a special case. See getExpressionAsToken. 2701 // 2702 bool 2703 AMDGPUAsmParser::isModifier() { 2704 2705 AsmToken Tok = getToken(); 2706 AsmToken NextToken[2]; 2707 peekTokens(NextToken); 2708 2709 return isOperandModifier(Tok, NextToken[0]) || 2710 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2711 isOpcodeModifierWithVal(Tok, NextToken[0]); 2712 } 2713 2714 // Check if the current token is an SP3 'neg' modifier. 2715 // Currently this modifier is allowed in the following context: 2716 // 2717 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2718 // 2. Before an 'abs' modifier: -abs(...) 2719 // 3. Before an SP3 'abs' modifier: -|...| 2720 // 2721 // In all other cases "-" is handled as a part 2722 // of an expression that follows the sign. 
2723 //
2724 // Note: When "-" is followed by an integer literal,
2725 // this is interpreted as integer negation rather
2726 // than a floating-point NEG modifier applied to the literal.
2727 // Besides being counter-intuitive, such use of a floating-point
2728 // NEG modifier would result in different meanings
2729 // of integer literals used with VOP1/2/C and VOP3,
2730 // for example:
2731 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2732 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2733 // Negative fp literals with a preceding "-" are
2734 // handled likewise for uniformity.
2735 //
2736 bool
2737 AMDGPUAsmParser::parseSP3NegModifier() {
2738
2739 AsmToken NextToken[2];
2740 peekTokens(NextToken);
2741
2742 if (isToken(AsmToken::Minus) &&
2743 (isRegister(NextToken[0], NextToken[1]) ||
2744 NextToken[0].is(AsmToken::Pipe) ||
2745 isId(NextToken[0], "abs"))) {
2746 lex();
2747 return true;
2748 }
2749
2750 return false;
2751 }
2752
2753 OperandMatchResultTy
2754 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2755 bool AllowImm) {
2756 bool Neg, SP3Neg;
2757 bool Abs, SP3Abs;
2758 SMLoc Loc;
2759
2760 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2761 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2762 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2763 return MatchOperand_ParseFail;
2764 }
2765
2766 SP3Neg = parseSP3NegModifier();
2767
2768 Loc = getLoc();
2769 Neg = trySkipId("neg");
2770 if (Neg && SP3Neg) {
2771 Error(Loc, "expected register or immediate");
2772 return MatchOperand_ParseFail;
2773 }
2774 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2775 return MatchOperand_ParseFail;
2776
2777 Abs = trySkipId("abs");
2778 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2779 return MatchOperand_ParseFail;
2780
2781 Loc = getLoc();
2782 SP3Abs = trySkipToken(AsmToken::Pipe);
2783 if (Abs && SP3Abs) {
2784 Error(Loc, "expected register or immediate");
2785 return MatchOperand_ParseFail;
2786 }
2787
2788 OperandMatchResultTy Res;
2789 if (AllowImm) {
2790 Res = parseRegOrImm(Operands, SP3Abs);
2791 } else {
2792 Res = parseReg(Operands);
2793 }
2794 if (Res != MatchOperand_Success) {
2795 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2796 } 2797 2798 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2799 return MatchOperand_ParseFail; 2800 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2801 return MatchOperand_ParseFail; 2802 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2803 return MatchOperand_ParseFail; 2804 2805 AMDGPUOperand::Modifiers Mods; 2806 Mods.Abs = Abs || SP3Abs; 2807 Mods.Neg = Neg || SP3Neg; 2808 2809 if (Mods.hasFPModifiers()) { 2810 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2811 if (Op.isExpr()) { 2812 Error(Op.getStartLoc(), "expected an absolute expression"); 2813 return MatchOperand_ParseFail; 2814 } 2815 Op.setModifiers(Mods); 2816 } 2817 return MatchOperand_Success; 2818 } 2819 2820 OperandMatchResultTy 2821 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2822 bool AllowImm) { 2823 bool Sext = trySkipId("sext"); 2824 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2825 return MatchOperand_ParseFail; 2826 2827 OperandMatchResultTy Res; 2828 if (AllowImm) { 2829 Res = parseRegOrImm(Operands); 2830 } else { 2831 Res = parseReg(Operands); 2832 } 2833 if (Res != MatchOperand_Success) { 2834 return Sext? MatchOperand_ParseFail : Res; 2835 } 2836 2837 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2838 return MatchOperand_ParseFail; 2839 2840 AMDGPUOperand::Modifiers Mods; 2841 Mods.Sext = Sext; 2842 2843 if (Mods.hasIntModifiers()) { 2844 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2845 if (Op.isExpr()) { 2846 Error(Op.getStartLoc(), "expected an absolute expression"); 2847 return MatchOperand_ParseFail; 2848 } 2849 Op.setModifiers(Mods); 2850 } 2851 2852 return MatchOperand_Success; 2853 } 2854 2855 OperandMatchResultTy 2856 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2857 return parseRegOrImmWithFPInputMods(Operands, false); 2858 } 2859 2860 OperandMatchResultTy 2861 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2862 return parseRegOrImmWithIntInputMods(Operands, false); 2863 } 2864 2865 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2866 auto Loc = getLoc(); 2867 if (trySkipId("off")) { 2868 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2869 AMDGPUOperand::ImmTyOff, false)); 2870 return MatchOperand_Success; 2871 } 2872 2873 if (!isRegister()) 2874 return MatchOperand_NoMatch; 2875 2876 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2877 if (Reg) { 2878 Operands.push_back(std::move(Reg)); 2879 return MatchOperand_Success; 2880 } 2881 2882 return MatchOperand_ParseFail; 2883 2884 } 2885 2886 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2887 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2888 2889 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2890 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2891 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2892 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2893 return Match_InvalidOperand; 2894 2895 if ((TSFlags & SIInstrFlags::VOP3) && 2896 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2897 getForcedEncodingSize() != 64) 2898 return Match_PreferE32; 2899 2900 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2901 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2902 // v_mac_f32/16 allow only dst_sel == DWORD; 2903 auto OpNum = 2904 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2905 const auto &Op = Inst.getOperand(OpNum); 2906 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2907 return Match_InvalidOperand; 2908 } 2909 } 2910 2911 return Match_Success; 2912 } 2913 2914 static ArrayRef<unsigned> getAllVariants() { 2915 static const unsigned Variants[] = { 2916 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2917 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2918 }; 2919 2920 return makeArrayRef(Variants); 2921 } 2922 2923 // What asm variants we should check 2924 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2925 if (getForcedEncodingSize() == 32) { 2926 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2927 return makeArrayRef(Variants); 2928 } 2929 2930 if (isForcedVOP3()) { 2931 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2932 return makeArrayRef(Variants); 2933 } 2934 2935 if (isForcedSDWA()) { 2936 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2937 AMDGPUAsmVariants::SDWA9}; 2938 return makeArrayRef(Variants); 2939 } 2940 2941 if (isForcedDPP()) { 2942 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2943 return makeArrayRef(Variants); 2944 } 2945 2946 return getAllVariants(); 2947 } 2948 2949 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2950 if (getForcedEncodingSize() == 32) 2951 return "e32"; 2952 2953 if (isForcedVOP3()) 2954 return "e64"; 2955 2956 if (isForcedSDWA()) 2957 return "sdwa"; 2958 2959 if (isForcedDPP()) 2960 return "dpp"; 2961 2962 return ""; 2963 } 2964 2965 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2966 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2967 const unsigned Num = Desc.getNumImplicitUses(); 2968 for (unsigned i = 0; i < Num; ++i) { 2969 unsigned Reg = Desc.ImplicitUses[i]; 2970 switch (Reg) { 2971 case AMDGPU::FLAT_SCR: 2972 case AMDGPU::VCC: 2973 case AMDGPU::VCC_LO: 2974 case AMDGPU::VCC_HI: 2975 case AMDGPU::M0: 2976 return Reg; 2977 default: 2978 break; 2979 } 2980 } 2981 return AMDGPU::NoRegister; 2982 } 2983 2984 // NB: This code is correct only when used to check constant 2985 // bus limitations because GFX7 support no f16 inline constants. 2986 // Note that there are no cases when a GFX7 opcode violates 2987 // constant bus limitations due to the use of an f16 constant. 
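// Illustrative examples of what the constant bus checks below count as scalar
// reads (see getConstantBusLimit and validateConstantBusLimitations):
//   v_add_f32_e64 v0, s0, v1   // one SGPR read           -> accepted on all targets
//   v_add_f32_e64 v0, s0, s1   // two distinct SGPR reads -> accepted only where the
//                              //                             limit is 2 (GFX10+)
// A literal constant counts as one more scalar value on the bus.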
2988 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2989 unsigned OpIdx) const { 2990 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2991 2992 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2993 return false; 2994 } 2995 2996 const MCOperand &MO = Inst.getOperand(OpIdx); 2997 2998 int64_t Val = MO.getImm(); 2999 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3000 3001 switch (OpSize) { // expected operand size 3002 case 8: 3003 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3004 case 4: 3005 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3006 case 2: { 3007 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3008 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3009 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3010 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3011 return AMDGPU::isInlinableIntLiteral(Val); 3012 3013 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3014 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3015 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3016 return AMDGPU::isInlinableIntLiteralV216(Val); 3017 3018 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3019 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3020 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3021 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3022 3023 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3024 } 3025 default: 3026 llvm_unreachable("invalid operand size"); 3027 } 3028 } 3029 3030 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3031 if (!isGFX10Plus()) 3032 return 1; 3033 3034 switch (Opcode) { 3035 // 64-bit shift instructions can use only one scalar value input 3036 case AMDGPU::V_LSHLREV_B64: 3037 case AMDGPU::V_LSHLREV_B64_gfx10: 3038 case AMDGPU::V_LSHL_B64: 3039 case AMDGPU::V_LSHRREV_B64: 3040 case AMDGPU::V_LSHRREV_B64_gfx10: 3041 case AMDGPU::V_LSHR_B64: 3042 case AMDGPU::V_ASHRREV_I64: 3043 case AMDGPU::V_ASHRREV_I64_gfx10: 3044 case AMDGPU::V_ASHR_I64: 3045 return 1; 3046 default: 3047 return 2; 3048 } 3049 } 3050 3051 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3052 const MCOperand &MO = Inst.getOperand(OpIdx); 3053 if (MO.isImm()) { 3054 return !isInlineConstant(Inst, OpIdx); 3055 } else if (MO.isReg()) { 3056 auto Reg = MO.getReg(); 3057 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3058 auto PReg = mc2PseudoReg(Reg); 3059 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3060 } else { 3061 return true; 3062 } 3063 } 3064 3065 bool 3066 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3067 const OperandVector &Operands) { 3068 const unsigned Opcode = Inst.getOpcode(); 3069 const MCInstrDesc &Desc = MII.get(Opcode); 3070 unsigned LastSGPR = AMDGPU::NoRegister; 3071 unsigned ConstantBusUseCount = 0; 3072 unsigned NumLiterals = 0; 3073 unsigned LiteralSize; 3074 3075 if (Desc.TSFlags & 3076 (SIInstrFlags::VOPC | 3077 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3078 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3079 SIInstrFlags::SDWA)) { 3080 // Check special imm operands (used by madmk, etc) 3081 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3082 ++ConstantBusUseCount; 3083 } 3084 3085 SmallDenseSet<unsigned> SGPRsUsed; 3086 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3087 if (SGPRUsed != AMDGPU::NoRegister) { 3088 SGPRsUsed.insert(SGPRUsed); 3089 ++ConstantBusUseCount; 3090 } 3091 3092 const int Src0Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3093 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3094 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3095 3096 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3097 3098 for (int OpIdx : OpIndices) { 3099 if (OpIdx == -1) break; 3100 3101 const MCOperand &MO = Inst.getOperand(OpIdx); 3102 if (usesConstantBus(Inst, OpIdx)) { 3103 if (MO.isReg()) { 3104 LastSGPR = mc2PseudoReg(MO.getReg()); 3105 // Pairs of registers with a partial intersections like these 3106 // s0, s[0:1] 3107 // flat_scratch_lo, flat_scratch 3108 // flat_scratch_lo, flat_scratch_hi 3109 // are theoretically valid but they are disabled anyway. 3110 // Note that this code mimics SIInstrInfo::verifyInstruction 3111 if (!SGPRsUsed.count(LastSGPR)) { 3112 SGPRsUsed.insert(LastSGPR); 3113 ++ConstantBusUseCount; 3114 } 3115 } else { // Expression or a literal 3116 3117 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3118 continue; // special operand like VINTERP attr_chan 3119 3120 // An instruction may use only one literal. 3121 // This has been validated on the previous step. 3122 // See validateVOP3Literal. 3123 // This literal may be used as more than one operand. 3124 // If all these operands are of the same size, 3125 // this literal counts as one scalar value. 3126 // Otherwise it counts as 2 scalar values. 3127 // See "GFX10 Shader Programming", section 3.6.2.3. 3128 3129 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3130 if (Size < 4) Size = 4; 3131 3132 if (NumLiterals == 0) { 3133 NumLiterals = 1; 3134 LiteralSize = Size; 3135 } else if (LiteralSize != Size) { 3136 NumLiterals = 2; 3137 } 3138 } 3139 } 3140 } 3141 } 3142 ConstantBusUseCount += NumLiterals; 3143 3144 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3145 return true; 3146 3147 SMLoc LitLoc = getLitLoc(Operands); 3148 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3149 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3150 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3151 return false; 3152 } 3153 3154 bool 3155 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3156 const OperandVector &Operands) { 3157 const unsigned Opcode = Inst.getOpcode(); 3158 const MCInstrDesc &Desc = MII.get(Opcode); 3159 3160 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3161 if (DstIdx == -1 || 3162 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3163 return true; 3164 } 3165 3166 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3167 3168 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3169 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3170 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3171 3172 assert(DstIdx != -1); 3173 const MCOperand &Dst = Inst.getOperand(DstIdx); 3174 assert(Dst.isReg()); 3175 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3176 3177 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3178 3179 for (int SrcIdx : SrcIndices) { 3180 if (SrcIdx == -1) break; 3181 const MCOperand &Src = Inst.getOperand(SrcIdx); 3182 if (Src.isReg()) { 3183 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3184 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3185 Error(getRegLoc(SrcReg, Operands), 3186 "destination must be different than all sources"); 3187 return false; 3188 } 3189 } 3190 } 3191 3192 return true; 3193 } 3194 3195 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3196 3197 const unsigned Opc = Inst.getOpcode(); 3198 const MCInstrDesc &Desc = MII.get(Opc); 3199 3200 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3201 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3202 assert(ClampIdx != -1); 3203 return Inst.getOperand(ClampIdx).getImm() == 0; 3204 } 3205 3206 return true; 3207 } 3208 3209 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3210 3211 const unsigned Opc = Inst.getOpcode(); 3212 const MCInstrDesc &Desc = MII.get(Opc); 3213 3214 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3215 return true; 3216 3217 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3218 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3219 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3220 3221 assert(VDataIdx != -1); 3222 3223 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3224 return true; 3225 3226 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3227 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3228 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3229 if (DMask == 0) 3230 DMask = 1; 3231 3232 unsigned DataSize = 3233 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3234 if (hasPackedD16()) { 3235 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3236 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3237 DataSize = (DataSize + 1) / 2; 3238 } 3239 3240 return (VDataSize / 4) == DataSize + TFESize; 3241 } 3242 3243 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3244 const unsigned Opc = Inst.getOpcode(); 3245 const MCInstrDesc &Desc = MII.get(Opc); 3246 3247 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3248 return true; 3249 3250 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3251 3252 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3253 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3254 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3255 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3256 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3257 3258 assert(VAddr0Idx != -1); 3259 assert(SrsrcIdx != -1); 3260 assert(SrsrcIdx > VAddr0Idx); 3261 3262 if (DimIdx == -1) 3263 return true; // intersect_ray 3264 3265 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3266 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3267 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3268 unsigned VAddrSize = 3269 IsNSA ? SrsrcIdx - VAddr0Idx 3270 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3271 3272 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3273 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3274 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3275 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3276 if (!IsNSA) { 3277 if (AddrSize > 8) 3278 AddrSize = 16; 3279 else if (AddrSize > 4) 3280 AddrSize = 8; 3281 } 3282 3283 return VAddrSize == AddrSize; 3284 } 3285 3286 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3287 3288 const unsigned Opc = Inst.getOpcode(); 3289 const MCInstrDesc &Desc = MII.get(Opc); 3290 3291 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3292 return true; 3293 if (!Desc.mayLoad() || !Desc.mayStore()) 3294 return true; // Not atomic 3295 3296 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3297 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3298 3299 // This is an incomplete check because image_atomic_cmpswap 3300 // may only use 0x3 and 0xf while other atomic operations 3301 // may use 0x1 and 0x3. However these limitations are 3302 // verified when we check that dmask matches dst size. 3303 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3304 } 3305 3306 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3307 3308 const unsigned Opc = Inst.getOpcode(); 3309 const MCInstrDesc &Desc = MII.get(Opc); 3310 3311 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3312 return true; 3313 3314 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3315 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3316 3317 // GATHER4 instructions use dmask in a different fashion compared to 3318 // other MIMG instructions. The only useful DMASK values are 3319 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3320 // (red,red,red,red) etc.) The ISA document doesn't mention 3321 // this. 
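// For example, dmask:0x4 gathers the blue component of the four sampled texels,
// while something like dmask:0x3 (two bits set) fails the check below.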
3322 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3323 } 3324 3325 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3326 { 3327 switch (Opcode) { 3328 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3329 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3330 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3331 return true; 3332 default: 3333 return false; 3334 } 3335 } 3336 3337 // movrels* opcodes should only allow VGPRS as src0. 3338 // This is specified in .td description for vop1/vop3, 3339 // but sdwa is handled differently. See isSDWAOperand. 3340 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3341 const OperandVector &Operands) { 3342 3343 const unsigned Opc = Inst.getOpcode(); 3344 const MCInstrDesc &Desc = MII.get(Opc); 3345 3346 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3347 return true; 3348 3349 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3350 assert(Src0Idx != -1); 3351 3352 SMLoc ErrLoc; 3353 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3354 if (Src0.isReg()) { 3355 auto Reg = mc2PseudoReg(Src0.getReg()); 3356 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3357 if (!isSGPR(Reg, TRI)) 3358 return true; 3359 ErrLoc = getRegLoc(Reg, Operands); 3360 } else { 3361 ErrLoc = getConstLoc(Operands); 3362 } 3363 3364 Error(ErrLoc, "source operand must be a VGPR"); 3365 return false; 3366 } 3367 3368 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3369 const OperandVector &Operands) { 3370 3371 const unsigned Opc = Inst.getOpcode(); 3372 3373 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3374 return true; 3375 3376 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3377 assert(Src0Idx != -1); 3378 3379 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3380 if (!Src0.isReg()) 3381 return true; 3382 3383 auto Reg = mc2PseudoReg(Src0.getReg()); 3384 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3385 if (isSGPR(Reg, TRI)) { 3386 Error(getRegLoc(Reg, Operands), 3387 "source operand must be either a VGPR or an inline constant"); 3388 return false; 3389 } 3390 3391 return true; 3392 } 3393 3394 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3395 switch (Inst.getOpcode()) { 3396 default: 3397 return true; 3398 case V_DIV_SCALE_F32_gfx6_gfx7: 3399 case V_DIV_SCALE_F32_vi: 3400 case V_DIV_SCALE_F32_gfx10: 3401 case V_DIV_SCALE_F64_gfx6_gfx7: 3402 case V_DIV_SCALE_F64_vi: 3403 case V_DIV_SCALE_F64_gfx10: 3404 break; 3405 } 3406 3407 // TODO: Check that src0 = src1 or src2. 
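// For example (illustrative syntax), a source wrapped in an SP3 'abs' modifier,
// e.g. 'v_div_scale_f32 v0, vcc, |v1|, v2, v3', is rejected by the loop below.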
3408
3409 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3410 AMDGPU::OpName::src1_modifiers,
3411 AMDGPU::OpName::src2_modifiers}) {
3412 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3413 .getImm() &
3414 SISrcMods::ABS) {
3415 return false;
3416 }
3417 }
3418
3419 return true;
3420 }
3421
3422 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3423
3424 const unsigned Opc = Inst.getOpcode();
3425 const MCInstrDesc &Desc = MII.get(Opc);
3426
3427 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3428 return true;
3429
3430 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3431 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3432 if (isCI() || isSI())
3433 return false;
3434 }
3435
3436 return true;
3437 }
3438
3439 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3440 const unsigned Opc = Inst.getOpcode();
3441 const MCInstrDesc &Desc = MII.get(Opc);
3442
3443 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3444 return true;
3445
3446 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3447 if (DimIdx < 0)
3448 return true;
3449
3450 long Imm = Inst.getOperand(DimIdx).getImm();
3451 if (Imm < 0 || Imm >= 8)
3452 return false;
3453
3454 return true;
3455 }
3456
3457 static bool IsRevOpcode(const unsigned Opcode)
3458 {
3459 switch (Opcode) {
3460 case AMDGPU::V_SUBREV_F32_e32:
3461 case AMDGPU::V_SUBREV_F32_e64:
3462 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3463 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3464 case AMDGPU::V_SUBREV_F32_e32_vi:
3465 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3466 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3467 case AMDGPU::V_SUBREV_F32_e64_vi:
3468
3469 case AMDGPU::V_SUBREV_CO_U32_e32:
3470 case AMDGPU::V_SUBREV_CO_U32_e64:
3471 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3472 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3473
3474 case AMDGPU::V_SUBBREV_U32_e32:
3475 case AMDGPU::V_SUBBREV_U32_e64:
3476 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3477 case AMDGPU::V_SUBBREV_U32_e32_vi:
3478 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3479 case AMDGPU::V_SUBBREV_U32_e64_vi:
3480
3481 case AMDGPU::V_SUBREV_U32_e32:
3482 case AMDGPU::V_SUBREV_U32_e64:
3483 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3484 case AMDGPU::V_SUBREV_U32_e32_vi:
3485 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3486 case AMDGPU::V_SUBREV_U32_e64_vi:
3487
3488 case AMDGPU::V_SUBREV_F16_e32:
3489 case AMDGPU::V_SUBREV_F16_e64:
3490 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3491 case AMDGPU::V_SUBREV_F16_e32_vi:
3492 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3493 case AMDGPU::V_SUBREV_F16_e64_vi:
3494
3495 case AMDGPU::V_SUBREV_U16_e32:
3496 case AMDGPU::V_SUBREV_U16_e64:
3497 case AMDGPU::V_SUBREV_U16_e32_vi:
3498 case AMDGPU::V_SUBREV_U16_e64_vi:
3499
3500 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3501 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3502 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3503
3504 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3505 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3506
3507 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3508 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3509
3510 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3511 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3512
3513 case AMDGPU::V_LSHRREV_B32_e32:
3514 case AMDGPU::V_LSHRREV_B32_e64:
3515 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3516 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3517 case AMDGPU::V_LSHRREV_B32_e32_vi:
3518 case AMDGPU::V_LSHRREV_B32_e64_vi:
3519 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3520 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3521
3522 case AMDGPU::V_ASHRREV_I32_e32:
3523 case
AMDGPU::V_ASHRREV_I32_e64: 3524 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3525 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3526 case AMDGPU::V_ASHRREV_I32_e32_vi: 3527 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3528 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3529 case AMDGPU::V_ASHRREV_I32_e64_vi: 3530 3531 case AMDGPU::V_LSHLREV_B32_e32: 3532 case AMDGPU::V_LSHLREV_B32_e64: 3533 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3534 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3535 case AMDGPU::V_LSHLREV_B32_e32_vi: 3536 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3537 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3538 case AMDGPU::V_LSHLREV_B32_e64_vi: 3539 3540 case AMDGPU::V_LSHLREV_B16_e32: 3541 case AMDGPU::V_LSHLREV_B16_e64: 3542 case AMDGPU::V_LSHLREV_B16_e32_vi: 3543 case AMDGPU::V_LSHLREV_B16_e64_vi: 3544 case AMDGPU::V_LSHLREV_B16_gfx10: 3545 3546 case AMDGPU::V_LSHRREV_B16_e32: 3547 case AMDGPU::V_LSHRREV_B16_e64: 3548 case AMDGPU::V_LSHRREV_B16_e32_vi: 3549 case AMDGPU::V_LSHRREV_B16_e64_vi: 3550 case AMDGPU::V_LSHRREV_B16_gfx10: 3551 3552 case AMDGPU::V_ASHRREV_I16_e32: 3553 case AMDGPU::V_ASHRREV_I16_e64: 3554 case AMDGPU::V_ASHRREV_I16_e32_vi: 3555 case AMDGPU::V_ASHRREV_I16_e64_vi: 3556 case AMDGPU::V_ASHRREV_I16_gfx10: 3557 3558 case AMDGPU::V_LSHLREV_B64: 3559 case AMDGPU::V_LSHLREV_B64_gfx10: 3560 case AMDGPU::V_LSHLREV_B64_vi: 3561 3562 case AMDGPU::V_LSHRREV_B64: 3563 case AMDGPU::V_LSHRREV_B64_gfx10: 3564 case AMDGPU::V_LSHRREV_B64_vi: 3565 3566 case AMDGPU::V_ASHRREV_I64: 3567 case AMDGPU::V_ASHRREV_I64_gfx10: 3568 case AMDGPU::V_ASHRREV_I64_vi: 3569 3570 case AMDGPU::V_PK_LSHLREV_B16: 3571 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3572 case AMDGPU::V_PK_LSHLREV_B16_vi: 3573 3574 case AMDGPU::V_PK_LSHRREV_B16: 3575 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3576 case AMDGPU::V_PK_LSHRREV_B16_vi: 3577 case AMDGPU::V_PK_ASHRREV_I16: 3578 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3579 case AMDGPU::V_PK_ASHRREV_I16_vi: 3580 return true; 3581 default: 3582 return false; 3583 } 3584 } 3585 3586 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3587 3588 using namespace SIInstrFlags; 3589 const unsigned Opcode = Inst.getOpcode(); 3590 const MCInstrDesc &Desc = MII.get(Opcode); 3591 3592 // lds_direct register is defined so that it can be used 3593 // with 9-bit operands only. Ignore encodings which do not accept these. 3594 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3595 return true; 3596 3597 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3598 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3599 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3600 3601 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3602 3603 // lds_direct cannot be specified as either src1 or src2. 3604 for (int SrcIdx : SrcIndices) { 3605 if (SrcIdx == -1) break; 3606 const MCOperand &Src = Inst.getOperand(SrcIdx); 3607 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3608 return false; 3609 } 3610 } 3611 3612 if (Src0Idx == -1) 3613 return true; 3614 3615 const MCOperand &Src = Inst.getOperand(Src0Idx); 3616 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3617 return true; 3618 3619 // lds_direct is specified as src0. Check additional limitations. 
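  // Namely, lds_direct as src0 is still rejected for SDWA encodings and for
  // *rev* opcodes (see IsRevOpcode above); e.g. an illustrative
  // "v_subrev_f32 v0, lds_direct, v1" should not assemble.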
3620 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3621 } 3622 3623 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3624 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3625 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3626 if (Op.isFlatOffset()) 3627 return Op.getStartLoc(); 3628 } 3629 return getLoc(); 3630 } 3631 3632 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3633 const OperandVector &Operands) { 3634 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3635 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3636 return true; 3637 3638 auto Opcode = Inst.getOpcode(); 3639 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3640 assert(OpNum != -1); 3641 3642 const auto &Op = Inst.getOperand(OpNum); 3643 if (!hasFlatOffsets() && Op.getImm() != 0) { 3644 Error(getFlatOffsetLoc(Operands), 3645 "flat offset modifier is not supported on this GPU"); 3646 return false; 3647 } 3648 3649 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3650 // For FLAT segment the offset must be positive; 3651 // MSB is ignored and forced to zero. 3652 unsigned OffsetSize = isGFX9() ? 13 : 12; 3653 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3654 if (!isIntN(OffsetSize, Op.getImm())) { 3655 Error(getFlatOffsetLoc(Operands), 3656 isGFX9() ? "expected a 13-bit signed offset" : 3657 "expected a 12-bit signed offset"); 3658 return false; 3659 } 3660 } else { 3661 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3662 Error(getFlatOffsetLoc(Operands), 3663 isGFX9() ? "expected a 12-bit unsigned offset" : 3664 "expected an 11-bit unsigned offset"); 3665 return false; 3666 } 3667 } 3668 3669 return true; 3670 } 3671 3672 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3673 // Start with second operand because SMEM Offset cannot be dst or src0. 3674 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3675 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3676 if (Op.isSMEMOffset()) 3677 return Op.getStartLoc(); 3678 } 3679 return getLoc(); 3680 } 3681 3682 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3683 const OperandVector &Operands) { 3684 if (isCI() || isSI()) 3685 return true; 3686 3687 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3688 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3689 return true; 3690 3691 auto Opcode = Inst.getOpcode(); 3692 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3693 if (OpNum == -1) 3694 return true; 3695 3696 const auto &Op = Inst.getOperand(OpNum); 3697 if (!Op.isImm()) 3698 return true; 3699 3700 uint64_t Offset = Op.getImm(); 3701 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3702 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3703 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3704 return true; 3705 3706 Error(getSMEMOffsetLoc(Operands), 3707 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3708 "expected a 21-bit signed offset"); 3709 3710 return false; 3711 } 3712 3713 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3714 unsigned Opcode = Inst.getOpcode(); 3715 const MCInstrDesc &Desc = MII.get(Opcode); 3716 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3717 return true; 3718 3719 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3720 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3721 3722 const int OpIndices[] = { Src0Idx, Src1Idx }; 3723 3724 unsigned NumExprs = 0; 3725 unsigned NumLiterals = 0; 3726 uint32_t LiteralValue; 3727 3728 for (int OpIdx : OpIndices) { 3729 if (OpIdx == -1) break; 3730 3731 const MCOperand &MO = Inst.getOperand(OpIdx); 3732 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3733 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3734 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3735 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3736 if (NumLiterals == 0 || LiteralValue != Value) { 3737 LiteralValue = Value; 3738 ++NumLiterals; 3739 } 3740 } else if (MO.isExpr()) { 3741 ++NumExprs; 3742 } 3743 } 3744 } 3745 3746 return NumLiterals + NumExprs <= 1; 3747 } 3748 3749 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3750 const unsigned Opc = Inst.getOpcode(); 3751 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3752 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3753 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3754 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3755 3756 if (OpSel & ~3) 3757 return false; 3758 } 3759 return true; 3760 } 3761 3762 // Check if VCC register matches wavefront size 3763 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3764 auto FB = getFeatureBits(); 3765 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3766 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3767 } 3768 3769 // VOP3 literal is only allowed in GFX10+ and only one can be used 3770 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3771 const OperandVector &Operands) { 3772 unsigned Opcode = Inst.getOpcode(); 3773 const MCInstrDesc &Desc = MII.get(Opcode); 3774 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3775 return true; 3776 3777 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3778 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3779 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3780 3781 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3782 3783 unsigned NumExprs = 0; 3784 unsigned NumLiterals = 0; 3785 uint32_t LiteralValue; 3786 3787 for (int OpIdx : OpIndices) { 3788 if (OpIdx == -1) break; 3789 3790 const MCOperand &MO = Inst.getOperand(OpIdx); 3791 if (!MO.isImm() && !MO.isExpr()) 3792 continue; 3793 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3794 continue; 3795 3796 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3797 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3798 Error(getConstLoc(Operands), 3799 "inline constants are not allowed for this operand"); 3800 return false; 3801 } 3802 3803 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3804 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3805 if (NumLiterals == 0 || LiteralValue != Value) { 3806 LiteralValue = Value; 3807 ++NumLiterals; 3808 } 3809 } else if (MO.isExpr()) { 3810 ++NumExprs; 3811 } 
3812 } 3813 NumLiterals += NumExprs; 3814 3815 if (!NumLiterals) 3816 return true; 3817 3818 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3819 Error(getLitLoc(Operands), "literal operands are not supported"); 3820 return false; 3821 } 3822 3823 if (NumLiterals > 1) { 3824 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3825 return false; 3826 } 3827 3828 return true; 3829 } 3830 3831 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3832 const OperandVector &Operands, 3833 const SMLoc &IDLoc) { 3834 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3835 AMDGPU::OpName::glc1); 3836 if (GLCPos != -1) { 3837 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3838 // in the asm string, and the default value means it is not present. 3839 if (Inst.getOperand(GLCPos).getImm() == -1) { 3840 Error(IDLoc, "instruction must use glc"); 3841 return false; 3842 } 3843 } 3844 3845 return true; 3846 } 3847 3848 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3849 const SMLoc &IDLoc, 3850 const OperandVector &Operands) { 3851 if (!validateLdsDirect(Inst)) { 3852 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 3853 "invalid use of lds_direct"); 3854 return false; 3855 } 3856 if (!validateSOPLiteral(Inst)) { 3857 Error(getLitLoc(Operands), 3858 "only one literal operand is allowed"); 3859 return false; 3860 } 3861 if (!validateVOP3Literal(Inst, Operands)) { 3862 return false; 3863 } 3864 if (!validateConstantBusLimitations(Inst, Operands)) { 3865 return false; 3866 } 3867 if (!validateEarlyClobberLimitations(Inst, Operands)) { 3868 return false; 3869 } 3870 if (!validateIntClampSupported(Inst)) { 3871 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 3872 "integer clamping is not supported on this GPU"); 3873 return false; 3874 } 3875 if (!validateOpSel(Inst)) { 3876 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 3877 "invalid op_sel operand"); 3878 return false; 3879 } 3880 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
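  // Only the MIMG form has an explicit d16 operand; validateMIMGD16 below
  // rejects it on SI/CI, where 16-bit image data is not available.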
3881 if (!validateMIMGD16(Inst)) { 3882 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 3883 "d16 modifier is not supported on this GPU"); 3884 return false; 3885 } 3886 if (!validateMIMGDim(Inst)) { 3887 Error(IDLoc, "dim modifier is required on this GPU"); 3888 return false; 3889 } 3890 if (!validateMIMGDataSize(Inst)) { 3891 Error(IDLoc, 3892 "image data size does not match dmask and tfe"); 3893 return false; 3894 } 3895 if (!validateMIMGAddrSize(Inst)) { 3896 Error(IDLoc, 3897 "image address size does not match dim and a16"); 3898 return false; 3899 } 3900 if (!validateMIMGAtomicDMask(Inst)) { 3901 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3902 "invalid atomic image dmask"); 3903 return false; 3904 } 3905 if (!validateMIMGGatherDMask(Inst)) { 3906 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3907 "invalid image_gather dmask: only one bit must be set"); 3908 return false; 3909 } 3910 if (!validateMovrels(Inst, Operands)) { 3911 return false; 3912 } 3913 if (!validateFlatOffset(Inst, Operands)) { 3914 return false; 3915 } 3916 if (!validateSMEMOffset(Inst, Operands)) { 3917 return false; 3918 } 3919 if (!validateMAIAccWrite(Inst, Operands)) { 3920 return false; 3921 } 3922 if (!validateDivScale(Inst)) { 3923 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 3924 return false; 3925 } 3926 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3927 return false; 3928 } 3929 3930 return true; 3931 } 3932 3933 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3934 const FeatureBitset &FBS, 3935 unsigned VariantID = 0); 3936 3937 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3938 const FeatureBitset &AvailableFeatures, 3939 unsigned VariantID); 3940 3941 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3942 const FeatureBitset &FBS) { 3943 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3944 } 3945 3946 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3947 const FeatureBitset &FBS, 3948 ArrayRef<unsigned> Variants) { 3949 for (auto Variant : Variants) { 3950 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3951 return true; 3952 } 3953 3954 return false; 3955 } 3956 3957 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3958 const SMLoc &IDLoc) { 3959 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3960 3961 // Check if requested instruction variant is supported. 3962 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3963 return false; 3964 3965 // This instruction is not supported. 3966 // Clear any other pending errors because they are no longer relevant. 3967 getParser().clearPendingErrors(); 3968 3969 // Requested instruction variant is not supported. 3970 // Check if any other variants are supported. 3971 StringRef VariantName = getMatchedVariantName(); 3972 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3973 return Error(IDLoc, 3974 Twine(VariantName, 3975 " variant of this instruction is not supported")); 3976 } 3977 3978 // Finally check if this instruction is supported on any other GPU. 3979 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3980 return Error(IDLoc, "instruction not supported on this GPU"); 3981 } 3982 3983 // Instruction not supported on any GPU. Probably a typo. 
3984 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3985 return Error(IDLoc, "invalid instruction" + Suggestion);
3986 }
3987
3988 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3989 OperandVector &Operands,
3990 MCStreamer &Out,
3991 uint64_t &ErrorInfo,
3992 bool MatchingInlineAsm) {
3993 MCInst Inst;
3994 unsigned Result = Match_Success;
3995 for (auto Variant : getMatchedVariants()) {
3996 uint64_t EI;
3997 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3998 Variant);
3999 // We order match statuses from least to most specific, and use the most
4000 // specific status as the result:
4001 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4002 if ((R == Match_Success) ||
4003 (R == Match_PreferE32) ||
4004 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4005 (R == Match_InvalidOperand && Result != Match_MissingFeature
4006 && Result != Match_PreferE32) ||
4007 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4008 && Result != Match_MissingFeature
4009 && Result != Match_PreferE32)) {
4010 Result = R;
4011 ErrorInfo = EI;
4012 }
4013 if (R == Match_Success)
4014 break;
4015 }
4016
4017 if (Result == Match_Success) {
4018 if (!validateInstruction(Inst, IDLoc, Operands)) {
4019 return true;
4020 }
4021 Inst.setLoc(IDLoc);
4022 Out.emitInstruction(Inst, getSTI());
4023 return false;
4024 }
4025
4026 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4027 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4028 return true;
4029 }
4030
4031 switch (Result) {
4032 default: break;
4033 case Match_MissingFeature:
4034 // It has been verified that the specified instruction
4035 // mnemonic is valid. A match was found but it requires
4036 // features which are not supported on this GPU.
4037 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4038 4039 case Match_InvalidOperand: { 4040 SMLoc ErrorLoc = IDLoc; 4041 if (ErrorInfo != ~0ULL) { 4042 if (ErrorInfo >= Operands.size()) { 4043 return Error(IDLoc, "too few operands for instruction"); 4044 } 4045 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4046 if (ErrorLoc == SMLoc()) 4047 ErrorLoc = IDLoc; 4048 } 4049 return Error(ErrorLoc, "invalid operand for instruction"); 4050 } 4051 4052 case Match_PreferE32: 4053 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4054 "should be encoded as e32"); 4055 case Match_MnemonicFail: 4056 llvm_unreachable("Invalid instructions should have been handled already"); 4057 } 4058 llvm_unreachable("Implement any new match types added!"); 4059 } 4060 4061 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4062 int64_t Tmp = -1; 4063 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 4064 return true; 4065 } 4066 if (getParser().parseAbsoluteExpression(Tmp)) { 4067 return true; 4068 } 4069 Ret = static_cast<uint32_t>(Tmp); 4070 return false; 4071 } 4072 4073 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4074 uint32_t &Minor) { 4075 if (ParseAsAbsoluteExpression(Major)) 4076 return TokError("invalid major version"); 4077 4078 if (getLexer().isNot(AsmToken::Comma)) 4079 return TokError("minor version number required, comma expected"); 4080 Lex(); 4081 4082 if (ParseAsAbsoluteExpression(Minor)) 4083 return TokError("invalid minor version"); 4084 4085 return false; 4086 } 4087 4088 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4089 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4090 return TokError("directive only supported for amdgcn architecture"); 4091 4092 std::string Target; 4093 4094 SMLoc TargetStart = getTok().getLoc(); 4095 if (getParser().parseEscapedString(Target)) 4096 return true; 4097 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4098 4099 std::string ExpectedTarget; 4100 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4101 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4102 4103 if (Target != ExpectedTargetOS.str()) 4104 return getParser().Error(TargetRange.Start, "target must match options", 4105 TargetRange); 4106 4107 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4108 return false; 4109 } 4110 4111 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4112 return getParser().Error(Range.Start, "value out of range", Range); 4113 } 4114 4115 bool AMDGPUAsmParser::calculateGPRBlocks( 4116 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4117 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4118 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4119 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4120 // TODO(scott.linder): These calculations are duplicated from 4121 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
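  // A rough illustration (assuming a VGPR allocation granule of 4):
  // .amdhsa_next_free_vgpr 37 is rounded up to 40 VGPRs, i.e. 10 granules,
  // before being encoded into the granulated count field below.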
4122 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4123 4124 unsigned NumVGPRs = NextFreeVGPR; 4125 unsigned NumSGPRs = NextFreeSGPR; 4126 4127 if (Version.Major >= 10) 4128 NumSGPRs = 0; 4129 else { 4130 unsigned MaxAddressableNumSGPRs = 4131 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4132 4133 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4134 NumSGPRs > MaxAddressableNumSGPRs) 4135 return OutOfRangeError(SGPRRange); 4136 4137 NumSGPRs += 4138 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4139 4140 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4141 NumSGPRs > MaxAddressableNumSGPRs) 4142 return OutOfRangeError(SGPRRange); 4143 4144 if (Features.test(FeatureSGPRInitBug)) 4145 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4146 } 4147 4148 VGPRBlocks = 4149 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4150 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4151 4152 return false; 4153 } 4154 4155 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4156 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4157 return TokError("directive only supported for amdgcn architecture"); 4158 4159 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4160 return TokError("directive only supported for amdhsa OS"); 4161 4162 StringRef KernelName; 4163 if (getParser().parseIdentifier(KernelName)) 4164 return true; 4165 4166 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4167 4168 StringSet<> Seen; 4169 4170 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4171 4172 SMRange VGPRRange; 4173 uint64_t NextFreeVGPR = 0; 4174 SMRange SGPRRange; 4175 uint64_t NextFreeSGPR = 0; 4176 unsigned UserSGPRCount = 0; 4177 bool ReserveVCC = true; 4178 bool ReserveFlatScr = true; 4179 bool ReserveXNACK = hasXNACK(); 4180 Optional<bool> EnableWavefrontSize32; 4181 4182 while (true) { 4183 while (getLexer().is(AsmToken::EndOfStatement)) 4184 Lex(); 4185 4186 if (getLexer().isNot(AsmToken::Identifier)) 4187 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4188 4189 StringRef ID = getTok().getIdentifier(); 4190 SMRange IDRange = getTok().getLocRange(); 4191 Lex(); 4192 4193 if (ID == ".end_amdhsa_kernel") 4194 break; 4195 4196 if (Seen.find(ID) != Seen.end()) 4197 return TokError(".amdhsa_ directives cannot be repeated"); 4198 Seen.insert(ID); 4199 4200 SMLoc ValStart = getTok().getLoc(); 4201 int64_t IVal; 4202 if (getParser().parseAbsoluteExpression(IVal)) 4203 return true; 4204 SMLoc ValEnd = getTok().getLoc(); 4205 SMRange ValRange = SMRange(ValStart, ValEnd); 4206 4207 if (IVal < 0) 4208 return OutOfRangeError(ValRange); 4209 4210 uint64_t Val = IVal; 4211 4212 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4213 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4214 return OutOfRangeError(RANGE); \ 4215 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4216 4217 if (ID == ".amdhsa_group_segment_fixed_size") { 4218 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4219 return OutOfRangeError(ValRange); 4220 KD.group_segment_fixed_size = Val; 4221 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4222 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4223 return OutOfRangeError(ValRange); 4224 KD.private_segment_fixed_size = Val; 4225 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4226 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4227 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4228 Val, ValRange); 
4229 if (Val) 4230 UserSGPRCount += 4; 4231 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4232 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4233 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4234 ValRange); 4235 if (Val) 4236 UserSGPRCount += 2; 4237 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4238 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4239 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4240 ValRange); 4241 if (Val) 4242 UserSGPRCount += 2; 4243 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4244 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4245 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4246 Val, ValRange); 4247 if (Val) 4248 UserSGPRCount += 2; 4249 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4250 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4251 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4252 ValRange); 4253 if (Val) 4254 UserSGPRCount += 2; 4255 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4256 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4257 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4258 ValRange); 4259 if (Val) 4260 UserSGPRCount += 2; 4261 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4262 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4263 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4264 Val, ValRange); 4265 if (Val) 4266 UserSGPRCount += 1; 4267 } else if (ID == ".amdhsa_wavefront_size32") { 4268 if (IVersion.Major < 10) 4269 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4270 IDRange); 4271 EnableWavefrontSize32 = Val; 4272 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4273 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4274 Val, ValRange); 4275 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4276 PARSE_BITS_ENTRY( 4277 KD.compute_pgm_rsrc2, 4278 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4279 ValRange); 4280 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4281 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4282 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4283 ValRange); 4284 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4285 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4286 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4287 ValRange); 4288 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4289 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4290 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4291 ValRange); 4292 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4293 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4294 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4295 ValRange); 4296 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4298 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4299 ValRange); 4300 } else if (ID == ".amdhsa_next_free_vgpr") { 4301 VGPRRange = ValRange; 4302 NextFreeVGPR = Val; 4303 } else if (ID == ".amdhsa_next_free_sgpr") { 4304 SGPRRange = ValRange; 4305 NextFreeSGPR = Val; 4306 } else if (ID == ".amdhsa_reserve_vcc") { 4307 if (!isUInt<1>(Val)) 4308 return OutOfRangeError(ValRange); 4309 ReserveVCC = Val; 4310 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4311 if (IVersion.Major < 7) 4312 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4313 IDRange); 4314 if (!isUInt<1>(Val)) 4315 return OutOfRangeError(ValRange); 4316 ReserveFlatScr = Val; 4317 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4318 if (IVersion.Major < 8) 4319 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4320 IDRange); 4321 if (!isUInt<1>(Val)) 4322 return OutOfRangeError(ValRange); 4323 ReserveXNACK = Val; 4324 } else if (ID == ".amdhsa_float_round_mode_32") { 4325 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4326 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4327 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4328 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4329 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4330 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4331 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4332 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4333 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4334 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4335 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4336 ValRange); 4337 } else if (ID == ".amdhsa_dx10_clamp") { 4338 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4339 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4340 } else if (ID == ".amdhsa_ieee_mode") { 4341 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4342 Val, ValRange); 4343 } else if (ID == ".amdhsa_fp16_overflow") { 4344 if (IVersion.Major < 9) 4345 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4346 IDRange); 4347 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4348 ValRange); 4349 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4350 if (IVersion.Major < 10) 4351 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4352 IDRange); 4353 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4354 ValRange); 4355 } else if (ID == ".amdhsa_memory_ordered") { 4356 if (IVersion.Major < 10) 4357 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4358 IDRange); 4359 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4360 ValRange); 4361 } else if (ID == ".amdhsa_forward_progress") { 4362 if (IVersion.Major < 10) 4363 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4364 IDRange); 4365 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4366 ValRange); 4367 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4368 PARSE_BITS_ENTRY( 4369 KD.compute_pgm_rsrc2, 4370 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4371 ValRange); 4372 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4373 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4374 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4375 Val, ValRange); 4376 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4377 PARSE_BITS_ENTRY( 4378 KD.compute_pgm_rsrc2, 4379 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4380 ValRange); 4381 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4382 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4383 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4384 Val, ValRange); 4385 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4386 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4387 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4388 Val, ValRange); 4389 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4390 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4391 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4392 Val, ValRange); 4393 } else if (ID == ".amdhsa_exception_int_div_zero") { 4394 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4395 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4396 Val, ValRange); 4397 } else { 4398 return getParser().Error(IDRange.Start, 
4399 "unknown .amdhsa_kernel directive", IDRange); 4400 } 4401 4402 #undef PARSE_BITS_ENTRY 4403 } 4404 4405 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4406 return TokError(".amdhsa_next_free_vgpr directive is required"); 4407 4408 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4409 return TokError(".amdhsa_next_free_sgpr directive is required"); 4410 4411 unsigned VGPRBlocks; 4412 unsigned SGPRBlocks; 4413 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4414 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4415 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4416 SGPRBlocks)) 4417 return true; 4418 4419 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4420 VGPRBlocks)) 4421 return OutOfRangeError(VGPRRange); 4422 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4423 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4424 4425 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4426 SGPRBlocks)) 4427 return OutOfRangeError(SGPRRange); 4428 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4429 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4430 SGPRBlocks); 4431 4432 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4433 return TokError("too many user SGPRs enabled"); 4434 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4435 UserSGPRCount); 4436 4437 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4438 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4439 ReserveFlatScr, ReserveXNACK); 4440 return false; 4441 } 4442 4443 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4444 uint32_t Major; 4445 uint32_t Minor; 4446 4447 if (ParseDirectiveMajorMinor(Major, Minor)) 4448 return true; 4449 4450 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4451 return false; 4452 } 4453 4454 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4455 uint32_t Major; 4456 uint32_t Minor; 4457 uint32_t Stepping; 4458 StringRef VendorName; 4459 StringRef ArchName; 4460 4461 // If this directive has no arguments, then use the ISA version for the 4462 // targeted GPU. 
4463 if (getLexer().is(AsmToken::EndOfStatement)) { 4464 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4465 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4466 ISA.Stepping, 4467 "AMD", "AMDGPU"); 4468 return false; 4469 } 4470 4471 if (ParseDirectiveMajorMinor(Major, Minor)) 4472 return true; 4473 4474 if (getLexer().isNot(AsmToken::Comma)) 4475 return TokError("stepping version number required, comma expected"); 4476 Lex(); 4477 4478 if (ParseAsAbsoluteExpression(Stepping)) 4479 return TokError("invalid stepping version"); 4480 4481 if (getLexer().isNot(AsmToken::Comma)) 4482 return TokError("vendor name required, comma expected"); 4483 Lex(); 4484 4485 if (getLexer().isNot(AsmToken::String)) 4486 return TokError("invalid vendor name"); 4487 4488 VendorName = getLexer().getTok().getStringContents(); 4489 Lex(); 4490 4491 if (getLexer().isNot(AsmToken::Comma)) 4492 return TokError("arch name required, comma expected"); 4493 Lex(); 4494 4495 if (getLexer().isNot(AsmToken::String)) 4496 return TokError("invalid arch name"); 4497 4498 ArchName = getLexer().getTok().getStringContents(); 4499 Lex(); 4500 4501 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4502 VendorName, ArchName); 4503 return false; 4504 } 4505 4506 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4507 amd_kernel_code_t &Header) { 4508 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4509 // assembly for backwards compatibility. 4510 if (ID == "max_scratch_backing_memory_byte_size") { 4511 Parser.eatToEndOfStatement(); 4512 return false; 4513 } 4514 4515 SmallString<40> ErrStr; 4516 raw_svector_ostream Err(ErrStr); 4517 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4518 return TokError(Err.str()); 4519 } 4520 Lex(); 4521 4522 if (ID == "enable_wavefront_size32") { 4523 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4524 if (!isGFX10Plus()) 4525 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4526 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4527 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4528 } else { 4529 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4530 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4531 } 4532 } 4533 4534 if (ID == "wavefront_size") { 4535 if (Header.wavefront_size == 5) { 4536 if (!isGFX10Plus()) 4537 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4538 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4539 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4540 } else if (Header.wavefront_size == 6) { 4541 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4542 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4543 } 4544 } 4545 4546 if (ID == "enable_wgp_mode") { 4547 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4548 !isGFX10Plus()) 4549 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4550 } 4551 4552 if (ID == "enable_mem_ordered") { 4553 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4554 !isGFX10Plus()) 4555 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4556 } 4557 4558 if (ID == "enable_fwd_progress") { 4559 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4560 !isGFX10Plus()) 4561 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4562 } 4563 4564 return false; 4565 } 4566 4567 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4568 amd_kernel_code_t Header; 4569 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4570 4571 while (true) { 4572 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4573 // will set the current token to EndOfStatement. 4574 while(getLexer().is(AsmToken::EndOfStatement)) 4575 Lex(); 4576 4577 if (getLexer().isNot(AsmToken::Identifier)) 4578 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4579 4580 StringRef ID = getLexer().getTok().getIdentifier(); 4581 Lex(); 4582 4583 if (ID == ".end_amd_kernel_code_t") 4584 break; 4585 4586 if (ParseAMDKernelCodeTValue(ID, Header)) 4587 return true; 4588 } 4589 4590 getTargetStreamer().EmitAMDKernelCodeT(Header); 4591 4592 return false; 4593 } 4594 4595 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4596 if (getLexer().isNot(AsmToken::Identifier)) 4597 return TokError("expected symbol name"); 4598 4599 StringRef KernelName = Parser.getTok().getString(); 4600 4601 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4602 ELF::STT_AMDGPU_HSA_KERNEL); 4603 Lex(); 4604 4605 KernelScope.initialize(getContext()); 4606 return false; 4607 } 4608 4609 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4610 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4611 return Error(getParser().getTok().getLoc(), 4612 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4613 "architectures"); 4614 } 4615 4616 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4617 4618 std::string ISAVersionStringFromSTI; 4619 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4620 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4621 4622 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4623 return Error(getParser().getTok().getLoc(), 4624 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4625 "arguments specified through the command line"); 4626 } 4627 4628 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4629 Lex(); 4630 4631 return false; 4632 } 4633 4634 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4635 const char *AssemblerDirectiveBegin; 4636 const char *AssemblerDirectiveEnd; 4637 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4638 isHsaAbiVersion3(&getSTI()) 4639 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4640 HSAMD::V3::AssemblerDirectiveEnd) 4641 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4642 HSAMD::AssemblerDirectiveEnd); 4643 4644 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4645 return Error(getParser().getTok().getLoc(), 4646 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4647 "not available on non-amdhsa OSes")).str()); 4648 } 4649 4650 std::string HSAMetadataString; 4651 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4652 HSAMetadataString)) 4653 return true; 4654 4655 if (isHsaAbiVersion3(&getSTI())) { 4656 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4657 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4658 } else { 4659 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4660 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4661 } 4662 4663 return false; 4664 } 4665 4666 /// Common code to parse out a block of text (typically YAML) between start and 4667 /// end directives. 
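/// For example, ParseDirectiveHSAMetadata above hands the begin/end
/// directive names to this helper and receives the collected text (with
/// statement separators re-inserted) as a single string.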
4668 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4669 const char *AssemblerDirectiveEnd, 4670 std::string &CollectString) { 4671 4672 raw_string_ostream CollectStream(CollectString); 4673 4674 getLexer().setSkipSpace(false); 4675 4676 bool FoundEnd = false; 4677 while (!getLexer().is(AsmToken::Eof)) { 4678 while (getLexer().is(AsmToken::Space)) { 4679 CollectStream << getLexer().getTok().getString(); 4680 Lex(); 4681 } 4682 4683 if (getLexer().is(AsmToken::Identifier)) { 4684 StringRef ID = getLexer().getTok().getIdentifier(); 4685 if (ID == AssemblerDirectiveEnd) { 4686 Lex(); 4687 FoundEnd = true; 4688 break; 4689 } 4690 } 4691 4692 CollectStream << Parser.parseStringToEndOfStatement() 4693 << getContext().getAsmInfo()->getSeparatorString(); 4694 4695 Parser.eatToEndOfStatement(); 4696 } 4697 4698 getLexer().setSkipSpace(true); 4699 4700 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4701 return TokError(Twine("expected directive ") + 4702 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4703 } 4704 4705 CollectStream.flush(); 4706 return false; 4707 } 4708 4709 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4710 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4711 std::string String; 4712 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4713 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4714 return true; 4715 4716 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4717 if (!PALMetadata->setFromString(String)) 4718 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4719 return false; 4720 } 4721 4722 /// Parse the assembler directive for old linear-format PAL metadata. 4723 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4724 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4725 return Error(getParser().getTok().getLoc(), 4726 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4727 "not available on non-amdpal OSes")).str()); 4728 } 4729 4730 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4731 PALMetadata->setLegacy(); 4732 for (;;) { 4733 uint32_t Key, Value; 4734 if (ParseAsAbsoluteExpression(Key)) { 4735 return TokError(Twine("invalid value in ") + 4736 Twine(PALMD::AssemblerDirective)); 4737 } 4738 if (getLexer().isNot(AsmToken::Comma)) { 4739 return TokError(Twine("expected an even number of values in ") + 4740 Twine(PALMD::AssemblerDirective)); 4741 } 4742 Lex(); 4743 if (ParseAsAbsoluteExpression(Value)) { 4744 return TokError(Twine("invalid value in ") + 4745 Twine(PALMD::AssemblerDirective)); 4746 } 4747 PALMetadata->setRegister(Key, Value); 4748 if (getLexer().isNot(AsmToken::Comma)) 4749 break; 4750 Lex(); 4751 } 4752 return false; 4753 } 4754 4755 /// ParseDirectiveAMDGPULDS 4756 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4757 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4758 if (getParser().checkForValidSection()) 4759 return true; 4760 4761 StringRef Name; 4762 SMLoc NameLoc = getLexer().getLoc(); 4763 if (getParser().parseIdentifier(Name)) 4764 return TokError("expected identifier in directive"); 4765 4766 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4767 if (parseToken(AsmToken::Comma, "expected ','")) 4768 return true; 4769 4770 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4771 4772 int64_t Size; 4773 SMLoc SizeLoc = getLexer().getLoc(); 4774 if (getParser().parseAbsoluteExpression(Size)) 4775 return true; 4776 if (Size < 0) 4777 return 
Error(SizeLoc, "size must be non-negative"); 4778 if (Size > LocalMemorySize) 4779 return Error(SizeLoc, "size is too large"); 4780 4781 int64_t Alignment = 4; 4782 if (getLexer().is(AsmToken::Comma)) { 4783 Lex(); 4784 SMLoc AlignLoc = getLexer().getLoc(); 4785 if (getParser().parseAbsoluteExpression(Alignment)) 4786 return true; 4787 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4788 return Error(AlignLoc, "alignment must be a power of two"); 4789 4790 // Alignment larger than the size of LDS is possible in theory, as long 4791 // as the linker manages to place to symbol at address 0, but we do want 4792 // to make sure the alignment fits nicely into a 32-bit integer. 4793 if (Alignment >= 1u << 31) 4794 return Error(AlignLoc, "alignment is too large"); 4795 } 4796 4797 if (parseToken(AsmToken::EndOfStatement, 4798 "unexpected token in '.amdgpu_lds' directive")) 4799 return true; 4800 4801 Symbol->redefineIfPossible(); 4802 if (!Symbol->isUndefined()) 4803 return Error(NameLoc, "invalid symbol redefinition"); 4804 4805 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4806 return false; 4807 } 4808 4809 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4810 StringRef IDVal = DirectiveID.getString(); 4811 4812 if (isHsaAbiVersion3(&getSTI())) { 4813 if (IDVal == ".amdgcn_target") 4814 return ParseDirectiveAMDGCNTarget(); 4815 4816 if (IDVal == ".amdhsa_kernel") 4817 return ParseDirectiveAMDHSAKernel(); 4818 4819 // TODO: Restructure/combine with PAL metadata directive. 4820 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4821 return ParseDirectiveHSAMetadata(); 4822 } else { 4823 if (IDVal == ".hsa_code_object_version") 4824 return ParseDirectiveHSACodeObjectVersion(); 4825 4826 if (IDVal == ".hsa_code_object_isa") 4827 return ParseDirectiveHSACodeObjectISA(); 4828 4829 if (IDVal == ".amd_kernel_code_t") 4830 return ParseDirectiveAMDKernelCodeT(); 4831 4832 if (IDVal == ".amdgpu_hsa_kernel") 4833 return ParseDirectiveAMDGPUHsaKernel(); 4834 4835 if (IDVal == ".amd_amdgpu_isa") 4836 return ParseDirectiveISAVersion(); 4837 4838 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4839 return ParseDirectiveHSAMetadata(); 4840 } 4841 4842 if (IDVal == ".amdgpu_lds") 4843 return ParseDirectiveAMDGPULDS(); 4844 4845 if (IDVal == PALMD::AssemblerDirectiveBegin) 4846 return ParseDirectivePALMetadataBegin(); 4847 4848 if (IDVal == PALMD::AssemblerDirective) 4849 return ParseDirectivePALMetadata(); 4850 4851 return true; 4852 } 4853 4854 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4855 unsigned RegNo) const { 4856 4857 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4858 R.isValid(); ++R) { 4859 if (*R == RegNo) 4860 return isGFX9Plus(); 4861 } 4862 4863 // GFX10 has 2 more SGPRs 104 and 105. 
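  // E.g. an illustrative "s_mov_b64 s[104:105], 0" should only be accepted
  // when hasSGPR104_SGPR105() is true.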
4864 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4865 R.isValid(); ++R) { 4866 if (*R == RegNo) 4867 return hasSGPR104_SGPR105(); 4868 } 4869 4870 switch (RegNo) { 4871 case AMDGPU::SRC_SHARED_BASE: 4872 case AMDGPU::SRC_SHARED_LIMIT: 4873 case AMDGPU::SRC_PRIVATE_BASE: 4874 case AMDGPU::SRC_PRIVATE_LIMIT: 4875 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4876 return isGFX9Plus(); 4877 case AMDGPU::TBA: 4878 case AMDGPU::TBA_LO: 4879 case AMDGPU::TBA_HI: 4880 case AMDGPU::TMA: 4881 case AMDGPU::TMA_LO: 4882 case AMDGPU::TMA_HI: 4883 return !isGFX9Plus(); 4884 case AMDGPU::XNACK_MASK: 4885 case AMDGPU::XNACK_MASK_LO: 4886 case AMDGPU::XNACK_MASK_HI: 4887 return (isVI() || isGFX9()) && hasXNACK(); 4888 case AMDGPU::SGPR_NULL: 4889 return isGFX10Plus(); 4890 default: 4891 break; 4892 } 4893 4894 if (isCI()) 4895 return true; 4896 4897 if (isSI() || isGFX10Plus()) { 4898 // No flat_scr on SI. 4899 // On GFX10 flat scratch is not a valid register operand and can only be 4900 // accessed with s_setreg/s_getreg. 4901 switch (RegNo) { 4902 case AMDGPU::FLAT_SCR: 4903 case AMDGPU::FLAT_SCR_LO: 4904 case AMDGPU::FLAT_SCR_HI: 4905 return false; 4906 default: 4907 return true; 4908 } 4909 } 4910 4911 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4912 // SI/CI have. 4913 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4914 R.isValid(); ++R) { 4915 if (*R == RegNo) 4916 return hasSGPR102_SGPR103(); 4917 } 4918 4919 return true; 4920 } 4921 4922 OperandMatchResultTy 4923 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4924 OperandMode Mode) { 4925 // Try to parse with a custom parser 4926 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4927 4928 // If we successfully parsed the operand or if there as an error parsing, 4929 // we are done. 4930 // 4931 // If we are parsing after we reach EndOfStatement then this means we 4932 // are appending default values to the Operands list. This is only done 4933 // by custom parser, so we shouldn't continue on to the generic parsing. 4934 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4935 getLexer().is(AsmToken::EndOfStatement)) 4936 return ResTy; 4937 4938 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4939 unsigned Prefix = Operands.size(); 4940 SMLoc LBraceLoc = getTok().getLoc(); 4941 Parser.Lex(); // eat the '[' 4942 4943 for (;;) { 4944 ResTy = parseReg(Operands); 4945 if (ResTy != MatchOperand_Success) 4946 return ResTy; 4947 4948 if (getLexer().is(AsmToken::RBrac)) 4949 break; 4950 4951 if (getLexer().isNot(AsmToken::Comma)) 4952 return MatchOperand_ParseFail; 4953 Parser.Lex(); 4954 } 4955 4956 if (Operands.size() - Prefix > 1) { 4957 Operands.insert(Operands.begin() + Prefix, 4958 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4959 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4960 getTok().getLoc())); 4961 } 4962 4963 Parser.Lex(); // eat the ']' 4964 return MatchOperand_Success; 4965 } 4966 4967 return parseRegOrImm(Operands); 4968 } 4969 4970 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4971 // Clear any forced encodings from the previous instruction. 
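  // (Illustrative examples of the suffix handling below: "v_add_f32_e64"
  // yields mnemonic "v_add_f32" with a forced 64-bit encoding, and
  // "v_mov_b32_sdwa" yields "v_mov_b32" with SDWA forced.)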
4972 setForcedEncodingSize(0); 4973 setForcedDPP(false); 4974 setForcedSDWA(false); 4975 4976 if (Name.endswith("_e64")) { 4977 setForcedEncodingSize(64); 4978 return Name.substr(0, Name.size() - 4); 4979 } else if (Name.endswith("_e32")) { 4980 setForcedEncodingSize(32); 4981 return Name.substr(0, Name.size() - 4); 4982 } else if (Name.endswith("_dpp")) { 4983 setForcedDPP(true); 4984 return Name.substr(0, Name.size() - 4); 4985 } else if (Name.endswith("_sdwa")) { 4986 setForcedSDWA(true); 4987 return Name.substr(0, Name.size() - 5); 4988 } 4989 return Name; 4990 } 4991 4992 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4993 StringRef Name, 4994 SMLoc NameLoc, OperandVector &Operands) { 4995 // Add the instruction mnemonic 4996 Name = parseMnemonicSuffix(Name); 4997 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4998 4999 bool IsMIMG = Name.startswith("image_"); 5000 5001 while (!getLexer().is(AsmToken::EndOfStatement)) { 5002 OperandMode Mode = OperandMode_Default; 5003 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5004 Mode = OperandMode_NSA; 5005 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5006 5007 // Eat the comma or space if there is one. 5008 if (getLexer().is(AsmToken::Comma)) 5009 Parser.Lex(); 5010 5011 if (Res != MatchOperand_Success) { 5012 checkUnsupportedInstruction(Name, NameLoc); 5013 if (!Parser.hasPendingError()) { 5014 // FIXME: use real operand location rather than the current location. 5015 StringRef Msg = 5016 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5017 "not a valid operand."; 5018 Error(getLexer().getLoc(), Msg); 5019 } 5020 while (!getLexer().is(AsmToken::EndOfStatement)) { 5021 Parser.Lex(); 5022 } 5023 Parser.Lex(); 5024 return true; 5025 } 5026 } 5027 Parser.Lex(); 5028 5029 return false; 5030 } 5031 5032 //===----------------------------------------------------------------------===// 5033 // Utility functions 5034 //===----------------------------------------------------------------------===// 5035 5036 OperandMatchResultTy 5037 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5038 5039 if (!trySkipId(Prefix, AsmToken::Colon)) 5040 return MatchOperand_NoMatch; 5041 5042 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5043 } 5044 5045 OperandMatchResultTy 5046 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5047 AMDGPUOperand::ImmTy ImmTy, 5048 bool (*ConvertResult)(int64_t&)) { 5049 SMLoc S = getLoc(); 5050 int64_t Value = 0; 5051 5052 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5053 if (Res != MatchOperand_Success) 5054 return Res; 5055 5056 if (ConvertResult && !ConvertResult(Value)) { 5057 Error(S, "invalid " + StringRef(Prefix) + " value."); 5058 } 5059 5060 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5061 return MatchOperand_Success; 5062 } 5063 5064 OperandMatchResultTy 5065 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5066 OperandVector &Operands, 5067 AMDGPUOperand::ImmTy ImmTy, 5068 bool (*ConvertResult)(int64_t&)) { 5069 SMLoc S = getLoc(); 5070 if (!trySkipId(Prefix, AsmToken::Colon)) 5071 return MatchOperand_NoMatch; 5072 5073 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5074 return MatchOperand_ParseFail; 5075 5076 unsigned Val = 0; 5077 const unsigned MaxSize = 4; 5078 5079 // FIXME: How to verify the number of elements matches the number of src 5080 // operands? 
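  // An illustrative input for the loop below: "op_sel:[0,1,1]" sets bits
  // 1 and 2, giving Val = 0b110.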
5081 for (int I = 0; ; ++I) { 5082 int64_t Op; 5083 SMLoc Loc = getLoc(); 5084 if (!parseExpr(Op)) 5085 return MatchOperand_ParseFail; 5086 5087 if (Op != 0 && Op != 1) { 5088 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5089 return MatchOperand_ParseFail; 5090 } 5091 5092 Val |= (Op << I); 5093 5094 if (trySkipToken(AsmToken::RBrac)) 5095 break; 5096 5097 if (I + 1 == MaxSize) { 5098 Error(getLoc(), "expected a closing square bracket"); 5099 return MatchOperand_ParseFail; 5100 } 5101 5102 if (!skipToken(AsmToken::Comma, "expected a comma")) 5103 return MatchOperand_ParseFail; 5104 } 5105 5106 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5107 return MatchOperand_Success; 5108 } 5109 5110 OperandMatchResultTy 5111 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 5112 AMDGPUOperand::ImmTy ImmTy) { 5113 int64_t Bit = 0; 5114 SMLoc S = Parser.getTok().getLoc(); 5115 5116 // We are at the end of the statement, and this is a default argument, so 5117 // use a default value. 5118 if (getLexer().isNot(AsmToken::EndOfStatement)) { 5119 switch(getLexer().getKind()) { 5120 case AsmToken::Identifier: { 5121 StringRef Tok = Parser.getTok().getString(); 5122 if (Tok == Name) { 5123 if (Tok == "r128" && !hasMIMG_R128()) 5124 Error(S, "r128 modifier is not supported on this GPU"); 5125 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5126 Error(S, "a16 modifier is not supported on this GPU"); 5127 Bit = 1; 5128 Parser.Lex(); 5129 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5130 Bit = 0; 5131 Parser.Lex(); 5132 } else { 5133 return MatchOperand_NoMatch; 5134 } 5135 break; 5136 } 5137 default: 5138 return MatchOperand_NoMatch; 5139 } 5140 } 5141 5142 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) 5143 return MatchOperand_ParseFail; 5144 5145 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5146 ImmTy = AMDGPUOperand::ImmTyR128A16; 5147 5148 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5149 return MatchOperand_Success; 5150 } 5151 5152 static void addOptionalImmOperand( 5153 MCInst& Inst, const OperandVector& Operands, 5154 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5155 AMDGPUOperand::ImmTy ImmT, 5156 int64_t Default = 0) { 5157 auto i = OptionalIdx.find(ImmT); 5158 if (i != OptionalIdx.end()) { 5159 unsigned Idx = i->second; 5160 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5161 } else { 5162 Inst.addOperand(MCOperand::createImm(Default)); 5163 } 5164 } 5165 5166 OperandMatchResultTy 5167 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5168 if (getLexer().isNot(AsmToken::Identifier)) { 5169 return MatchOperand_NoMatch; 5170 } 5171 StringRef Tok = Parser.getTok().getString(); 5172 if (Tok != Prefix) { 5173 return MatchOperand_NoMatch; 5174 } 5175 5176 Parser.Lex(); 5177 if (getLexer().isNot(AsmToken::Colon)) { 5178 return MatchOperand_ParseFail; 5179 } 5180 5181 Parser.Lex(); 5182 if (getLexer().isNot(AsmToken::Identifier)) { 5183 return MatchOperand_ParseFail; 5184 } 5185 5186 Value = Parser.getTok().getString(); 5187 return MatchOperand_Success; 5188 } 5189 5190 //===----------------------------------------------------------------------===// 5191 // MTBUF format 5192 //===----------------------------------------------------------------------===// 5193 5194 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5195 int64_t MaxVal, 5196 int64_t &Fmt) { 5197 int64_t Val; 5198 SMLoc Loc = getLoc(); 5199 5200 auto Res = parseIntWithPrefix(Pref, Val); 5201 if 
(Res == MatchOperand_ParseFail) 5202 return false; 5203 if (Res == MatchOperand_NoMatch) 5204 return true; 5205 5206 if (Val < 0 || Val > MaxVal) { 5207 Error(Loc, Twine("out of range ", StringRef(Pref))); 5208 return false; 5209 } 5210 5211 Fmt = Val; 5212 return true; 5213 } 5214 5215 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5216 // values to live in a joint format operand in the MCInst encoding. 5217 OperandMatchResultTy 5218 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5219 using namespace llvm::AMDGPU::MTBUFFormat; 5220 5221 int64_t Dfmt = DFMT_UNDEF; 5222 int64_t Nfmt = NFMT_UNDEF; 5223 5224 // dfmt and nfmt can appear in either order, and each is optional. 5225 for (int I = 0; I < 2; ++I) { 5226 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5227 return MatchOperand_ParseFail; 5228 5229 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5230 return MatchOperand_ParseFail; 5231 } 5232 // Skip optional comma between dfmt/nfmt 5233 // but guard against 2 commas following each other. 5234 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5235 !peekToken().is(AsmToken::Comma)) { 5236 trySkipToken(AsmToken::Comma); 5237 } 5238 } 5239 5240 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5241 return MatchOperand_NoMatch; 5242 5243 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5244 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5245 5246 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5247 return MatchOperand_Success; 5248 } 5249 5250 OperandMatchResultTy 5251 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5252 using namespace llvm::AMDGPU::MTBUFFormat; 5253 5254 int64_t Fmt = UFMT_UNDEF; 5255 5256 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5257 return MatchOperand_ParseFail; 5258 5259 if (Fmt == UFMT_UNDEF) 5260 return MatchOperand_NoMatch; 5261 5262 Format = Fmt; 5263 return MatchOperand_Success; 5264 } 5265 5266 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5267 int64_t &Nfmt, 5268 StringRef FormatStr, 5269 SMLoc Loc) { 5270 using namespace llvm::AMDGPU::MTBUFFormat; 5271 int64_t Format; 5272 5273 Format = getDfmt(FormatStr); 5274 if (Format != DFMT_UNDEF) { 5275 Dfmt = Format; 5276 return true; 5277 } 5278 5279 Format = getNfmt(FormatStr, getSTI()); 5280 if (Format != NFMT_UNDEF) { 5281 Nfmt = Format; 5282 return true; 5283 } 5284 5285 Error(Loc, "unsupported format"); 5286 return false; 5287 } 5288 5289 OperandMatchResultTy 5290 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5291 SMLoc FormatLoc, 5292 int64_t &Format) { 5293 using namespace llvm::AMDGPU::MTBUFFormat; 5294 5295 int64_t Dfmt = DFMT_UNDEF; 5296 int64_t Nfmt = NFMT_UNDEF; 5297 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5298 return MatchOperand_ParseFail; 5299 5300 if (trySkipToken(AsmToken::Comma)) { 5301 StringRef Str; 5302 SMLoc Loc = getLoc(); 5303 if (!parseId(Str, "expected a format string") || 5304 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5305 return MatchOperand_ParseFail; 5306 } 5307 if (Dfmt == DFMT_UNDEF) { 5308 Error(Loc, "duplicate numeric format"); 5309 return MatchOperand_ParseFail; 5310 } else if (Nfmt == NFMT_UNDEF) { 5311 Error(Loc, "duplicate data format"); 5312 return MatchOperand_ParseFail; 5313 } 5314 } 5315 5316 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5317 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5318 5319 if (isGFX10Plus()) { 5320 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5321 if (Ufmt == UFMT_UNDEF) { 5322 Error(FormatLoc, "unsupported format"); 5323 return MatchOperand_ParseFail; 5324 } 5325 Format = Ufmt; 5326 } else { 5327 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5328 } 5329 5330 return MatchOperand_Success; 5331 } 5332 5333 OperandMatchResultTy 5334 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5335 SMLoc Loc, 5336 int64_t &Format) { 5337 using namespace llvm::AMDGPU::MTBUFFormat; 5338 5339 auto Id = getUnifiedFormat(FormatStr); 5340 if (Id == UFMT_UNDEF) 5341 return MatchOperand_NoMatch; 5342 5343 if (!isGFX10Plus()) { 5344 Error(Loc, "unified format is not supported on this GPU"); 5345 return MatchOperand_ParseFail; 5346 } 5347 5348 Format = Id; 5349 return MatchOperand_Success; 5350 } 5351 5352 OperandMatchResultTy 5353 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5354 using namespace llvm::AMDGPU::MTBUFFormat; 5355 SMLoc Loc = getLoc(); 5356 5357 if (!parseExpr(Format)) 5358 return MatchOperand_ParseFail; 5359 if (!isValidFormatEncoding(Format, getSTI())) { 5360 Error(Loc, "out of range format"); 5361 return MatchOperand_ParseFail; 5362 } 5363 5364 return MatchOperand_Success; 5365 } 5366 5367 OperandMatchResultTy 5368 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5369 using namespace llvm::AMDGPU::MTBUFFormat; 5370 5371 if (!trySkipId("format", AsmToken::Colon)) 5372 return MatchOperand_NoMatch; 5373 5374 if (trySkipToken(AsmToken::LBrac)) { 5375 StringRef FormatStr; 5376 SMLoc Loc = getLoc(); 5377 if (!parseId(FormatStr, "expected a format string")) 5378 return MatchOperand_ParseFail; 5379 5380 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5381 if (Res == MatchOperand_NoMatch) 5382 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5383 if (Res != MatchOperand_Success) 5384 return Res; 5385 5386 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5387 return MatchOperand_ParseFail; 5388 5389 return MatchOperand_Success; 5390 } 5391 5392 return parseNumericFormat(Format); 5393 } 5394 5395 OperandMatchResultTy 5396 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5397 using namespace llvm::AMDGPU::MTBUFFormat; 5398 5399 int64_t Format = getDefaultFormatEncoding(getSTI()); 5400 OperandMatchResultTy Res; 5401 SMLoc Loc = getLoc(); 5402 5403 // Parse legacy format syntax. 5404 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5405 if (Res == MatchOperand_ParseFail) 5406 return Res; 5407 5408 bool FormatFound = (Res == MatchOperand_Success); 5409 5410 Operands.push_back( 5411 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5412 5413 if (FormatFound) 5414 trySkipToken(AsmToken::Comma); 5415 5416 if (isToken(AsmToken::EndOfStatement)) { 5417 // We are expecting an soffset operand, 5418 // but let matcher handle the error. 5419 return MatchOperand_Success; 5420 } 5421 5422 // Parse soffset. 
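// The soffset operand may be a register or an immediate. If no format was
// found above, a symbolic or numeric format specifier may still follow it
// and is folded into the format operand created earlier.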
5423 Res = parseRegOrImm(Operands); 5424 if (Res != MatchOperand_Success) 5425 return Res; 5426 5427 trySkipToken(AsmToken::Comma); 5428 5429 if (!FormatFound) { 5430 Res = parseSymbolicOrNumericFormat(Format); 5431 if (Res == MatchOperand_ParseFail) 5432 return Res; 5433 if (Res == MatchOperand_Success) { 5434 auto Size = Operands.size(); 5435 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5436 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5437 Op.setImm(Format); 5438 } 5439 return MatchOperand_Success; 5440 } 5441 5442 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5443 Error(getLoc(), "duplicate format"); 5444 return MatchOperand_ParseFail; 5445 } 5446 return MatchOperand_Success; 5447 } 5448 5449 //===----------------------------------------------------------------------===// 5450 // ds 5451 //===----------------------------------------------------------------------===// 5452 5453 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5454 const OperandVector &Operands) { 5455 OptionalImmIndexMap OptionalIdx; 5456 5457 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5458 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5459 5460 // Add the register arguments 5461 if (Op.isReg()) { 5462 Op.addRegOperands(Inst, 1); 5463 continue; 5464 } 5465 5466 // Handle optional arguments 5467 OptionalIdx[Op.getImmTy()] = i; 5468 } 5469 5470 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5472 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5473 5474 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5475 } 5476 5477 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5478 bool IsGdsHardcoded) { 5479 OptionalImmIndexMap OptionalIdx; 5480 5481 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5482 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5483 5484 // Add the register arguments 5485 if (Op.isReg()) { 5486 Op.addRegOperands(Inst, 1); 5487 continue; 5488 } 5489 5490 if (Op.isToken() && Op.getToken() == "gds") { 5491 IsGdsHardcoded = true; 5492 continue; 5493 } 5494 5495 // Handle optional arguments 5496 OptionalIdx[Op.getImmTy()] = i; 5497 } 5498 5499 AMDGPUOperand::ImmTy OffsetType = 5500 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5501 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5502 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5503 AMDGPUOperand::ImmTyOffset; 5504 5505 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5506 5507 if (!IsGdsHardcoded) { 5508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5509 } 5510 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5511 } 5512 5513 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5514 OptionalImmIndexMap OptionalIdx; 5515 5516 unsigned OperandIdx[4]; 5517 unsigned EnMask = 0; 5518 int SrcIdx = 0; 5519 5520 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5522 5523 // Add the register arguments 5524 if (Op.isReg()) { 5525 assert(SrcIdx < 4); 5526 OperandIdx[SrcIdx] = Inst.size(); 5527 Op.addRegOperands(Inst, 1); 5528 ++SrcIdx; 5529 continue; 5530 } 5531 5532 if (Op.isOff()) { 5533 assert(SrcIdx < 4); 5534 OperandIdx[SrcIdx] = Inst.size(); 5535 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5536 ++SrcIdx; 5537 continue; 5538 } 5539 5540 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5541 Op.addImmOperands(Inst, 1); 5542 continue; 5543 } 5544 5545 if (Op.isToken() && Op.getToken() == "done") 5546 continue; 5547 5548 // Handle optional arguments 5549 OptionalIdx[Op.getImmTy()] = i; 5550 } 5551 5552 assert(SrcIdx == 4); 5553 5554 bool Compr = false; 5555 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5556 Compr = true; 5557 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5558 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5559 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5560 } 5561 5562 for (auto i = 0; i < SrcIdx; ++i) { 5563 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5564 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5565 } 5566 } 5567 5568 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5569 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5570 5571 Inst.addOperand(MCOperand::createImm(EnMask)); 5572 } 5573 5574 //===----------------------------------------------------------------------===// 5575 // s_waitcnt 5576 //===----------------------------------------------------------------------===// 5577 5578 static bool 5579 encodeCnt( 5580 const AMDGPU::IsaVersion ISA, 5581 int64_t &IntVal, 5582 int64_t CntVal, 5583 bool Saturate, 5584 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5585 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5586 { 5587 bool Failed = false; 5588 5589 IntVal = encode(ISA, IntVal, CntVal); 5590 if (CntVal != decode(ISA, IntVal)) { 5591 if (Saturate) { 5592 IntVal = encode(ISA, IntVal, -1); 5593 } else { 5594 Failed = true; 5595 } 5596 } 5597 return Failed; 5598 } 5599 5600 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5601 5602 SMLoc CntLoc = getLoc(); 5603 StringRef CntName = getTokenStr(); 5604 5605 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5606 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5607 return false; 5608 5609 int64_t CntVal; 5610 SMLoc ValLoc = getLoc(); 5611 if (!parseExpr(CntVal)) 5612 return false; 5613 5614 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5615 5616 bool Failed = true; 5617 bool Sat = CntName.endswith("_sat"); 5618 5619 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5620 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5621 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5622 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5623 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5624 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5625 } else { 5626 Error(CntLoc, "invalid counter name " + CntName); 5627 return false; 5628 } 5629 5630 if (Failed) { 5631 Error(ValLoc, "too large value for " + CntName); 5632 return false; 5633 } 5634 5635 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5636 return false; 5637 5638 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5639 if (isToken(AsmToken::EndOfStatement)) { 5640 Error(getLoc(), "expected a counter name"); 5641 return false; 5642 } 5643 } 5644 5645 return true; 5646 } 5647 5648 OperandMatchResultTy 5649 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5650 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5651 int64_t Waitcnt = getWaitcntBitMask(ISA); 5652 SMLoc S = getLoc(); 5653 5654 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5655 while (!isToken(AsmToken::EndOfStatement)) { 5656 if (!parseCnt(Waitcnt)) 5657 return MatchOperand_ParseFail; 5658 } 5659 } else { 5660 if (!parseExpr(Waitcnt)) 5661 return MatchOperand_ParseFail; 5662 } 5663 5664 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5665 return MatchOperand_Success; 5666 } 5667 5668 bool 5669 AMDGPUOperand::isSWaitCnt() const { 5670 return isImm(); 5671 } 5672 5673 //===----------------------------------------------------------------------===// 5674 // hwreg 5675 //===----------------------------------------------------------------------===// 5676 5677 bool 5678 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5679 OperandInfoTy &Offset, 5680 
OperandInfoTy &Width) { 5681 using namespace llvm::AMDGPU::Hwreg; 5682 5683 // The register may be specified by name or using a numeric code 5684 HwReg.Loc = getLoc(); 5685 if (isToken(AsmToken::Identifier) && 5686 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5687 HwReg.IsSymbolic = true; 5688 lex(); // skip register name 5689 } else if (!parseExpr(HwReg.Id, "a register name")) { 5690 return false; 5691 } 5692 5693 if (trySkipToken(AsmToken::RParen)) 5694 return true; 5695 5696 // parse optional params 5697 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5698 return false; 5699 5700 Offset.Loc = getLoc(); 5701 if (!parseExpr(Offset.Id)) 5702 return false; 5703 5704 if (!skipToken(AsmToken::Comma, "expected a comma")) 5705 return false; 5706 5707 Width.Loc = getLoc(); 5708 return parseExpr(Width.Id) && 5709 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5710 } 5711 5712 bool 5713 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5714 const OperandInfoTy &Offset, 5715 const OperandInfoTy &Width) { 5716 5717 using namespace llvm::AMDGPU::Hwreg; 5718 5719 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5720 Error(HwReg.Loc, 5721 "specified hardware register is not supported on this GPU"); 5722 return false; 5723 } 5724 if (!isValidHwreg(HwReg.Id)) { 5725 Error(HwReg.Loc, 5726 "invalid code of hardware register: only 6-bit values are legal"); 5727 return false; 5728 } 5729 if (!isValidHwregOffset(Offset.Id)) { 5730 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5731 return false; 5732 } 5733 if (!isValidHwregWidth(Width.Id)) { 5734 Error(Width.Loc, 5735 "invalid bitfield width: only values from 1 to 32 are legal"); 5736 return false; 5737 } 5738 return true; 5739 } 5740 5741 OperandMatchResultTy 5742 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5743 using namespace llvm::AMDGPU::Hwreg; 5744 5745 int64_t ImmVal = 0; 5746 SMLoc Loc = getLoc(); 5747 5748 if (trySkipId("hwreg", AsmToken::LParen)) { 5749 OperandInfoTy HwReg(ID_UNKNOWN_); 5750 OperandInfoTy Offset(OFFSET_DEFAULT_); 5751 OperandInfoTy Width(WIDTH_DEFAULT_); 5752 if (parseHwregBody(HwReg, Offset, Width) && 5753 validateHwreg(HwReg, Offset, Width)) { 5754 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 5755 } else { 5756 return MatchOperand_ParseFail; 5757 } 5758 } else if (parseExpr(ImmVal, "a hwreg macro")) { 5759 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5760 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5761 return MatchOperand_ParseFail; 5762 } 5763 } else { 5764 return MatchOperand_ParseFail; 5765 } 5766 5767 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5768 return MatchOperand_Success; 5769 } 5770 5771 bool AMDGPUOperand::isHwreg() const { 5772 return isImmTy(ImmTyHwreg); 5773 } 5774 5775 //===----------------------------------------------------------------------===// 5776 // sendmsg 5777 //===----------------------------------------------------------------------===// 5778 5779 bool 5780 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5781 OperandInfoTy &Op, 5782 OperandInfoTy &Stream) { 5783 using namespace llvm::AMDGPU::SendMsg; 5784 5785 Msg.Loc = getLoc(); 5786 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5787 Msg.IsSymbolic = true; 5788 lex(); // skip message name 5789 } else if (!parseExpr(Msg.Id, "a message name")) { 5790 return false; 5791 } 5792 5793 if (trySkipToken(AsmToken::Comma)) { 5794 Op.IsDefined = true; 
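// As with the message id above, the operation may be given either by its
// symbolic name or by a numeric code.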
5795 Op.Loc = getLoc();
5796 if (isToken(AsmToken::Identifier) &&
5797 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5798 lex(); // skip operation name
5799 } else if (!parseExpr(Op.Id, "an operation name")) {
5800 return false;
5801 }
5802
5803 if (trySkipToken(AsmToken::Comma)) {
5804 Stream.IsDefined = true;
5805 Stream.Loc = getLoc();
5806 if (!parseExpr(Stream.Id))
5807 return false;
5808 }
5809 }
5810
5811 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5812 }
5813
5814 bool
5815 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5816 const OperandInfoTy &Op,
5817 const OperandInfoTy &Stream) {
5818 using namespace llvm::AMDGPU::SendMsg;
5819
5820 // Validation strictness depends on whether the message is specified
5821 // in a symbolic or in a numeric form. In the latter case
5822 // only the encoding possibility is checked.
5823 bool Strict = Msg.IsSymbolic;
5824
5825 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5826 Error(Msg.Loc, "invalid message id");
5827 return false;
5828 }
5829 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5830 if (Op.IsDefined) {
5831 Error(Op.Loc, "message does not support operations");
5832 } else {
5833 Error(Msg.Loc, "missing message operation");
5834 }
5835 return false;
5836 }
5837 if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5838 Error(Op.Loc, "invalid operation id");
5839 return false;
5840 }
5841 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5842 Error(Stream.Loc, "message operation does not support streams");
5843 return false;
5844 }
5845 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5846 Error(Stream.Loc, "invalid message stream id");
5847 return false;
5848 }
5849 return true;
5850 }
5851
5852 OperandMatchResultTy
5853 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5854 using namespace llvm::AMDGPU::SendMsg;
5855
5856 int64_t ImmVal = 0;
5857 SMLoc Loc = getLoc();
5858
5859 if (trySkipId("sendmsg", AsmToken::LParen)) {
5860 OperandInfoTy Msg(ID_UNKNOWN_);
5861 OperandInfoTy Op(OP_NONE_);
5862 OperandInfoTy Stream(STREAM_ID_NONE_);
5863 if (parseSendMsgBody(Msg, Op, Stream) &&
5864 validateSendMsg(Msg, Op, Stream)) {
5865 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5866 } else {
5867 return MatchOperand_ParseFail;
5868 }
5869 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
5870 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5871 Error(Loc, "invalid immediate: only 16-bit values are legal");
5872 return MatchOperand_ParseFail;
5873 }
5874 } else {
5875 return MatchOperand_ParseFail;
5876 }
5877
5878 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5879 return MatchOperand_Success;
5880 }
5881
5882 bool AMDGPUOperand::isSendMsg() const {
5883 return isImmTy(ImmTySendMsg);
5884 }
5885
5886 //===----------------------------------------------------------------------===//
5887 // v_interp
5888 //===----------------------------------------------------------------------===//
5889
5890 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5891 if (getLexer().getKind() != AsmToken::Identifier)
5892 return MatchOperand_NoMatch;
5893
5894 StringRef Str = Parser.getTok().getString();
5895 int Slot = StringSwitch<int>(Str)
5896 .Case("p10", 0)
5897 .Case("p20", 1)
5898 .Case("p0", 2)
5899 .Default(-1);
5900
5901 SMLoc S = Parser.getTok().getLoc();
5902 if (Slot == -1)
5903 return MatchOperand_ParseFail;
5904
5905 Parser.Lex();
5906 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5907
AMDGPUOperand::ImmTyInterpSlot)); 5908 return MatchOperand_Success; 5909 } 5910 5911 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5912 if (getLexer().getKind() != AsmToken::Identifier) 5913 return MatchOperand_NoMatch; 5914 5915 StringRef Str = Parser.getTok().getString(); 5916 if (!Str.startswith("attr")) 5917 return MatchOperand_NoMatch; 5918 5919 StringRef Chan = Str.take_back(2); 5920 int AttrChan = StringSwitch<int>(Chan) 5921 .Case(".x", 0) 5922 .Case(".y", 1) 5923 .Case(".z", 2) 5924 .Case(".w", 3) 5925 .Default(-1); 5926 if (AttrChan == -1) 5927 return MatchOperand_ParseFail; 5928 5929 Str = Str.drop_back(2).drop_front(4); 5930 5931 uint8_t Attr; 5932 if (Str.getAsInteger(10, Attr)) 5933 return MatchOperand_ParseFail; 5934 5935 SMLoc S = Parser.getTok().getLoc(); 5936 Parser.Lex(); 5937 if (Attr > 63) { 5938 Error(S, "out of bounds attr"); 5939 return MatchOperand_ParseFail; 5940 } 5941 5942 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5943 5944 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5945 AMDGPUOperand::ImmTyInterpAttr)); 5946 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5947 AMDGPUOperand::ImmTyAttrChan)); 5948 return MatchOperand_Success; 5949 } 5950 5951 //===----------------------------------------------------------------------===// 5952 // exp 5953 //===----------------------------------------------------------------------===// 5954 5955 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5956 uint8_t &Val) { 5957 if (Str == "null") { 5958 Val = Exp::ET_NULL; 5959 return MatchOperand_Success; 5960 } 5961 5962 if (Str.startswith("mrt")) { 5963 Str = Str.drop_front(3); 5964 if (Str == "z") { // == mrtz 5965 Val = Exp::ET_MRTZ; 5966 return MatchOperand_Success; 5967 } 5968 5969 if (Str.getAsInteger(10, Val)) 5970 return MatchOperand_ParseFail; 5971 5972 if (Val > Exp::ET_MRT7) 5973 return MatchOperand_ParseFail; 5974 5975 return MatchOperand_Success; 5976 } 5977 5978 if (Str.startswith("pos")) { 5979 Str = Str.drop_front(3); 5980 if (Str.getAsInteger(10, Val)) 5981 return MatchOperand_ParseFail; 5982 5983 if (Val > (isGFX10Plus() ? 
4 : 3)) 5984 return MatchOperand_ParseFail; 5985 5986 Val += Exp::ET_POS0; 5987 return MatchOperand_Success; 5988 } 5989 5990 if (isGFX10Plus() && Str == "prim") { 5991 Val = Exp::ET_PRIM; 5992 return MatchOperand_Success; 5993 } 5994 5995 if (Str.startswith("param")) { 5996 Str = Str.drop_front(5); 5997 if (Str.getAsInteger(10, Val)) 5998 return MatchOperand_ParseFail; 5999 6000 if (Val >= 32) 6001 return MatchOperand_ParseFail; 6002 6003 Val += Exp::ET_PARAM0; 6004 return MatchOperand_Success; 6005 } 6006 6007 return MatchOperand_ParseFail; 6008 } 6009 6010 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6011 if (!isToken(AsmToken::Identifier)) 6012 return MatchOperand_NoMatch; 6013 6014 SMLoc S = getLoc(); 6015 6016 uint8_t Val; 6017 auto Res = parseExpTgtImpl(getTokenStr(), Val); 6018 if (Res != MatchOperand_Success) { 6019 Error(S, "invalid exp target"); 6020 return Res; 6021 } 6022 6023 Parser.Lex(); 6024 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 6025 AMDGPUOperand::ImmTyExpTgt)); 6026 return MatchOperand_Success; 6027 } 6028 6029 //===----------------------------------------------------------------------===// 6030 // parser helpers 6031 //===----------------------------------------------------------------------===// 6032 6033 bool 6034 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6035 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6036 } 6037 6038 bool 6039 AMDGPUAsmParser::isId(const StringRef Id) const { 6040 return isId(getToken(), Id); 6041 } 6042 6043 bool 6044 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6045 return getTokenKind() == Kind; 6046 } 6047 6048 bool 6049 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6050 if (isId(Id)) { 6051 lex(); 6052 return true; 6053 } 6054 return false; 6055 } 6056 6057 bool 6058 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6059 if (isId(Id) && peekToken().is(Kind)) { 6060 lex(); 6061 lex(); 6062 return true; 6063 } 6064 return false; 6065 } 6066 6067 bool 6068 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6069 if (isToken(Kind)) { 6070 lex(); 6071 return true; 6072 } 6073 return false; 6074 } 6075 6076 bool 6077 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6078 const StringRef ErrMsg) { 6079 if (!trySkipToken(Kind)) { 6080 Error(getLoc(), ErrMsg); 6081 return false; 6082 } 6083 return true; 6084 } 6085 6086 bool 6087 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6088 SMLoc S = getLoc(); 6089 6090 const MCExpr *Expr; 6091 if (Parser.parseExpression(Expr)) 6092 return false; 6093 6094 if (Expr->evaluateAsAbsolute(Imm)) 6095 return true; 6096 6097 if (Expected.empty()) { 6098 Error(S, "expected absolute expression"); 6099 } else { 6100 Error(S, Twine("expected ", Expected) + 6101 Twine(" or an absolute expression")); 6102 } 6103 return false; 6104 } 6105 6106 bool 6107 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6108 SMLoc S = getLoc(); 6109 6110 const MCExpr *Expr; 6111 if (Parser.parseExpression(Expr)) 6112 return false; 6113 6114 int64_t IntVal; 6115 if (Expr->evaluateAsAbsolute(IntVal)) { 6116 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6117 } else { 6118 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6119 } 6120 return true; 6121 } 6122 6123 bool 6124 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6125 if (isToken(AsmToken::String)) { 6126 Val = getToken().getStringContents(); 
6127 lex(); 6128 return true; 6129 } else { 6130 Error(getLoc(), ErrMsg); 6131 return false; 6132 } 6133 } 6134 6135 bool 6136 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6137 if (isToken(AsmToken::Identifier)) { 6138 Val = getTokenStr(); 6139 lex(); 6140 return true; 6141 } else { 6142 Error(getLoc(), ErrMsg); 6143 return false; 6144 } 6145 } 6146 6147 AsmToken 6148 AMDGPUAsmParser::getToken() const { 6149 return Parser.getTok(); 6150 } 6151 6152 AsmToken 6153 AMDGPUAsmParser::peekToken() { 6154 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6155 } 6156 6157 void 6158 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6159 auto TokCount = getLexer().peekTokens(Tokens); 6160 6161 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6162 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6163 } 6164 6165 AsmToken::TokenKind 6166 AMDGPUAsmParser::getTokenKind() const { 6167 return getLexer().getKind(); 6168 } 6169 6170 SMLoc 6171 AMDGPUAsmParser::getLoc() const { 6172 return getToken().getLoc(); 6173 } 6174 6175 StringRef 6176 AMDGPUAsmParser::getTokenStr() const { 6177 return getToken().getString(); 6178 } 6179 6180 void 6181 AMDGPUAsmParser::lex() { 6182 Parser.Lex(); 6183 } 6184 6185 SMLoc 6186 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6187 const OperandVector &Operands) const { 6188 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6189 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6190 if (Test(Op)) 6191 return Op.getStartLoc(); 6192 } 6193 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6194 } 6195 6196 SMLoc 6197 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6198 const OperandVector &Operands) const { 6199 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6200 return getOperandLoc(Test, Operands); 6201 } 6202 6203 SMLoc 6204 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6205 const OperandVector &Operands) const { 6206 auto Test = [=](const AMDGPUOperand& Op) { 6207 return Op.isRegKind() && Op.getReg() == Reg; 6208 }; 6209 return getOperandLoc(Test, Operands); 6210 } 6211 6212 SMLoc 6213 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6214 auto Test = [](const AMDGPUOperand& Op) { 6215 return Op.IsImmKindLiteral() || Op.isExpr(); 6216 }; 6217 return getOperandLoc(Test, Operands); 6218 } 6219 6220 SMLoc 6221 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6222 auto Test = [](const AMDGPUOperand& Op) { 6223 return Op.isImmKindConst(); 6224 }; 6225 return getOperandLoc(Test, Operands); 6226 } 6227 6228 //===----------------------------------------------------------------------===// 6229 // swizzle 6230 //===----------------------------------------------------------------------===// 6231 6232 LLVM_READNONE 6233 static unsigned 6234 encodeBitmaskPerm(const unsigned AndMask, 6235 const unsigned OrMask, 6236 const unsigned XorMask) { 6237 using namespace llvm::AMDGPU::Swizzle; 6238 6239 return BITMASK_PERM_ENC | 6240 (AndMask << BITMASK_AND_SHIFT) | 6241 (OrMask << BITMASK_OR_SHIFT) | 6242 (XorMask << BITMASK_XOR_SHIFT); 6243 } 6244 6245 bool 6246 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6247 const unsigned MinVal, 6248 const unsigned MaxVal, 6249 const StringRef ErrMsg, 6250 SMLoc &Loc) { 6251 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6252 return false; 6253 } 6254 Loc = Parser.getTok().getLoc(); 6255 if (!parseExpr(Op)) { 6256 return false; 6257 } 6258 if (Op < MinVal || Op > MaxVal) { 6259 
Error(Loc, ErrMsg); 6260 return false; 6261 } 6262 6263 return true; 6264 } 6265 6266 bool 6267 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6268 const unsigned MinVal, 6269 const unsigned MaxVal, 6270 const StringRef ErrMsg) { 6271 SMLoc Loc; 6272 for (unsigned i = 0; i < OpNum; ++i) { 6273 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6274 return false; 6275 } 6276 6277 return true; 6278 } 6279 6280 bool 6281 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6282 using namespace llvm::AMDGPU::Swizzle; 6283 6284 int64_t Lane[LANE_NUM]; 6285 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6286 "expected a 2-bit lane id")) { 6287 Imm = QUAD_PERM_ENC; 6288 for (unsigned I = 0; I < LANE_NUM; ++I) { 6289 Imm |= Lane[I] << (LANE_SHIFT * I); 6290 } 6291 return true; 6292 } 6293 return false; 6294 } 6295 6296 bool 6297 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6298 using namespace llvm::AMDGPU::Swizzle; 6299 6300 SMLoc Loc; 6301 int64_t GroupSize; 6302 int64_t LaneIdx; 6303 6304 if (!parseSwizzleOperand(GroupSize, 6305 2, 32, 6306 "group size must be in the interval [2,32]", 6307 Loc)) { 6308 return false; 6309 } 6310 if (!isPowerOf2_64(GroupSize)) { 6311 Error(Loc, "group size must be a power of two"); 6312 return false; 6313 } 6314 if (parseSwizzleOperand(LaneIdx, 6315 0, GroupSize - 1, 6316 "lane id must be in the interval [0,group size - 1]", 6317 Loc)) { 6318 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6319 return true; 6320 } 6321 return false; 6322 } 6323 6324 bool 6325 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6326 using namespace llvm::AMDGPU::Swizzle; 6327 6328 SMLoc Loc; 6329 int64_t GroupSize; 6330 6331 if (!parseSwizzleOperand(GroupSize, 6332 2, 32, 6333 "group size must be in the interval [2,32]", 6334 Loc)) { 6335 return false; 6336 } 6337 if (!isPowerOf2_64(GroupSize)) { 6338 Error(Loc, "group size must be a power of two"); 6339 return false; 6340 } 6341 6342 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6343 return true; 6344 } 6345 6346 bool 6347 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6348 using namespace llvm::AMDGPU::Swizzle; 6349 6350 SMLoc Loc; 6351 int64_t GroupSize; 6352 6353 if (!parseSwizzleOperand(GroupSize, 6354 1, 16, 6355 "group size must be in the interval [1,16]", 6356 Loc)) { 6357 return false; 6358 } 6359 if (!isPowerOf2_64(GroupSize)) { 6360 Error(Loc, "group size must be a power of two"); 6361 return false; 6362 } 6363 6364 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6365 return true; 6366 } 6367 6368 bool 6369 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6370 using namespace llvm::AMDGPU::Swizzle; 6371 6372 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6373 return false; 6374 } 6375 6376 StringRef Ctl; 6377 SMLoc StrLoc = Parser.getTok().getLoc(); 6378 if (!parseString(Ctl)) { 6379 return false; 6380 } 6381 if (Ctl.size() != BITMASK_WIDTH) { 6382 Error(StrLoc, "expected a 5-character mask"); 6383 return false; 6384 } 6385 6386 unsigned AndMask = 0; 6387 unsigned OrMask = 0; 6388 unsigned XorMask = 0; 6389 6390 for (size_t i = 0; i < Ctl.size(); ++i) { 6391 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6392 switch(Ctl[i]) { 6393 default: 6394 Error(StrLoc, "invalid mask"); 6395 return false; 6396 case '0': 6397 break; 6398 case '1': 6399 OrMask |= Mask; 6400 break; 6401 case 'p': 6402 AndMask |= Mask; 6403 break; 6404 case 'i': 6405 AndMask |= Mask; 6406 XorMask |= Mask; 6407 break; 6408 } 6409 } 6410 6411 Imm = 
encodeBitmaskPerm(AndMask, OrMask, XorMask);
6412 return true;
6413 }
6414
6415 bool
6416 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6417
6418 SMLoc OffsetLoc = Parser.getTok().getLoc();
6419
6420 if (!parseExpr(Imm, "a swizzle macro")) {
6421 return false;
6422 }
6423 if (!isUInt<16>(Imm)) {
6424 Error(OffsetLoc, "expected a 16-bit offset");
6425 return false;
6426 }
6427 return true;
6428 }
6429
6430 bool
6431 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6432 using namespace llvm::AMDGPU::Swizzle;
6433
6434 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6435
6436 SMLoc ModeLoc = Parser.getTok().getLoc();
6437 bool Ok = false;
6438
6439 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6440 Ok = parseSwizzleQuadPerm(Imm);
6441 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6442 Ok = parseSwizzleBitmaskPerm(Imm);
6443 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6444 Ok = parseSwizzleBroadcast(Imm);
6445 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6446 Ok = parseSwizzleSwap(Imm);
6447 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6448 Ok = parseSwizzleReverse(Imm);
6449 } else {
6450 Error(ModeLoc, "expected a swizzle mode");
6451 }
6452
6453 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6454 }
6455
6456 return false;
6457 }
6458
6459 OperandMatchResultTy
6460 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6461 SMLoc S = Parser.getTok().getLoc();
6462 int64_t Imm = 0;
6463
6464 if (trySkipId("offset")) {
6465
6466 bool Ok = false;
6467 if (skipToken(AsmToken::Colon, "expected a colon")) {
6468 if (trySkipId("swizzle")) {
6469 Ok = parseSwizzleMacro(Imm);
6470 } else {
6471 Ok = parseSwizzleOffset(Imm);
6472 }
6473 }
6474
6475 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6476
6477 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6478 } else {
6479 // Swizzle "offset" operand is optional.
6480 // If it is omitted, try parsing other optional operands.
6481 return parseOptionalOpr(Operands);
6482 }
6483 }
6484
6485 bool
6486 AMDGPUOperand::isSwizzle() const {
6487 return isImmTy(ImmTySwizzle);
6488 }
6489
6490 //===----------------------------------------------------------------------===//
6491 // VGPR Index Mode
6492 //===----------------------------------------------------------------------===//
6493
6494 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6495
6496 using namespace llvm::AMDGPU::VGPRIndexMode;
6497
6498 if (trySkipToken(AsmToken::RParen)) {
6499 return OFF;
6500 }
6501
6502 int64_t Imm = 0;
6503
6504 while (true) {
6505 unsigned Mode = 0;
6506 SMLoc S = Parser.getTok().getLoc();
6507
6508 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6509 if (trySkipId(IdSymbolic[ModeId])) {
6510 Mode = 1 << ModeId;
6511 break;
6512 }
6513 }
6514
6515 if (Mode == 0) {
6516 Error(S, (Imm == 0)?
6517 "expected a VGPR index mode or a closing parenthesis" : 6518 "expected a VGPR index mode"); 6519 return UNDEF; 6520 } 6521 6522 if (Imm & Mode) { 6523 Error(S, "duplicate VGPR index mode"); 6524 return UNDEF; 6525 } 6526 Imm |= Mode; 6527 6528 if (trySkipToken(AsmToken::RParen)) 6529 break; 6530 if (!skipToken(AsmToken::Comma, 6531 "expected a comma or a closing parenthesis")) 6532 return UNDEF; 6533 } 6534 6535 return Imm; 6536 } 6537 6538 OperandMatchResultTy 6539 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6540 6541 using namespace llvm::AMDGPU::VGPRIndexMode; 6542 6543 int64_t Imm = 0; 6544 SMLoc S = Parser.getTok().getLoc(); 6545 6546 if (getLexer().getKind() == AsmToken::Identifier && 6547 Parser.getTok().getString() == "gpr_idx" && 6548 getLexer().peekTok().is(AsmToken::LParen)) { 6549 6550 Parser.Lex(); 6551 Parser.Lex(); 6552 6553 Imm = parseGPRIdxMacro(); 6554 if (Imm == UNDEF) 6555 return MatchOperand_ParseFail; 6556 6557 } else { 6558 if (getParser().parseAbsoluteExpression(Imm)) 6559 return MatchOperand_ParseFail; 6560 if (Imm < 0 || !isUInt<4>(Imm)) { 6561 Error(S, "invalid immediate: only 4-bit values are legal"); 6562 return MatchOperand_ParseFail; 6563 } 6564 } 6565 6566 Operands.push_back( 6567 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6568 return MatchOperand_Success; 6569 } 6570 6571 bool AMDGPUOperand::isGPRIdxMode() const { 6572 return isImmTy(ImmTyGprIdxMode); 6573 } 6574 6575 //===----------------------------------------------------------------------===// 6576 // sopp branch targets 6577 //===----------------------------------------------------------------------===// 6578 6579 OperandMatchResultTy 6580 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6581 6582 // Make sure we are not parsing something 6583 // that looks like a label or an expression but is not. 6584 // This will improve error messages. 6585 if (isRegister() || isModifier()) 6586 return MatchOperand_NoMatch; 6587 6588 if (!parseExpr(Operands)) 6589 return MatchOperand_ParseFail; 6590 6591 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6592 assert(Opr.isImm() || Opr.isExpr()); 6593 SMLoc Loc = Opr.getStartLoc(); 6594 6595 // Currently we do not support arbitrary expressions as branch targets. 6596 // Only labels and absolute expressions are accepted. 
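// For example, a bare label or a constant offset is accepted here, while a
// compound expression such as 'label+4' is diagnosed below.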
6597 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6598 Error(Loc, "expected an absolute expression or a label"); 6599 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6600 Error(Loc, "expected a 16-bit signed jump offset"); 6601 } 6602 6603 return MatchOperand_Success; 6604 } 6605 6606 //===----------------------------------------------------------------------===// 6607 // Boolean holding registers 6608 //===----------------------------------------------------------------------===// 6609 6610 OperandMatchResultTy 6611 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6612 return parseReg(Operands); 6613 } 6614 6615 //===----------------------------------------------------------------------===// 6616 // mubuf 6617 //===----------------------------------------------------------------------===// 6618 6619 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6620 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6621 } 6622 6623 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6624 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6625 } 6626 6627 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6628 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6629 } 6630 6631 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6632 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6633 } 6634 6635 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6636 const OperandVector &Operands, 6637 bool IsAtomic, 6638 bool IsAtomicReturn, 6639 bool IsLds) { 6640 bool IsLdsOpcode = IsLds; 6641 bool HasLdsModifier = false; 6642 OptionalImmIndexMap OptionalIdx; 6643 assert(IsAtomicReturn ? IsAtomic : true); 6644 unsigned FirstOperandIdx = 1; 6645 6646 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6647 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6648 6649 // Add the register arguments 6650 if (Op.isReg()) { 6651 Op.addRegOperands(Inst, 1); 6652 // Insert a tied src for atomic return dst. 6653 // This cannot be postponed as subsequent calls to 6654 // addImmOperands rely on correct number of MC operands. 6655 if (IsAtomicReturn && i == FirstOperandIdx) 6656 Op.addRegOperands(Inst, 1); 6657 continue; 6658 } 6659 6660 // Handle the case where soffset is an immediate 6661 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6662 Op.addImmOperands(Inst, 1); 6663 continue; 6664 } 6665 6666 HasLdsModifier |= Op.isLDS(); 6667 6668 // Handle tokens like 'offen' which are sometimes hard-coded into the 6669 // asm string. There are no MCInst operands for these. 6670 if (Op.isToken()) { 6671 continue; 6672 } 6673 assert(Op.isImm()); 6674 6675 // Handle optional arguments 6676 OptionalIdx[Op.getImmTy()] = i; 6677 } 6678 6679 // This is a workaround for an llvm quirk which may result in an 6680 // incorrect instruction selection. Lds and non-lds versions of 6681 // MUBUF instructions are identical except that lds versions 6682 // have mandatory 'lds' modifier. However this modifier follows 6683 // optional modifiers and llvm asm matcher regards this 'lds' 6684 // modifier as an optional one. As a result, an lds version 6685 // of opcode may be selected even if it has no 'lds' modifier. 6686 if (IsLdsOpcode && !HasLdsModifier) { 6687 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6688 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6689 Inst.setOpcode(NoLdsOpcode); 6690 IsLdsOpcode = false; 6691 } 6692 } 6693 6694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6695 if (!IsAtomic || IsAtomicReturn) { 6696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, 6697 IsAtomicReturn ? -1 : 0); 6698 } 6699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6700 6701 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6702 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6703 } 6704 6705 if (isGFX10Plus()) 6706 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6707 } 6708 6709 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6710 OptionalImmIndexMap OptionalIdx; 6711 6712 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6713 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6714 6715 // Add the register arguments 6716 if (Op.isReg()) { 6717 Op.addRegOperands(Inst, 1); 6718 continue; 6719 } 6720 6721 // Handle the case where soffset is an immediate 6722 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6723 Op.addImmOperands(Inst, 1); 6724 continue; 6725 } 6726 6727 // Handle tokens like 'offen' which are sometimes hard-coded into the 6728 // asm string. There are no MCInst operands for these. 6729 if (Op.isToken()) { 6730 continue; 6731 } 6732 assert(Op.isImm()); 6733 6734 // Handle optional arguments 6735 OptionalIdx[Op.getImmTy()] = i; 6736 } 6737 6738 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6739 AMDGPUOperand::ImmTyOffset); 6740 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6741 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6743 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6744 6745 if (isGFX10Plus()) 6746 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6747 } 6748 6749 //===----------------------------------------------------------------------===// 6750 // mimg 6751 //===----------------------------------------------------------------------===// 6752 6753 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6754 bool IsAtomic) { 6755 unsigned I = 1; 6756 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6757 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6758 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6759 } 6760 6761 if (IsAtomic) { 6762 // Add src, same as dst 6763 assert(Desc.getNumDefs() == 1); 6764 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6765 } 6766 6767 OptionalImmIndexMap OptionalIdx; 6768 6769 for (unsigned E = Operands.size(); I != E; ++I) { 6770 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6771 6772 // Add the register arguments 6773 if (Op.isReg()) { 6774 Op.addRegOperands(Inst, 1); 6775 } else if (Op.isImmModifier()) { 6776 OptionalIdx[Op.getImmTy()] = I; 6777 } else if (!Op.isToken()) { 6778 llvm_unreachable("unexpected operand type"); 6779 } 6780 } 6781 6782 bool IsGFX10Plus = isGFX10Plus(); 6783 6784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6785 if (IsGFX10Plus) 6786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6788 if (IsGFX10Plus) 6789 addOptionalImmOperand(Inst, 
Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6792 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6793 if (IsGFX10Plus) 6794 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6797 if (!IsGFX10Plus) 6798 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6799 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6800 } 6801 6802 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6803 cvtMIMG(Inst, Operands, true); 6804 } 6805 6806 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6807 const OperandVector &Operands) { 6808 for (unsigned I = 1; I < Operands.size(); ++I) { 6809 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6810 if (Operand.isReg()) 6811 Operand.addRegOperands(Inst, 1); 6812 } 6813 6814 Inst.addOperand(MCOperand::createImm(1)); // a16 6815 } 6816 6817 //===----------------------------------------------------------------------===// 6818 // smrd 6819 //===----------------------------------------------------------------------===// 6820 6821 bool AMDGPUOperand::isSMRDOffset8() const { 6822 return isImm() && isUInt<8>(getImm()); 6823 } 6824 6825 bool AMDGPUOperand::isSMEMOffset() const { 6826 return isImm(); // Offset range is checked later by validator. 6827 } 6828 6829 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6830 // 32-bit literals are only supported on CI and we only want to use them 6831 // when the offset is > 8-bits. 6832 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6833 } 6834 6835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6836 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6837 } 6838 6839 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6840 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6841 } 6842 6843 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6844 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6845 } 6846 6847 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6848 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6849 } 6850 6851 //===----------------------------------------------------------------------===// 6852 // vop3 6853 //===----------------------------------------------------------------------===// 6854 6855 static bool ConvertOmodMul(int64_t &Mul) { 6856 if (Mul != 1 && Mul != 2 && Mul != 4) 6857 return false; 6858 6859 Mul >>= 1; 6860 return true; 6861 } 6862 6863 static bool ConvertOmodDiv(int64_t &Div) { 6864 if (Div == 1) { 6865 Div = 0; 6866 return true; 6867 } 6868 6869 if (Div == 2) { 6870 Div = 3; 6871 return true; 6872 } 6873 6874 return false; 6875 } 6876 6877 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6878 if (BoundCtrl == 0) { 6879 BoundCtrl = 1; 6880 return true; 6881 } 6882 6883 if (BoundCtrl == -1) { 6884 BoundCtrl = 0; 6885 return true; 6886 } 6887 6888 return false; 6889 } 6890 6891 // Note: the order in this table matches the order of operands in AsmString. 
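// Each entry gives the operand name, its immediate type, whether it is a
// simple bit flag (parsed as 'name' or 'noname') or a 'name:value' operand,
// and an optional callback used to convert the parsed value.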
6892 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6893 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6894 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6895 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6896 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6897 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6898 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6899 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6900 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6901 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6902 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6903 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6904 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6905 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6906 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6907 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6908 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6909 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6910 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6911 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6912 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6913 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6914 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6915 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6916 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6917 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6918 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6919 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6920 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6921 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6922 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6923 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6924 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6925 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6926 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6927 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6928 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6929 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6930 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6931 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6932 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6933 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6934 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6935 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6936 }; 6937 6938 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6939 6940 OperandMatchResultTy res = parseOptionalOpr(Operands); 6941 6942 // This is a hack to enable hardcoded mandatory operands which follow 6943 // optional operands. 6944 // 6945 // Current design assumes that all operands after the first optional operand 6946 // are also optional. However implementation of some instructions violates 6947 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6948 // 6949 // To alleviate this problem, we have to (implicitly) parse extra operands 6950 // to make sure autogenerated parser of custom operands never hit hardcoded 6951 // mandatory operands. 
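// The lookahead below is bounded by MAX_OPR_LOOKAHEAD, so only a limited
// number of additional optional operands is consumed here.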
6952 6953 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6954 if (res != MatchOperand_Success || 6955 isToken(AsmToken::EndOfStatement)) 6956 break; 6957 6958 trySkipToken(AsmToken::Comma); 6959 res = parseOptionalOpr(Operands); 6960 } 6961 6962 return res; 6963 } 6964 6965 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6966 OperandMatchResultTy res; 6967 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6968 // try to parse any optional operand here 6969 if (Op.IsBit) { 6970 res = parseNamedBit(Op.Name, Operands, Op.Type); 6971 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6972 res = parseOModOperand(Operands); 6973 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6974 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6975 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6976 res = parseSDWASel(Operands, Op.Name, Op.Type); 6977 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6978 res = parseSDWADstUnused(Operands); 6979 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6980 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6981 Op.Type == AMDGPUOperand::ImmTyNegLo || 6982 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6983 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6984 Op.ConvertResult); 6985 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6986 res = parseDim(Operands); 6987 } else { 6988 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6989 } 6990 if (res != MatchOperand_NoMatch) { 6991 return res; 6992 } 6993 } 6994 return MatchOperand_NoMatch; 6995 } 6996 6997 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6998 StringRef Name = Parser.getTok().getString(); 6999 if (Name == "mul") { 7000 return parseIntWithPrefix("mul", Operands, 7001 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7002 } 7003 7004 if (Name == "div") { 7005 return parseIntWithPrefix("div", Operands, 7006 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7007 } 7008 7009 return MatchOperand_NoMatch; 7010 } 7011 7012 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7013 cvtVOP3P(Inst, Operands); 7014 7015 int Opc = Inst.getOpcode(); 7016 7017 int SrcNum; 7018 const int Ops[] = { AMDGPU::OpName::src0, 7019 AMDGPU::OpName::src1, 7020 AMDGPU::OpName::src2 }; 7021 for (SrcNum = 0; 7022 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7023 ++SrcNum); 7024 assert(SrcNum > 0); 7025 7026 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7027 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7028 7029 if ((OpSel & (1 << SrcNum)) != 0) { 7030 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7031 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7032 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7033 } 7034 } 7035 7036 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7037 // 1. This operand is input modifiers 7038 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7039 // 2. This is not last operand 7040 && Desc.NumOperands > (OpNum + 1) 7041 // 3. Next operand is register class 7042 && Desc.OpInfo[OpNum + 1].RegClass != -1 7043 // 4. 
Next register is not tied to any other operand 7044 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7045 } 7046 7047 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7048 { 7049 OptionalImmIndexMap OptionalIdx; 7050 unsigned Opc = Inst.getOpcode(); 7051 7052 unsigned I = 1; 7053 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7054 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7055 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7056 } 7057 7058 for (unsigned E = Operands.size(); I != E; ++I) { 7059 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7060 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7061 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7062 } else if (Op.isInterpSlot() || 7063 Op.isInterpAttr() || 7064 Op.isAttrChan()) { 7065 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7066 } else if (Op.isImmModifier()) { 7067 OptionalIdx[Op.getImmTy()] = I; 7068 } else { 7069 llvm_unreachable("unhandled operand type"); 7070 } 7071 } 7072 7073 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7075 } 7076 7077 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7079 } 7080 7081 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7082 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7083 } 7084 } 7085 7086 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7087 OptionalImmIndexMap &OptionalIdx) { 7088 unsigned Opc = Inst.getOpcode(); 7089 7090 unsigned I = 1; 7091 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7092 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7093 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7094 } 7095 7096 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7097 // This instruction has src modifiers 7098 for (unsigned E = Operands.size(); I != E; ++I) { 7099 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7100 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7101 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7102 } else if (Op.isImmModifier()) { 7103 OptionalIdx[Op.getImmTy()] = I; 7104 } else if (Op.isRegOrImm()) { 7105 Op.addRegOrImmOperands(Inst, 1); 7106 } else { 7107 llvm_unreachable("unhandled operand type"); 7108 } 7109 } 7110 } else { 7111 // No src modifiers 7112 for (unsigned E = Operands.size(); I != E; ++I) { 7113 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7114 if (Op.isMod()) { 7115 OptionalIdx[Op.getImmTy()] = I; 7116 } else { 7117 Op.addRegOrImmOperands(Inst, 1); 7118 } 7119 } 7120 } 7121 7122 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7124 } 7125 7126 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7128 } 7129 7130 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7131 // it has src2 register operand that is tied to dst operand 7132 // we don't allow modifiers for this operand in assembler so src2_modifiers 7133 // should be 0. 
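// The tied src2 operand and its zeroed src2_modifiers are therefore
// inserted here explicitly instead of being parsed from the source.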
7134 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7135 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7136 Opc == AMDGPU::V_MAC_F32_e64_vi || 7137 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7138 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7139 Opc == AMDGPU::V_MAC_F16_e64_vi || 7140 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7141 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7142 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7143 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7144 auto it = Inst.begin(); 7145 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7146 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7147 ++it; 7148 // Copy the operand to ensure it's not invalidated when Inst grows. 7149 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7150 } 7151 } 7152 7153 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7154 OptionalImmIndexMap OptionalIdx; 7155 cvtVOP3(Inst, Operands, OptionalIdx); 7156 } 7157 7158 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7159 const OperandVector &Operands) { 7160 OptionalImmIndexMap OptIdx; 7161 const int Opc = Inst.getOpcode(); 7162 const MCInstrDesc &Desc = MII.get(Opc); 7163 7164 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7165 7166 cvtVOP3(Inst, Operands, OptIdx); 7167 7168 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7169 assert(!IsPacked); 7170 Inst.addOperand(Inst.getOperand(0)); 7171 } 7172 7173 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7174 // instruction, and then figure out where to actually put the modifiers 7175 7176 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7177 7178 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7179 if (OpSelHiIdx != -1) { 7180 int DefaultVal = IsPacked ? 
  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}
bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = std::string(getLexer().getTok().getString());
    Parser.Lex();
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;
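  // Informal note: each dpp_ctrl form below maps into a distinct range of the
  // 9-bit encoding, e.g. quad_perm:[0,1,2,3] packs its lane selects into 2-bit
  // fields (0xE4 for the identity permutation), and row_shl:N encodes as
  // ROW_SHL0 | N for N in 1..15.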
  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10Plus() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
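// The default* helpers below supply values for optional DPP operands that
// were not written in the source: row_mask and bank_mask default to 0xf
// (all rows/banks enabled), while bound_ctrl and fi default to 0.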
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }
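  // For DPP8 the fi value collected above is appended as an immediate; for
  // classic DPP the optional row_mask/bank_mask/bound_ctrl (and fi, where the
  // opcode has it) operands are added below, using defaults when omitted.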
  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
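// Converts a parsed SDWA instruction, e.g. (assumed syntax)
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
// filling in DWORD / UNUSED_PRESERVE defaults for any sel or dst_unused
// operands that were omitted.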
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst,
                            Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // when the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ?
        Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }