//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

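    // Pack the parsed source modifiers into the immediate encoding expected
    // by SISrcMods: abs/neg for floating-point operands, sext for integers.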
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0;
      Operand |= Neg ? SISrcMods::NEG : 0;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

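  // An immediate operand may either be encodable as an inline constant or
  // have to be emitted as a literal; these checks are defined out of line
  // below, once the parser class is available.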
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(MVT type) const {
    return isRegKind() || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
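  // DPP and SDWA control operands.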
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

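  // Source modifiers are stored alongside whichever payload they apply to:
  // the register for register operands, the immediate for immediate operands.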
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyHigh: OS << "High"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadata();

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
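      // Expose the target ISA version to the source being assembled via
      // pre-defined assembler symbols.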
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

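  // Accessors for the target streamer, register/instruction info and feature
  // bits used throughout the parser.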
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

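  // Parsers for the s_waitcnt and hwreg(...) operand syntaxes.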
  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

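  // VOP3/VOP3P, MIMG, DPP and SDWA operand parsing and MCInst conversion
  // helpers.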
  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

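// Check whether an FP literal can be converted to the given type losing only
// precision; conversions that overflow or underflow are rejected.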
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but such literals
    // are still accepted.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg();
  else if (AsmParser->isGFX9())
    return isRegKind() || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

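// Encode an FP literal for a K-immediate operand: integer tokens are
// truncated to the operand width, FP tokens are converted from double to the
// target semantics.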
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

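// Append the next register in a list or range to the register being built:
// LO/HI halves of special registers are merged into the full register, while
// VGPR/SGPR/TTMP registers must be consecutive and simply grow the width.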
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1781 Size = std::min(RegWidth, 4u); 1782 } 1783 if (RegNum % Size != 0) 1784 return false; 1785 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1786 RegNum = RegNum / Size; 1787 int RCID = getRegClass(RegKind, RegWidth); 1788 if (RCID == -1) 1789 return false; 1790 const MCRegisterClass RC = TRI->getRegClass(RCID); 1791 if (RegNum >= RC.getNumRegs()) 1792 return false; 1793 Reg = RC.getRegister(RegNum); 1794 break; 1795 } 1796 1797 default: 1798 llvm_unreachable("unexpected register kind"); 1799 } 1800 1801 if (!subtargetHasRegister(*TRI, Reg)) 1802 return false; 1803 return true; 1804 } 1805 1806 Optional<StringRef> 1807 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1808 switch (RegKind) { 1809 case IS_VGPR: 1810 return StringRef(".amdgcn.next_free_vgpr"); 1811 case IS_SGPR: 1812 return StringRef(".amdgcn.next_free_sgpr"); 1813 default: 1814 return None; 1815 } 1816 } 1817 1818 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1819 auto SymbolName = getGprCountSymbolName(RegKind); 1820 assert(SymbolName && "initializing invalid register kind"); 1821 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1822 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1823 } 1824 1825 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1826 unsigned DwordRegIndex, 1827 unsigned RegWidth) { 1828 // Symbols are only defined for GCN targets 1829 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1830 return true; 1831 1832 auto SymbolName = getGprCountSymbolName(RegKind); 1833 if (!SymbolName) 1834 return true; 1835 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1836 1837 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1838 int64_t OldCount; 1839 1840 if (!Sym->isVariable()) 1841 return !Error(getParser().getTok().getLoc(), 1842 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1843 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1844 return !Error( 1845 getParser().getTok().getLoc(), 1846 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1847 1848 if (OldCount <= NewMax) 1849 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1850 1851 return true; 1852 } 1853 1854 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1855 const auto &Tok = Parser.getTok(); 1856 SMLoc StartLoc = Tok.getLoc(); 1857 SMLoc EndLoc = Tok.getEndLoc(); 1858 RegisterKind RegKind; 1859 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1860 1861 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1862 return nullptr; 1863 } 1864 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1865 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1866 return nullptr; 1867 } else 1868 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 1869 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 1870 } 1871 1872 bool 1873 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { 1874 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && 1875 (getLexer().getKind() == AsmToken::Integer || 1876 getLexer().getKind() == AsmToken::Real)) { 1877 // This is a workaround for handling operands like these: 1878 // |1.0| 1879 // |-1| 1880 // This syntax is not compatible with syntax of standard 1881 // MC expressions (due to the trailing '|'). 
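    // A minimal sketch of the situation (illustrative): in
    //   v_max_f32 v0, |1.0|, v1
    // the literal is parsed here with parsePrimaryExpr() so that the trailing
    // '|' is left for the caller to consume as the closing abs modifier.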
1882 1883 SMLoc EndLoc; 1884 const MCExpr *Expr; 1885 1886 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 1887 return true; 1888 } 1889 1890 return !Expr->evaluateAsAbsolute(Val); 1891 } 1892 1893 return getParser().parseAbsoluteExpression(Val); 1894 } 1895 1896 OperandMatchResultTy 1897 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { 1898 // TODO: add syntactic sugar for 1/(2*PI) 1899 bool Minus = false; 1900 if (getLexer().getKind() == AsmToken::Minus) { 1901 const AsmToken NextToken = getLexer().peekTok(); 1902 if (!NextToken.is(AsmToken::Integer) && 1903 !NextToken.is(AsmToken::Real)) { 1904 return MatchOperand_NoMatch; 1905 } 1906 Minus = true; 1907 Parser.Lex(); 1908 } 1909 1910 SMLoc S = Parser.getTok().getLoc(); 1911 switch(getLexer().getKind()) { 1912 case AsmToken::Integer: { 1913 int64_t IntVal; 1914 if (parseAbsoluteExpr(IntVal, AbsMod)) 1915 return MatchOperand_ParseFail; 1916 if (Minus) 1917 IntVal *= -1; 1918 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 1919 return MatchOperand_Success; 1920 } 1921 case AsmToken::Real: { 1922 int64_t IntVal; 1923 if (parseAbsoluteExpr(IntVal, AbsMod)) 1924 return MatchOperand_ParseFail; 1925 1926 APFloat F(BitsToDouble(IntVal)); 1927 if (Minus) 1928 F.changeSign(); 1929 Operands.push_back( 1930 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, 1931 AMDGPUOperand::ImmTyNone, true)); 1932 return MatchOperand_Success; 1933 } 1934 default: 1935 return MatchOperand_NoMatch; 1936 } 1937 } 1938 1939 OperandMatchResultTy 1940 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 1941 if (auto R = parseRegister()) { 1942 assert(R->isReg()); 1943 R->Reg.IsForcedVOP3 = isForcedVOP3(); 1944 Operands.push_back(std::move(R)); 1945 return MatchOperand_Success; 1946 } 1947 return MatchOperand_NoMatch; 1948 } 1949 1950 OperandMatchResultTy 1951 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 1952 auto res = parseImm(Operands, AbsMod); 1953 if (res != MatchOperand_NoMatch) { 1954 return res; 1955 } 1956 1957 return parseReg(Operands); 1958 } 1959 1960 OperandMatchResultTy 1961 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 1962 bool AllowImm) { 1963 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 1964 1965 if (getLexer().getKind()== AsmToken::Minus) { 1966 const AsmToken NextToken = getLexer().peekTok(); 1967 1968 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 1969 if (NextToken.is(AsmToken::Minus)) { 1970 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 1971 return MatchOperand_ParseFail; 1972 } 1973 1974 // '-' followed by an integer literal N should be interpreted as integer 1975 // negation rather than a floating-point NEG modifier applied to N. 
    // Besides being counter-intuitive, such use of the floating-point NEG
    // modifier results in a different meaning of integer literals used with
    // VOP1/2/C and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity.
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = false;

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "sext") {
    Parser.Lex();
    Sext = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after sext");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Sext) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing
parentheses"); 2099 return MatchOperand_ParseFail; 2100 } 2101 Parser.Lex(); 2102 Mods.Sext = true; 2103 } 2104 2105 if (Mods.hasIntModifiers()) { 2106 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2107 Op.setModifiers(Mods); 2108 } 2109 2110 return MatchOperand_Success; 2111 } 2112 2113 OperandMatchResultTy 2114 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2115 return parseRegOrImmWithFPInputMods(Operands, false); 2116 } 2117 2118 OperandMatchResultTy 2119 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2120 return parseRegOrImmWithIntInputMods(Operands, false); 2121 } 2122 2123 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2124 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2125 if (Reg) { 2126 Operands.push_back(std::move(Reg)); 2127 return MatchOperand_Success; 2128 } 2129 2130 const AsmToken &Tok = Parser.getTok(); 2131 if (Tok.getString() == "off") { 2132 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(), 2133 AMDGPUOperand::ImmTyOff, false)); 2134 Parser.Lex(); 2135 return MatchOperand_Success; 2136 } 2137 2138 return MatchOperand_NoMatch; 2139 } 2140 2141 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2142 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2143 2144 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2145 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2146 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2147 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2148 return Match_InvalidOperand; 2149 2150 if ((TSFlags & SIInstrFlags::VOP3) && 2151 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2152 getForcedEncodingSize() != 64) 2153 return Match_PreferE32; 2154 2155 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2156 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2157 // v_mac_f32/16 allow only dst_sel == DWORD; 2158 auto OpNum = 2159 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2160 const auto &Op = Inst.getOperand(OpNum); 2161 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2162 return Match_InvalidOperand; 2163 } 2164 } 2165 2166 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2167 // FIXME: Produces error without correct column reported. 
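    // On such targets only a zero offset is acceptable, so any non-zero
    // offset operand makes the instruction invalid.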
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
    const auto &Op = Inst.getOperand(OpNum);
    if (Op.getImm() != 0)
      return Match_InvalidOperand;
  }

  return Match_Success;
}

// Which asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
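// (For reference, and assuming the usual GCN encoding rules: 32-bit inline
// constants cover the integers -16..64 plus a small set of FP values such as
// 0.5, 1.0, 2.0, 4.0 and their negations, and optionally 1/(2*pi); any other
// immediate is a literal and takes the single constant bus slot checked in
// validateConstantBusLimitations below.)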
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    } else {
      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
    }
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  }
  return !MO.isReg() ||
         isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}

bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
2309 // Note that this code mimics SIInstrInfo::verifyInstruction 2310 if (Reg != SGPRUsed) { 2311 ++ConstantBusUseCount; 2312 } 2313 SGPRUsed = Reg; 2314 } else { // Expression or a literal 2315 ++ConstantBusUseCount; 2316 } 2317 } 2318 } 2319 } 2320 2321 return ConstantBusUseCount <= 1; 2322 } 2323 2324 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2325 const unsigned Opcode = Inst.getOpcode(); 2326 const MCInstrDesc &Desc = MII.get(Opcode); 2327 2328 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2329 if (DstIdx == -1 || 2330 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2331 return true; 2332 } 2333 2334 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2335 2336 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2337 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2338 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2339 2340 assert(DstIdx != -1); 2341 const MCOperand &Dst = Inst.getOperand(DstIdx); 2342 assert(Dst.isReg()); 2343 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2344 2345 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2346 2347 for (int SrcIdx : SrcIndices) { 2348 if (SrcIdx == -1) break; 2349 const MCOperand &Src = Inst.getOperand(SrcIdx); 2350 if (Src.isReg()) { 2351 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2352 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2353 return false; 2354 } 2355 } 2356 } 2357 2358 return true; 2359 } 2360 2361 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2362 2363 const unsigned Opc = Inst.getOpcode(); 2364 const MCInstrDesc &Desc = MII.get(Opc); 2365 2366 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2367 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2368 assert(ClampIdx != -1); 2369 return Inst.getOperand(ClampIdx).getImm() == 0; 2370 } 2371 2372 return true; 2373 } 2374 2375 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2376 2377 const unsigned Opc = Inst.getOpcode(); 2378 const MCInstrDesc &Desc = MII.get(Opc); 2379 2380 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2381 return true; 2382 2383 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2384 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2385 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2386 2387 assert(VDataIdx != -1); 2388 assert(DMaskIdx != -1); 2389 assert(TFEIdx != -1); 2390 2391 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2392 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2393 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2394 if (DMask == 0) 2395 DMask = 1; 2396 2397 unsigned DataSize = 2398 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2399 if (hasPackedD16()) { 2400 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2401 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2402 DataSize = (DataSize + 1) / 2; 2403 } 2404 2405 return (VDataSize / 4) == DataSize + TFESize; 2406 } 2407 2408 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2409 2410 const unsigned Opc = Inst.getOpcode(); 2411 const MCInstrDesc &Desc = MII.get(Opc); 2412 2413 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2414 return true; 2415 if (!Desc.mayLoad() || !Desc.mayStore()) 2416 return true; // Not atomic 2417 2418 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2419 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2420 2421 // This is an incomplete check because image_atomic_cmpswap 2422 // may only use 0x3 and 0xf while other atomic operations 2423 // may use 0x1 and 0x3. However these limitations are 2424 // verified when we check that dmask matches dst size. 2425 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2426 } 2427 2428 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2429 2430 const unsigned Opc = Inst.getOpcode(); 2431 const MCInstrDesc &Desc = MII.get(Opc); 2432 2433 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2434 return true; 2435 2436 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2437 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2438 2439 // GATHER4 instructions use dmask in a different fashion compared to 2440 // other MIMG instructions. The only useful DMASK values are 2441 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2442 // (red,red,red,red) etc.) The ISA document doesn't mention 2443 // this. 2444 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2445 } 2446 2447 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2448 2449 const unsigned Opc = Inst.getOpcode(); 2450 const MCInstrDesc &Desc = MII.get(Opc); 2451 2452 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2453 return true; 2454 2455 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2456 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2457 if (isCI() || isSI()) 2458 return false; 2459 } 2460 2461 return true; 2462 } 2463 2464 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2465 const SMLoc &IDLoc) { 2466 if (!validateConstantBusLimitations(Inst)) { 2467 Error(IDLoc, 2468 "invalid operand (violates constant bus restrictions)"); 2469 return false; 2470 } 2471 if (!validateEarlyClobberLimitations(Inst)) { 2472 Error(IDLoc, 2473 "destination must be different than all sources"); 2474 return false; 2475 } 2476 if (!validateIntClampSupported(Inst)) { 2477 Error(IDLoc, 2478 "integer clamping is not supported on this GPU"); 2479 return false; 2480 } 2481 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
2482 if (!validateMIMGD16(Inst)) { 2483 Error(IDLoc, 2484 "d16 modifier is not supported on this GPU"); 2485 return false; 2486 } 2487 if (!validateMIMGDataSize(Inst)) { 2488 Error(IDLoc, 2489 "image data size does not match dmask and tfe"); 2490 return false; 2491 } 2492 if (!validateMIMGAtomicDMask(Inst)) { 2493 Error(IDLoc, 2494 "invalid atomic image dmask"); 2495 return false; 2496 } 2497 if (!validateMIMGGatherDMask(Inst)) { 2498 Error(IDLoc, 2499 "invalid image_gather dmask: only one bit must be set"); 2500 return false; 2501 } 2502 2503 return true; 2504 } 2505 2506 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS, 2507 unsigned VariantID = 0); 2508 2509 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2510 OperandVector &Operands, 2511 MCStreamer &Out, 2512 uint64_t &ErrorInfo, 2513 bool MatchingInlineAsm) { 2514 MCInst Inst; 2515 unsigned Result = Match_Success; 2516 for (auto Variant : getMatchedVariants()) { 2517 uint64_t EI; 2518 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2519 Variant); 2520 // We order match statuses from least to most specific. We use most specific 2521 // status as resulting 2522 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2523 if ((R == Match_Success) || 2524 (R == Match_PreferE32) || 2525 (R == Match_MissingFeature && Result != Match_PreferE32) || 2526 (R == Match_InvalidOperand && Result != Match_MissingFeature 2527 && Result != Match_PreferE32) || 2528 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2529 && Result != Match_MissingFeature 2530 && Result != Match_PreferE32)) { 2531 Result = R; 2532 ErrorInfo = EI; 2533 } 2534 if (R == Match_Success) 2535 break; 2536 } 2537 2538 switch (Result) { 2539 default: break; 2540 case Match_Success: 2541 if (!validateInstruction(Inst, IDLoc)) { 2542 return true; 2543 } 2544 Inst.setLoc(IDLoc); 2545 Out.EmitInstruction(Inst, getSTI()); 2546 return false; 2547 2548 case Match_MissingFeature: 2549 return Error(IDLoc, "instruction not supported on this GPU"); 2550 2551 case Match_MnemonicFail: { 2552 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2553 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2554 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2555 return Error(IDLoc, "invalid instruction" + Suggestion, 2556 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2557 } 2558 2559 case Match_InvalidOperand: { 2560 SMLoc ErrorLoc = IDLoc; 2561 if (ErrorInfo != ~0ULL) { 2562 if (ErrorInfo >= Operands.size()) { 2563 return Error(IDLoc, "too few operands for instruction"); 2564 } 2565 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2566 if (ErrorLoc == SMLoc()) 2567 ErrorLoc = IDLoc; 2568 } 2569 return Error(ErrorLoc, "invalid operand for instruction"); 2570 } 2571 2572 case Match_PreferE32: 2573 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2574 "should be encoded as e32"); 2575 } 2576 llvm_unreachable("Implement any new match types added!"); 2577 } 2578 2579 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2580 int64_t Tmp = -1; 2581 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2582 return true; 2583 } 2584 if (getParser().parseAbsoluteExpression(Tmp)) { 2585 return true; 2586 } 2587 Ret = static_cast<uint32_t>(Tmp); 2588 return false; 2589 } 2590 2591 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2592 uint32_t &Minor) { 2593 if 
(ParseAsAbsoluteExpression(Major)) 2594 return TokError("invalid major version"); 2595 2596 if (getLexer().isNot(AsmToken::Comma)) 2597 return TokError("minor version number required, comma expected"); 2598 Lex(); 2599 2600 if (ParseAsAbsoluteExpression(Minor)) 2601 return TokError("invalid minor version"); 2602 2603 return false; 2604 } 2605 2606 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2607 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2608 return TokError("directive only supported for amdgcn architecture"); 2609 2610 std::string Target; 2611 2612 SMLoc TargetStart = getTok().getLoc(); 2613 if (getParser().parseEscapedString(Target)) 2614 return true; 2615 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2616 2617 std::string ExpectedTarget; 2618 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2619 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2620 2621 if (Target != ExpectedTargetOS.str()) 2622 return getParser().Error(TargetRange.Start, "target must match options", 2623 TargetRange); 2624 2625 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2626 return false; 2627 } 2628 2629 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2630 return getParser().Error(Range.Start, "value out of range", Range); 2631 } 2632 2633 bool AMDGPUAsmParser::calculateGPRBlocks( 2634 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2635 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2636 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2637 unsigned &SGPRBlocks) { 2638 // TODO(scott.linder): These calculations are duplicated from 2639 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2640 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2641 2642 unsigned NumVGPRs = NextFreeVGPR; 2643 unsigned NumSGPRs = NextFreeSGPR; 2644 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); 2645 2646 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2647 NumSGPRs > MaxAddressableNumSGPRs) 2648 return OutOfRangeError(SGPRRange); 2649 2650 NumSGPRs += 2651 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2652 2653 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2654 NumSGPRs > MaxAddressableNumSGPRs) 2655 return OutOfRangeError(SGPRRange); 2656 2657 if (Features.test(FeatureSGPRInitBug)) 2658 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2659 2660 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2661 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2662 2663 return false; 2664 } 2665 2666 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2667 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2668 return TokError("directive only supported for amdgcn architecture"); 2669 2670 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2671 return TokError("directive only supported for amdhsa OS"); 2672 2673 StringRef KernelName; 2674 if (getParser().parseIdentifier(KernelName)) 2675 return true; 2676 2677 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2678 2679 StringSet<> Seen; 2680 2681 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2682 2683 SMRange VGPRRange; 2684 uint64_t NextFreeVGPR = 0; 2685 SMRange SGPRRange; 2686 uint64_t NextFreeSGPR = 0; 2687 unsigned UserSGPRCount = 0; 2688 bool ReserveVCC = true; 2689 bool ReserveFlatScr = true; 2690 bool ReserveXNACK = hasXNACK(); 2691 2692 while (true) { 2693 while (getLexer().is(AsmToken::EndOfStatement)) 2694 Lex(); 2695 
2696 if (getLexer().isNot(AsmToken::Identifier)) 2697 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2698 2699 StringRef ID = getTok().getIdentifier(); 2700 SMRange IDRange = getTok().getLocRange(); 2701 Lex(); 2702 2703 if (ID == ".end_amdhsa_kernel") 2704 break; 2705 2706 if (Seen.find(ID) != Seen.end()) 2707 return TokError(".amdhsa_ directives cannot be repeated"); 2708 Seen.insert(ID); 2709 2710 SMLoc ValStart = getTok().getLoc(); 2711 int64_t IVal; 2712 if (getParser().parseAbsoluteExpression(IVal)) 2713 return true; 2714 SMLoc ValEnd = getTok().getLoc(); 2715 SMRange ValRange = SMRange(ValStart, ValEnd); 2716 2717 if (IVal < 0) 2718 return OutOfRangeError(ValRange); 2719 2720 uint64_t Val = IVal; 2721 2722 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2723 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2724 return OutOfRangeError(RANGE); \ 2725 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2726 2727 if (ID == ".amdhsa_group_segment_fixed_size") { 2728 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2729 return OutOfRangeError(ValRange); 2730 KD.group_segment_fixed_size = Val; 2731 } else if (ID == ".amdhsa_private_segment_fixed_size") { 2732 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2733 return OutOfRangeError(ValRange); 2734 KD.private_segment_fixed_size = Val; 2735 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2736 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2737 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2738 Val, ValRange); 2739 UserSGPRCount++; 2740 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2741 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2742 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2743 ValRange); 2744 UserSGPRCount++; 2745 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2746 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2747 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2748 ValRange); 2749 UserSGPRCount++; 2750 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 2751 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2752 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2753 Val, ValRange); 2754 UserSGPRCount++; 2755 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2756 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2757 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2758 ValRange); 2759 UserSGPRCount++; 2760 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2761 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2762 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2763 ValRange); 2764 UserSGPRCount++; 2765 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2766 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2767 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2768 Val, ValRange); 2769 UserSGPRCount++; 2770 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2771 PARSE_BITS_ENTRY( 2772 KD.compute_pgm_rsrc2, 2773 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2774 ValRange); 2775 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2776 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2777 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 2778 ValRange); 2779 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 2780 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2781 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 2782 ValRange); 2783 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 2784 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2785 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 
2786 ValRange); 2787 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 2788 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2789 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 2790 ValRange); 2791 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 2792 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2793 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 2794 ValRange); 2795 } else if (ID == ".amdhsa_next_free_vgpr") { 2796 VGPRRange = ValRange; 2797 NextFreeVGPR = Val; 2798 } else if (ID == ".amdhsa_next_free_sgpr") { 2799 SGPRRange = ValRange; 2800 NextFreeSGPR = Val; 2801 } else if (ID == ".amdhsa_reserve_vcc") { 2802 if (!isUInt<1>(Val)) 2803 return OutOfRangeError(ValRange); 2804 ReserveVCC = Val; 2805 } else if (ID == ".amdhsa_reserve_flat_scratch") { 2806 if (IVersion.Major < 7) 2807 return getParser().Error(IDRange.Start, "directive requires gfx7+", 2808 IDRange); 2809 if (!isUInt<1>(Val)) 2810 return OutOfRangeError(ValRange); 2811 ReserveFlatScr = Val; 2812 } else if (ID == ".amdhsa_reserve_xnack_mask") { 2813 if (IVersion.Major < 8) 2814 return getParser().Error(IDRange.Start, "directive requires gfx8+", 2815 IDRange); 2816 if (!isUInt<1>(Val)) 2817 return OutOfRangeError(ValRange); 2818 ReserveXNACK = Val; 2819 } else if (ID == ".amdhsa_float_round_mode_32") { 2820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2821 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 2822 } else if (ID == ".amdhsa_float_round_mode_16_64") { 2823 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2824 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 2825 } else if (ID == ".amdhsa_float_denorm_mode_32") { 2826 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2827 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 2828 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 2829 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2830 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 2831 ValRange); 2832 } else if (ID == ".amdhsa_dx10_clamp") { 2833 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2834 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 2835 } else if (ID == ".amdhsa_ieee_mode") { 2836 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 2837 Val, ValRange); 2838 } else if (ID == ".amdhsa_fp16_overflow") { 2839 if (IVersion.Major < 9) 2840 return getParser().Error(IDRange.Start, "directive requires gfx9+", 2841 IDRange); 2842 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 2843 ValRange); 2844 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 2845 PARSE_BITS_ENTRY( 2846 KD.compute_pgm_rsrc2, 2847 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 2848 ValRange); 2849 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 2850 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2851 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 2852 Val, ValRange); 2853 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 2854 PARSE_BITS_ENTRY( 2855 KD.compute_pgm_rsrc2, 2856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 2857 ValRange); 2858 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 2859 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2860 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 2861 Val, ValRange); 2862 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 2863 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2864 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 2865 Val, ValRange); 2866 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 2867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2868 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 2869 Val, ValRange); 2870 } else if (ID == ".amdhsa_exception_int_div_zero") { 2871 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2872 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 2873 Val, ValRange); 2874 } else { 2875 return getParser().Error(IDRange.Start, 2876 "unknown .amdhsa_kernel directive", IDRange); 2877 } 2878 2879 #undef PARSE_BITS_ENTRY 2880 } 2881 2882 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 2883 return TokError(".amdhsa_next_free_vgpr directive is required"); 2884 2885 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 2886 return TokError(".amdhsa_next_free_sgpr directive is required"); 2887 2888 unsigned VGPRBlocks; 2889 unsigned SGPRBlocks; 2890 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 2891 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 2892 SGPRRange, VGPRBlocks, SGPRBlocks)) 2893 return true; 2894 2895 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 2896 VGPRBlocks)) 2897 return OutOfRangeError(VGPRRange); 2898 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 2899 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 2900 2901 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 2902 SGPRBlocks)) 2903 return OutOfRangeError(SGPRRange); 2904 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 2905 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 2906 SGPRBlocks); 2907 2908 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 2909 return TokError("too many user SGPRs enabled"); 2910 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 2911 UserSGPRCount); 2912 2913 getTargetStreamer().EmitAmdhsaKernelDescriptor( 2914 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 2915 ReserveFlatScr, ReserveXNACK); 2916 return false; 2917 } 2918 2919 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 2920 uint32_t Major; 2921 uint32_t Minor; 2922 2923 if (ParseDirectiveMajorMinor(Major, Minor)) 2924 return true; 2925 2926 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 2927 return false; 2928 } 2929 2930 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 2931 uint32_t Major; 2932 uint32_t Minor; 2933 uint32_t Stepping; 2934 StringRef VendorName; 2935 StringRef ArchName; 2936 2937 // If this directive has no arguments, then use the ISA version for the 2938 // targeted GPU. 
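  // An explicit form would look like (illustrative):
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
  // i.e. major, minor, stepping, vendor name and arch name, as parsed below.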
2939 if (getLexer().is(AsmToken::EndOfStatement)) { 2940 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 2941 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 2942 ISA.Stepping, 2943 "AMD", "AMDGPU"); 2944 return false; 2945 } 2946 2947 if (ParseDirectiveMajorMinor(Major, Minor)) 2948 return true; 2949 2950 if (getLexer().isNot(AsmToken::Comma)) 2951 return TokError("stepping version number required, comma expected"); 2952 Lex(); 2953 2954 if (ParseAsAbsoluteExpression(Stepping)) 2955 return TokError("invalid stepping version"); 2956 2957 if (getLexer().isNot(AsmToken::Comma)) 2958 return TokError("vendor name required, comma expected"); 2959 Lex(); 2960 2961 if (getLexer().isNot(AsmToken::String)) 2962 return TokError("invalid vendor name"); 2963 2964 VendorName = getLexer().getTok().getStringContents(); 2965 Lex(); 2966 2967 if (getLexer().isNot(AsmToken::Comma)) 2968 return TokError("arch name required, comma expected"); 2969 Lex(); 2970 2971 if (getLexer().isNot(AsmToken::String)) 2972 return TokError("invalid arch name"); 2973 2974 ArchName = getLexer().getTok().getStringContents(); 2975 Lex(); 2976 2977 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 2978 VendorName, ArchName); 2979 return false; 2980 } 2981 2982 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 2983 amd_kernel_code_t &Header) { 2984 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 2985 // assembly for backwards compatibility. 2986 if (ID == "max_scratch_backing_memory_byte_size") { 2987 Parser.eatToEndOfStatement(); 2988 return false; 2989 } 2990 2991 SmallString<40> ErrStr; 2992 raw_svector_ostream Err(ErrStr); 2993 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 2994 return TokError(Err.str()); 2995 } 2996 Lex(); 2997 return false; 2998 } 2999 3000 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3001 amd_kernel_code_t Header; 3002 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3003 3004 while (true) { 3005 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3006 // will set the current token to EndOfStatement. 
3007 while(getLexer().is(AsmToken::EndOfStatement)) 3008 Lex(); 3009 3010 if (getLexer().isNot(AsmToken::Identifier)) 3011 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3012 3013 StringRef ID = getLexer().getTok().getIdentifier(); 3014 Lex(); 3015 3016 if (ID == ".end_amd_kernel_code_t") 3017 break; 3018 3019 if (ParseAMDKernelCodeTValue(ID, Header)) 3020 return true; 3021 } 3022 3023 getTargetStreamer().EmitAMDKernelCodeT(Header); 3024 3025 return false; 3026 } 3027 3028 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3029 if (getLexer().isNot(AsmToken::Identifier)) 3030 return TokError("expected symbol name"); 3031 3032 StringRef KernelName = Parser.getTok().getString(); 3033 3034 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3035 ELF::STT_AMDGPU_HSA_KERNEL); 3036 Lex(); 3037 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3038 KernelScope.initialize(getContext()); 3039 return false; 3040 } 3041 3042 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3043 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3044 return Error(getParser().getTok().getLoc(), 3045 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3046 "architectures"); 3047 } 3048 3049 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3050 3051 std::string ISAVersionStringFromSTI; 3052 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3053 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3054 3055 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3056 return Error(getParser().getTok().getLoc(), 3057 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3058 "arguments specified through the command line"); 3059 } 3060 3061 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3062 Lex(); 3063 3064 return false; 3065 } 3066 3067 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3068 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3069 return Error(getParser().getTok().getLoc(), 3070 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is " 3071 "not available on non-amdhsa OSes")).str()); 3072 } 3073 3074 std::string HSAMetadataString; 3075 raw_string_ostream YamlStream(HSAMetadataString); 3076 3077 getLexer().setSkipSpace(false); 3078 3079 bool FoundEnd = false; 3080 while (!getLexer().is(AsmToken::Eof)) { 3081 while (getLexer().is(AsmToken::Space)) { 3082 YamlStream << getLexer().getTok().getString(); 3083 Lex(); 3084 } 3085 3086 if (getLexer().is(AsmToken::Identifier)) { 3087 StringRef ID = getLexer().getTok().getIdentifier(); 3088 if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) { 3089 Lex(); 3090 FoundEnd = true; 3091 break; 3092 } 3093 } 3094 3095 YamlStream << Parser.parseStringToEndOfStatement() 3096 << getContext().getAsmInfo()->getSeparatorString(); 3097 3098 Parser.eatToEndOfStatement(); 3099 } 3100 3101 getLexer().setSkipSpace(true); 3102 3103 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3104 return TokError(Twine("expected directive ") + 3105 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); 3106 } 3107 3108 YamlStream.flush(); 3109 3110 if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString)) 3111 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3112 3113 return false; 3114 } 3115 3116 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3117 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3118 return Error(getParser().getTok().getLoc(), 3119 (Twine(PALMD::AssemblerDirective) + Twine(" directive 
is " 3120 "not available on non-amdpal OSes")).str()); 3121 } 3122 3123 PALMD::Metadata PALMetadata; 3124 for (;;) { 3125 uint32_t Value; 3126 if (ParseAsAbsoluteExpression(Value)) { 3127 return TokError(Twine("invalid value in ") + 3128 Twine(PALMD::AssemblerDirective)); 3129 } 3130 PALMetadata.push_back(Value); 3131 if (getLexer().isNot(AsmToken::Comma)) 3132 break; 3133 Lex(); 3134 } 3135 getTargetStreamer().EmitPALMetadata(PALMetadata); 3136 return false; 3137 } 3138 3139 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3140 StringRef IDVal = DirectiveID.getString(); 3141 3142 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3143 if (IDVal == ".amdgcn_target") 3144 return ParseDirectiveAMDGCNTarget(); 3145 3146 if (IDVal == ".amdhsa_kernel") 3147 return ParseDirectiveAMDHSAKernel(); 3148 } else { 3149 if (IDVal == ".hsa_code_object_version") 3150 return ParseDirectiveHSACodeObjectVersion(); 3151 3152 if (IDVal == ".hsa_code_object_isa") 3153 return ParseDirectiveHSACodeObjectISA(); 3154 3155 if (IDVal == ".amd_kernel_code_t") 3156 return ParseDirectiveAMDKernelCodeT(); 3157 3158 if (IDVal == ".amdgpu_hsa_kernel") 3159 return ParseDirectiveAMDGPUHsaKernel(); 3160 3161 if (IDVal == ".amd_amdgpu_isa") 3162 return ParseDirectiveISAVersion(); 3163 } 3164 3165 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3166 return ParseDirectiveHSAMetadata(); 3167 3168 if (IDVal == PALMD::AssemblerDirective) 3169 return ParseDirectivePALMetadata(); 3170 3171 return true; 3172 } 3173 3174 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3175 unsigned RegNo) const { 3176 3177 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3178 R.isValid(); ++R) { 3179 if (*R == RegNo) 3180 return isGFX9(); 3181 } 3182 3183 switch (RegNo) { 3184 case AMDGPU::TBA: 3185 case AMDGPU::TBA_LO: 3186 case AMDGPU::TBA_HI: 3187 case AMDGPU::TMA: 3188 case AMDGPU::TMA_LO: 3189 case AMDGPU::TMA_HI: 3190 return !isGFX9(); 3191 case AMDGPU::XNACK_MASK: 3192 case AMDGPU::XNACK_MASK_LO: 3193 case AMDGPU::XNACK_MASK_HI: 3194 return !isCI() && !isSI() && hasXNACK(); 3195 default: 3196 break; 3197 } 3198 3199 if (isCI()) 3200 return true; 3201 3202 if (isSI()) { 3203 // No flat_scr 3204 switch (RegNo) { 3205 case AMDGPU::FLAT_SCR: 3206 case AMDGPU::FLAT_SCR_LO: 3207 case AMDGPU::FLAT_SCR_HI: 3208 return false; 3209 default: 3210 return true; 3211 } 3212 } 3213 3214 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3215 // SI/CI have. 3216 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3217 R.isValid(); ++R) { 3218 if (*R == RegNo) 3219 return false; 3220 } 3221 3222 return true; 3223 } 3224 3225 OperandMatchResultTy 3226 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3227 // Try to parse with a custom parser 3228 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3229 3230 // If we successfully parsed the operand or if there as an error parsing, 3231 // we are done. 3232 // 3233 // If we are parsing after we reach EndOfStatement then this means we 3234 // are appending default values to the Operands list. This is only done 3235 // by custom parser, so we shouldn't continue on to the generic parsing. 
3236 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3237 getLexer().is(AsmToken::EndOfStatement)) 3238 return ResTy; 3239 3240 ResTy = parseRegOrImm(Operands); 3241 3242 if (ResTy == MatchOperand_Success) 3243 return ResTy; 3244 3245 const auto &Tok = Parser.getTok(); 3246 SMLoc S = Tok.getLoc(); 3247 3248 const MCExpr *Expr = nullptr; 3249 if (!Parser.parseExpression(Expr)) { 3250 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3251 return MatchOperand_Success; 3252 } 3253 3254 // Possibly this is an instruction flag like 'gds'. 3255 if (Tok.getKind() == AsmToken::Identifier) { 3256 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3257 Parser.Lex(); 3258 return MatchOperand_Success; 3259 } 3260 3261 return MatchOperand_NoMatch; 3262 } 3263 3264 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3265 // Clear any forced encodings from the previous instruction. 3266 setForcedEncodingSize(0); 3267 setForcedDPP(false); 3268 setForcedSDWA(false); 3269 3270 if (Name.endswith("_e64")) { 3271 setForcedEncodingSize(64); 3272 return Name.substr(0, Name.size() - 4); 3273 } else if (Name.endswith("_e32")) { 3274 setForcedEncodingSize(32); 3275 return Name.substr(0, Name.size() - 4); 3276 } else if (Name.endswith("_dpp")) { 3277 setForcedDPP(true); 3278 return Name.substr(0, Name.size() - 4); 3279 } else if (Name.endswith("_sdwa")) { 3280 setForcedSDWA(true); 3281 return Name.substr(0, Name.size() - 5); 3282 } 3283 return Name; 3284 } 3285 3286 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3287 StringRef Name, 3288 SMLoc NameLoc, OperandVector &Operands) { 3289 // Add the instruction mnemonic 3290 Name = parseMnemonicSuffix(Name); 3291 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3292 3293 while (!getLexer().is(AsmToken::EndOfStatement)) { 3294 OperandMatchResultTy Res = parseOperand(Operands, Name); 3295 3296 // Eat the comma or space if there is one. 
3297 if (getLexer().is(AsmToken::Comma)) 3298 Parser.Lex(); 3299 3300 switch (Res) { 3301 case MatchOperand_Success: break; 3302 case MatchOperand_ParseFail: 3303 Error(getLexer().getLoc(), "failed parsing operand."); 3304 while (!getLexer().is(AsmToken::EndOfStatement)) { 3305 Parser.Lex(); 3306 } 3307 return true; 3308 case MatchOperand_NoMatch: 3309 Error(getLexer().getLoc(), "not a valid operand."); 3310 while (!getLexer().is(AsmToken::EndOfStatement)) { 3311 Parser.Lex(); 3312 } 3313 return true; 3314 } 3315 } 3316 3317 return false; 3318 } 3319 3320 //===----------------------------------------------------------------------===// 3321 // Utility functions 3322 //===----------------------------------------------------------------------===// 3323 3324 OperandMatchResultTy 3325 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3326 switch(getLexer().getKind()) { 3327 default: return MatchOperand_NoMatch; 3328 case AsmToken::Identifier: { 3329 StringRef Name = Parser.getTok().getString(); 3330 if (!Name.equals(Prefix)) { 3331 return MatchOperand_NoMatch; 3332 } 3333 3334 Parser.Lex(); 3335 if (getLexer().isNot(AsmToken::Colon)) 3336 return MatchOperand_ParseFail; 3337 3338 Parser.Lex(); 3339 3340 bool IsMinus = false; 3341 if (getLexer().getKind() == AsmToken::Minus) { 3342 Parser.Lex(); 3343 IsMinus = true; 3344 } 3345 3346 if (getLexer().isNot(AsmToken::Integer)) 3347 return MatchOperand_ParseFail; 3348 3349 if (getParser().parseAbsoluteExpression(Int)) 3350 return MatchOperand_ParseFail; 3351 3352 if (IsMinus) 3353 Int = -Int; 3354 break; 3355 } 3356 } 3357 return MatchOperand_Success; 3358 } 3359 3360 OperandMatchResultTy 3361 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3362 AMDGPUOperand::ImmTy ImmTy, 3363 bool (*ConvertResult)(int64_t&)) { 3364 SMLoc S = Parser.getTok().getLoc(); 3365 int64_t Value = 0; 3366 3367 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3368 if (Res != MatchOperand_Success) 3369 return Res; 3370 3371 if (ConvertResult && !ConvertResult(Value)) { 3372 return MatchOperand_ParseFail; 3373 } 3374 3375 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3376 return MatchOperand_Success; 3377 } 3378 3379 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3380 const char *Prefix, 3381 OperandVector &Operands, 3382 AMDGPUOperand::ImmTy ImmTy, 3383 bool (*ConvertResult)(int64_t&)) { 3384 StringRef Name = Parser.getTok().getString(); 3385 if (!Name.equals(Prefix)) 3386 return MatchOperand_NoMatch; 3387 3388 Parser.Lex(); 3389 if (getLexer().isNot(AsmToken::Colon)) 3390 return MatchOperand_ParseFail; 3391 3392 Parser.Lex(); 3393 if (getLexer().isNot(AsmToken::LBrac)) 3394 return MatchOperand_ParseFail; 3395 Parser.Lex(); 3396 3397 unsigned Val = 0; 3398 SMLoc S = Parser.getTok().getLoc(); 3399 3400 // FIXME: How to verify the number of elements matches the number of src 3401 // operands? 
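  // For example (illustrative), "op_sel:[0,1,1,0]" is parsed by the loop below
  // into Val = 0b0110, i.e. element I of the array sets bit I of the operand.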
3402 for (int I = 0; I < 4; ++I) { 3403 if (I != 0) { 3404 if (getLexer().is(AsmToken::RBrac)) 3405 break; 3406 3407 if (getLexer().isNot(AsmToken::Comma)) 3408 return MatchOperand_ParseFail; 3409 Parser.Lex(); 3410 } 3411 3412 if (getLexer().isNot(AsmToken::Integer)) 3413 return MatchOperand_ParseFail; 3414 3415 int64_t Op; 3416 if (getParser().parseAbsoluteExpression(Op)) 3417 return MatchOperand_ParseFail; 3418 3419 if (Op != 0 && Op != 1) 3420 return MatchOperand_ParseFail; 3421 Val |= (Op << I); 3422 } 3423 3424 Parser.Lex(); 3425 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3426 return MatchOperand_Success; 3427 } 3428 3429 OperandMatchResultTy 3430 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3431 AMDGPUOperand::ImmTy ImmTy) { 3432 int64_t Bit = 0; 3433 SMLoc S = Parser.getTok().getLoc(); 3434 3435 // We are at the end of the statement, and this is a default argument, so 3436 // use a default value. 3437 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3438 switch(getLexer().getKind()) { 3439 case AsmToken::Identifier: { 3440 StringRef Tok = Parser.getTok().getString(); 3441 if (Tok == Name) { 3442 if (Tok == "r128" && isGFX9()) 3443 Error(S, "r128 modifier is not supported on this GPU"); 3444 if (Tok == "a16" && !isGFX9()) 3445 Error(S, "a16 modifier is not supported on this GPU"); 3446 Bit = 1; 3447 Parser.Lex(); 3448 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3449 Bit = 0; 3450 Parser.Lex(); 3451 } else { 3452 return MatchOperand_NoMatch; 3453 } 3454 break; 3455 } 3456 default: 3457 return MatchOperand_NoMatch; 3458 } 3459 } 3460 3461 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3462 return MatchOperand_Success; 3463 } 3464 3465 static void addOptionalImmOperand( 3466 MCInst& Inst, const OperandVector& Operands, 3467 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3468 AMDGPUOperand::ImmTy ImmT, 3469 int64_t Default = 0) { 3470 auto i = OptionalIdx.find(ImmT); 3471 if (i != OptionalIdx.end()) { 3472 unsigned Idx = i->second; 3473 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3474 } else { 3475 Inst.addOperand(MCOperand::createImm(Default)); 3476 } 3477 } 3478 3479 OperandMatchResultTy 3480 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3481 if (getLexer().isNot(AsmToken::Identifier)) { 3482 return MatchOperand_NoMatch; 3483 } 3484 StringRef Tok = Parser.getTok().getString(); 3485 if (Tok != Prefix) { 3486 return MatchOperand_NoMatch; 3487 } 3488 3489 Parser.Lex(); 3490 if (getLexer().isNot(AsmToken::Colon)) { 3491 return MatchOperand_ParseFail; 3492 } 3493 3494 Parser.Lex(); 3495 if (getLexer().isNot(AsmToken::Identifier)) { 3496 return MatchOperand_ParseFail; 3497 } 3498 3499 Value = Parser.getTok().getString(); 3500 return MatchOperand_Success; 3501 } 3502 3503 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3504 // values to live in a joint format operand in the MCInst encoding. 3505 OperandMatchResultTy 3506 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3507 SMLoc S = Parser.getTok().getLoc(); 3508 int64_t Dfmt = 0, Nfmt = 0; 3509 // dfmt and nfmt can appear in either order, and each is optional. 
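  // For example, "dfmt:1 nfmt:2" and "nfmt:2 dfmt:1" are equivalent and both
  // end up as Format = 1 | (2 << 4) in the joint format operand built below.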
3510 bool GotDfmt = false, GotNfmt = false; 3511 while (!GotDfmt || !GotNfmt) { 3512 if (!GotDfmt) { 3513 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3514 if (Res != MatchOperand_NoMatch) { 3515 if (Res != MatchOperand_Success) 3516 return Res; 3517 if (Dfmt >= 16) { 3518 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3519 return MatchOperand_ParseFail; 3520 } 3521 GotDfmt = true; 3522 Parser.Lex(); 3523 continue; 3524 } 3525 } 3526 if (!GotNfmt) { 3527 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3528 if (Res != MatchOperand_NoMatch) { 3529 if (Res != MatchOperand_Success) 3530 return Res; 3531 if (Nfmt >= 8) { 3532 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3533 return MatchOperand_ParseFail; 3534 } 3535 GotNfmt = true; 3536 Parser.Lex(); 3537 continue; 3538 } 3539 } 3540 break; 3541 } 3542 if (!GotDfmt && !GotNfmt) 3543 return MatchOperand_NoMatch; 3544 auto Format = Dfmt | Nfmt << 4; 3545 Operands.push_back( 3546 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3547 return MatchOperand_Success; 3548 } 3549 3550 //===----------------------------------------------------------------------===// 3551 // ds 3552 //===----------------------------------------------------------------------===// 3553 3554 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3555 const OperandVector &Operands) { 3556 OptionalImmIndexMap OptionalIdx; 3557 3558 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3559 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3560 3561 // Add the register arguments 3562 if (Op.isReg()) { 3563 Op.addRegOperands(Inst, 1); 3564 continue; 3565 } 3566 3567 // Handle optional arguments 3568 OptionalIdx[Op.getImmTy()] = i; 3569 } 3570 3571 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3574 3575 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3576 } 3577 3578 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3579 bool IsGdsHardcoded) { 3580 OptionalImmIndexMap OptionalIdx; 3581 3582 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3583 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3584 3585 // Add the register arguments 3586 if (Op.isReg()) { 3587 Op.addRegOperands(Inst, 1); 3588 continue; 3589 } 3590 3591 if (Op.isToken() && Op.getToken() == "gds") { 3592 IsGdsHardcoded = true; 3593 continue; 3594 } 3595 3596 // Handle optional arguments 3597 OptionalIdx[Op.getImmTy()] = i; 3598 } 3599 3600 AMDGPUOperand::ImmTy OffsetType = 3601 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3602 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3603 AMDGPUOperand::ImmTyOffset; 3604 3605 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3606 3607 if (!IsGdsHardcoded) { 3608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3609 } 3610 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3611 } 3612 3613 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3614 OptionalImmIndexMap OptionalIdx; 3615 3616 unsigned OperandIdx[4]; 3617 unsigned EnMask = 0; 3618 int SrcIdx = 0; 3619 3620 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3621 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3622 3623 // Add the register arguments 3624 if (Op.isReg()) { 3625 assert(SrcIdx < 4); 3626 OperandIdx[SrcIdx] = Inst.size(); 3627 Op.addRegOperands(Inst, 1); 3628 ++SrcIdx; 3629 continue; 3630 } 3631 3632 if (Op.isOff()) { 3633 assert(SrcIdx < 4); 3634 OperandIdx[SrcIdx] = Inst.size(); 3635 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3636 ++SrcIdx; 3637 continue; 3638 } 3639 3640 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3641 Op.addImmOperands(Inst, 1); 3642 continue; 3643 } 3644 3645 if (Op.isToken() && Op.getToken() == "done") 3646 continue; 3647 3648 // Handle optional arguments 3649 OptionalIdx[Op.getImmTy()] = i; 3650 } 3651 3652 assert(SrcIdx == 4); 3653 3654 bool Compr = false; 3655 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3656 Compr = true; 3657 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3658 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3659 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3660 } 3661 3662 for (auto i = 0; i < SrcIdx; ++i) { 3663 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3664 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3665 } 3666 } 3667 3668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3670 3671 Inst.addOperand(MCOperand::createImm(EnMask)); 3672 } 3673 3674 //===----------------------------------------------------------------------===// 3675 // s_waitcnt 3676 //===----------------------------------------------------------------------===// 3677 3678 static bool 3679 encodeCnt( 3680 const AMDGPU::IsaVersion ISA, 3681 int64_t &IntVal, 3682 int64_t CntVal, 3683 bool Saturate, 3684 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 3685 unsigned (*decode)(const IsaVersion &Version, unsigned)) 3686 { 3687 bool Failed = false; 3688 3689 IntVal = encode(ISA, IntVal, CntVal); 3690 if (CntVal != decode(ISA, IntVal)) { 3691 if (Saturate) { 3692 IntVal = encode(ISA, IntVal, -1); 3693 } else { 3694 Failed = true; 3695 } 3696 } 3697 return Failed; 3698 } 3699 3700 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3701 StringRef CntName = Parser.getTok().getString(); 3702 int64_t CntVal; 3703 3704 Parser.Lex(); 3705 if (getLexer().isNot(AsmToken::LParen)) 3706 return true; 3707 3708 Parser.Lex(); 3709 if (getLexer().isNot(AsmToken::Integer)) 3710 return true; 3711 3712 SMLoc ValLoc = Parser.getTok().getLoc(); 3713 if (getParser().parseAbsoluteExpression(CntVal)) 3714 return true; 3715 3716 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3717 3718 bool Failed = true; 3719 bool Sat = CntName.endswith("_sat"); 3720 3721 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 3722 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 3723 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 3724 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 3725 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 3726 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 3727 } 3728 3729 if (Failed) { 3730 Error(ValLoc, "too large value for " + CntName); 3731 return true; 3732 } 3733 3734 if (getLexer().isNot(AsmToken::RParen)) { 3735 return true; 3736 } 3737 3738 Parser.Lex(); 3739 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 3740 const AsmToken NextToken = getLexer().peekTok(); 3741 if (NextToken.is(AsmToken::Identifier)) { 3742 Parser.Lex(); 3743 } 3744 } 3745 3746 return false; 3747 } 3748 3749 OperandMatchResultTy 3750 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 3751 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3752 int64_t Waitcnt = getWaitcntBitMask(ISA); 3753 SMLoc S = Parser.getTok().getLoc(); 3754 3755 switch(getLexer().getKind()) { 3756 default: return MatchOperand_ParseFail; 3757 case AsmToken::Integer: 3758 // The operand can be an integer value. 
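// For example, a raw mask such as "s_waitcnt 0" is taken as-is here; named counters
// like "vmcnt(0)" go through the Identifier case and parseCnt() below.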
3759 if (getParser().parseAbsoluteExpression(Waitcnt)) 3760 return MatchOperand_ParseFail; 3761 break; 3762 3763 case AsmToken::Identifier: 3764 do { 3765 if (parseCnt(Waitcnt)) 3766 return MatchOperand_ParseFail; 3767 } while(getLexer().isNot(AsmToken::EndOfStatement)); 3768 break; 3769 } 3770 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 3771 return MatchOperand_Success; 3772 } 3773 3774 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 3775 int64_t &Width) { 3776 using namespace llvm::AMDGPU::Hwreg; 3777 3778 if (Parser.getTok().getString() != "hwreg") 3779 return true; 3780 Parser.Lex(); 3781 3782 if (getLexer().isNot(AsmToken::LParen)) 3783 return true; 3784 Parser.Lex(); 3785 3786 if (getLexer().is(AsmToken::Identifier)) { 3787 HwReg.IsSymbolic = true; 3788 HwReg.Id = ID_UNKNOWN_; 3789 const StringRef tok = Parser.getTok().getString(); 3790 int Last = ID_SYMBOLIC_LAST_; 3791 if (isSI() || isCI() || isVI()) 3792 Last = ID_SYMBOLIC_FIRST_GFX9_; 3793 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 3794 if (tok == IdSymbolic[i]) { 3795 HwReg.Id = i; 3796 break; 3797 } 3798 } 3799 Parser.Lex(); 3800 } else { 3801 HwReg.IsSymbolic = false; 3802 if (getLexer().isNot(AsmToken::Integer)) 3803 return true; 3804 if (getParser().parseAbsoluteExpression(HwReg.Id)) 3805 return true; 3806 } 3807 3808 if (getLexer().is(AsmToken::RParen)) { 3809 Parser.Lex(); 3810 return false; 3811 } 3812 3813 // optional params 3814 if (getLexer().isNot(AsmToken::Comma)) 3815 return true; 3816 Parser.Lex(); 3817 3818 if (getLexer().isNot(AsmToken::Integer)) 3819 return true; 3820 if (getParser().parseAbsoluteExpression(Offset)) 3821 return true; 3822 3823 if (getLexer().isNot(AsmToken::Comma)) 3824 return true; 3825 Parser.Lex(); 3826 3827 if (getLexer().isNot(AsmToken::Integer)) 3828 return true; 3829 if (getParser().parseAbsoluteExpression(Width)) 3830 return true; 3831 3832 if (getLexer().isNot(AsmToken::RParen)) 3833 return true; 3834 Parser.Lex(); 3835 3836 return false; 3837 } 3838 3839 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 3840 using namespace llvm::AMDGPU::Hwreg; 3841 3842 int64_t Imm16Val = 0; 3843 SMLoc S = Parser.getTok().getLoc(); 3844 3845 switch(getLexer().getKind()) { 3846 default: return MatchOperand_NoMatch; 3847 case AsmToken::Integer: 3848 // The operand can be an integer value. 3849 if (getParser().parseAbsoluteExpression(Imm16Val)) 3850 return MatchOperand_NoMatch; 3851 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 3852 Error(S, "invalid immediate: only 16-bit values are legal"); 3853 // Do not return error code, but create an imm operand anyway and proceed 3854 // to the next operand, if any. That avoids unneccessary error messages. 
3855 } 3856 break; 3857 3858 case AsmToken::Identifier: { 3859 OperandInfoTy HwReg(ID_UNKNOWN_); 3860 int64_t Offset = OFFSET_DEFAULT_; 3861 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 3862 if (parseHwregConstruct(HwReg, Offset, Width)) 3863 return MatchOperand_ParseFail; 3864 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 3865 if (HwReg.IsSymbolic) 3866 Error(S, "invalid symbolic name of hardware register"); 3867 else 3868 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 3869 } 3870 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 3871 Error(S, "invalid bit offset: only 5-bit values are legal"); 3872 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 3873 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 3874 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 3875 } 3876 break; 3877 } 3878 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 3879 return MatchOperand_Success; 3880 } 3881 3882 bool AMDGPUOperand::isSWaitCnt() const { 3883 return isImm(); 3884 } 3885 3886 bool AMDGPUOperand::isHwreg() const { 3887 return isImmTy(ImmTyHwreg); 3888 } 3889 3890 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 3891 using namespace llvm::AMDGPU::SendMsg; 3892 3893 if (Parser.getTok().getString() != "sendmsg") 3894 return true; 3895 Parser.Lex(); 3896 3897 if (getLexer().isNot(AsmToken::LParen)) 3898 return true; 3899 Parser.Lex(); 3900 3901 if (getLexer().is(AsmToken::Identifier)) { 3902 Msg.IsSymbolic = true; 3903 Msg.Id = ID_UNKNOWN_; 3904 const std::string tok = Parser.getTok().getString(); 3905 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 3906 switch(i) { 3907 default: continue; // Omit gaps. 3908 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 3909 } 3910 if (tok == IdSymbolic[i]) { 3911 Msg.Id = i; 3912 break; 3913 } 3914 } 3915 Parser.Lex(); 3916 } else { 3917 Msg.IsSymbolic = false; 3918 if (getLexer().isNot(AsmToken::Integer)) 3919 return true; 3920 if (getParser().parseAbsoluteExpression(Msg.Id)) 3921 return true; 3922 if (getLexer().is(AsmToken::Integer)) 3923 if (getParser().parseAbsoluteExpression(Msg.Id)) 3924 Msg.Id = ID_UNKNOWN_; 3925 } 3926 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 3927 return false; 3928 3929 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 3930 if (getLexer().isNot(AsmToken::RParen)) 3931 return true; 3932 Parser.Lex(); 3933 return false; 3934 } 3935 3936 if (getLexer().isNot(AsmToken::Comma)) 3937 return true; 3938 Parser.Lex(); 3939 3940 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 3941 Operation.Id = ID_UNKNOWN_; 3942 if (getLexer().is(AsmToken::Identifier)) { 3943 Operation.IsSymbolic = true; 3944 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 3945 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 3946 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 3947 const StringRef Tok = Parser.getTok().getString(); 3948 for (int i = F; i < L; ++i) { 3949 if (Tok == S[i]) { 3950 Operation.Id = i; 3951 break; 3952 } 3953 } 3954 Parser.Lex(); 3955 } else { 3956 Operation.IsSymbolic = false; 3957 if (getLexer().isNot(AsmToken::Integer)) 3958 return true; 3959 if (getParser().parseAbsoluteExpression(Operation.Id)) 3960 return true; 3961 } 3962 3963 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 3964 // Stream id is optional. 3965 if (getLexer().is(AsmToken::RParen)) { 3966 Parser.Lex(); 3967 return false; 3968 } 3969 3970 if (getLexer().isNot(AsmToken::Comma)) 3971 return true; 3972 Parser.Lex(); 3973 3974 if (getLexer().isNot(AsmToken::Integer)) 3975 return true; 3976 if (getParser().parseAbsoluteExpression(StreamId)) 3977 return true; 3978 } 3979 3980 if (getLexer().isNot(AsmToken::RParen)) 3981 return true; 3982 Parser.Lex(); 3983 return false; 3984 } 3985 3986 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 3987 if (getLexer().getKind() != AsmToken::Identifier) 3988 return MatchOperand_NoMatch; 3989 3990 StringRef Str = Parser.getTok().getString(); 3991 int Slot = StringSwitch<int>(Str) 3992 .Case("p10", 0) 3993 .Case("p20", 1) 3994 .Case("p0", 2) 3995 .Default(-1); 3996 3997 SMLoc S = Parser.getTok().getLoc(); 3998 if (Slot == -1) 3999 return MatchOperand_ParseFail; 4000 4001 Parser.Lex(); 4002 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4003 AMDGPUOperand::ImmTyInterpSlot)); 4004 return MatchOperand_Success; 4005 } 4006 4007 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4008 if (getLexer().getKind() != AsmToken::Identifier) 4009 return MatchOperand_NoMatch; 4010 4011 StringRef Str = Parser.getTok().getString(); 4012 if (!Str.startswith("attr")) 4013 return MatchOperand_NoMatch; 4014 4015 StringRef Chan = Str.take_back(2); 4016 int AttrChan = StringSwitch<int>(Chan) 4017 .Case(".x", 0) 4018 .Case(".y", 1) 4019 .Case(".z", 2) 4020 .Case(".w", 3) 4021 .Default(-1); 4022 if (AttrChan == -1) 4023 return MatchOperand_ParseFail; 4024 4025 Str = Str.drop_back(2).drop_front(4); 4026 4027 uint8_t Attr; 4028 if (Str.getAsInteger(10, Attr)) 4029 return MatchOperand_ParseFail; 4030 4031 SMLoc S = Parser.getTok().getLoc(); 4032 Parser.Lex(); 4033 if (Attr > 63) { 4034 Error(S, "out of bounds attr"); 4035 return MatchOperand_Success; 4036 } 4037 4038 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4039 4040 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4041 AMDGPUOperand::ImmTyInterpAttr)); 4042 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4043 AMDGPUOperand::ImmTyAttrChan)); 4044 return MatchOperand_Success; 4045 } 4046 4047 void AMDGPUAsmParser::errorExpTgt() { 4048 Error(Parser.getTok().getLoc(), "invalid exp target"); 4049 } 4050 4051 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4052 uint8_t &Val) { 4053 if (Str == "null") { 4054 Val = 9; 4055 return MatchOperand_Success; 4056 } 4057 4058 if (Str.startswith("mrt")) { 4059 Str = Str.drop_front(3); 4060 if (Str == "z") { // == mrtz 4061 Val = 8; 4062 return MatchOperand_Success; 4063 } 4064 4065 if (Str.getAsInteger(10, Val)) 4066 return MatchOperand_ParseFail; 4067 4068 if (Val > 7) 4069 errorExpTgt(); 4070 4071 return MatchOperand_Success; 4072 } 4073 4074 if (Str.startswith("pos")) { 4075 Str = Str.drop_front(3); 4076 if (Str.getAsInteger(10, Val)) 4077 return MatchOperand_ParseFail; 4078 4079 if (Val > 
3) 4080 errorExpTgt(); 4081 4082 Val += 12; 4083 return MatchOperand_Success; 4084 } 4085 4086 if (Str.startswith("param")) { 4087 Str = Str.drop_front(5); 4088 if (Str.getAsInteger(10, Val)) 4089 return MatchOperand_ParseFail; 4090 4091 if (Val >= 32) 4092 errorExpTgt(); 4093 4094 Val += 32; 4095 return MatchOperand_Success; 4096 } 4097 4098 if (Str.startswith("invalid_target_")) { 4099 Str = Str.drop_front(15); 4100 if (Str.getAsInteger(10, Val)) 4101 return MatchOperand_ParseFail; 4102 4103 errorExpTgt(); 4104 return MatchOperand_Success; 4105 } 4106 4107 return MatchOperand_NoMatch; 4108 } 4109 4110 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4111 uint8_t Val; 4112 StringRef Str = Parser.getTok().getString(); 4113 4114 auto Res = parseExpTgtImpl(Str, Val); 4115 if (Res != MatchOperand_Success) 4116 return Res; 4117 4118 SMLoc S = Parser.getTok().getLoc(); 4119 Parser.Lex(); 4120 4121 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4122 AMDGPUOperand::ImmTyExpTgt)); 4123 return MatchOperand_Success; 4124 } 4125 4126 OperandMatchResultTy 4127 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4128 using namespace llvm::AMDGPU::SendMsg; 4129 4130 int64_t Imm16Val = 0; 4131 SMLoc S = Parser.getTok().getLoc(); 4132 4133 switch(getLexer().getKind()) { 4134 default: 4135 return MatchOperand_NoMatch; 4136 case AsmToken::Integer: 4137 // The operand can be an integer value. 4138 if (getParser().parseAbsoluteExpression(Imm16Val)) 4139 return MatchOperand_NoMatch; 4140 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4141 Error(S, "invalid immediate: only 16-bit values are legal"); 4142 // Do not return error code, but create an imm operand anyway and proceed 4143 // to the next operand, if any. That avoids unneccessary error messages. 4144 } 4145 break; 4146 case AsmToken::Identifier: { 4147 OperandInfoTy Msg(ID_UNKNOWN_); 4148 OperandInfoTy Operation(OP_UNKNOWN_); 4149 int64_t StreamId = STREAM_ID_DEFAULT_; 4150 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4151 return MatchOperand_ParseFail; 4152 do { 4153 // Validate and encode message ID. 4154 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4155 || Msg.Id == ID_SYSMSG)) { 4156 if (Msg.IsSymbolic) 4157 Error(S, "invalid/unsupported symbolic name of message"); 4158 else 4159 Error(S, "invalid/unsupported code of message"); 4160 break; 4161 } 4162 Imm16Val = (Msg.Id << ID_SHIFT_); 4163 // Validate and encode operation ID. 4164 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4165 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4166 if (Operation.IsSymbolic) 4167 Error(S, "invalid symbolic name of GS_OP"); 4168 else 4169 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4170 break; 4171 } 4172 if (Operation.Id == OP_GS_NOP 4173 && Msg.Id != ID_GS_DONE) { 4174 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4175 break; 4176 } 4177 Imm16Val |= (Operation.Id << OP_SHIFT_); 4178 } 4179 if (Msg.Id == ID_SYSMSG) { 4180 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4181 if (Operation.IsSymbolic) 4182 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4183 else 4184 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4185 break; 4186 } 4187 Imm16Val |= (Operation.Id << OP_SHIFT_); 4188 } 4189 // Validate and encode stream ID. 4190 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4191 if (! 
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4192 Error(S, "invalid stream id: only 2-bit values are legal"); 4193 break; 4194 } 4195 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4196 } 4197 } while (false); 4198 } 4199 break; 4200 } 4201 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4202 return MatchOperand_Success; 4203 } 4204 4205 bool AMDGPUOperand::isSendMsg() const { 4206 return isImmTy(ImmTySendMsg); 4207 } 4208 4209 //===----------------------------------------------------------------------===// 4210 // parser helpers 4211 //===----------------------------------------------------------------------===// 4212 4213 bool 4214 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4215 if (getLexer().getKind() == AsmToken::Identifier && 4216 Parser.getTok().getString() == Id) { 4217 Parser.Lex(); 4218 return true; 4219 } 4220 return false; 4221 } 4222 4223 bool 4224 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4225 if (getLexer().getKind() == Kind) { 4226 Parser.Lex(); 4227 return true; 4228 } 4229 return false; 4230 } 4231 4232 bool 4233 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4234 const StringRef ErrMsg) { 4235 if (!trySkipToken(Kind)) { 4236 Error(Parser.getTok().getLoc(), ErrMsg); 4237 return false; 4238 } 4239 return true; 4240 } 4241 4242 bool 4243 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4244 return !getParser().parseAbsoluteExpression(Imm); 4245 } 4246 4247 bool 4248 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4249 SMLoc S = Parser.getTok().getLoc(); 4250 if (getLexer().getKind() == AsmToken::String) { 4251 Val = Parser.getTok().getStringContents(); 4252 Parser.Lex(); 4253 return true; 4254 } else { 4255 Error(S, ErrMsg); 4256 return false; 4257 } 4258 } 4259 4260 //===----------------------------------------------------------------------===// 4261 // swizzle 4262 //===----------------------------------------------------------------------===// 4263 4264 LLVM_READNONE 4265 static unsigned 4266 encodeBitmaskPerm(const unsigned AndMask, 4267 const unsigned OrMask, 4268 const unsigned XorMask) { 4269 using namespace llvm::AMDGPU::Swizzle; 4270 4271 return BITMASK_PERM_ENC | 4272 (AndMask << BITMASK_AND_SHIFT) | 4273 (OrMask << BITMASK_OR_SHIFT) | 4274 (XorMask << BITMASK_XOR_SHIFT); 4275 } 4276 4277 bool 4278 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4279 const unsigned MinVal, 4280 const unsigned MaxVal, 4281 const StringRef ErrMsg) { 4282 for (unsigned i = 0; i < OpNum; ++i) { 4283 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4284 return false; 4285 } 4286 SMLoc ExprLoc = Parser.getTok().getLoc(); 4287 if (!parseExpr(Op[i])) { 4288 return false; 4289 } 4290 if (Op[i] < MinVal || Op[i] > MaxVal) { 4291 Error(ExprLoc, ErrMsg); 4292 return false; 4293 } 4294 } 4295 4296 return true; 4297 } 4298 4299 bool 4300 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4301 using namespace llvm::AMDGPU::Swizzle; 4302 4303 int64_t Lane[LANE_NUM]; 4304 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4305 "expected a 2-bit lane id")) { 4306 Imm = QUAD_PERM_ENC; 4307 for (auto i = 0; i < LANE_NUM; ++i) { 4308 Imm |= Lane[i] << (LANE_SHIFT * i); 4309 } 4310 return true; 4311 } 4312 return false; 4313 } 4314 4315 bool 4316 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4317 using namespace llvm::AMDGPU::Swizzle; 4318 4319 SMLoc S = Parser.getTok().getLoc(); 4320 int64_t GroupSize; 4321 int64_t LaneIdx; 4322 4323 if 
(!parseSwizzleOperands(1, &GroupSize, 4324 2, 32, 4325 "group size must be in the interval [2,32]")) { 4326 return false; 4327 } 4328 if (!isPowerOf2_64(GroupSize)) { 4329 Error(S, "group size must be a power of two"); 4330 return false; 4331 } 4332 if (parseSwizzleOperands(1, &LaneIdx, 4333 0, GroupSize - 1, 4334 "lane id must be in the interval [0,group size - 1]")) { 4335 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4336 return true; 4337 } 4338 return false; 4339 } 4340 4341 bool 4342 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4343 using namespace llvm::AMDGPU::Swizzle; 4344 4345 SMLoc S = Parser.getTok().getLoc(); 4346 int64_t GroupSize; 4347 4348 if (!parseSwizzleOperands(1, &GroupSize, 4349 2, 32, "group size must be in the interval [2,32]")) { 4350 return false; 4351 } 4352 if (!isPowerOf2_64(GroupSize)) { 4353 Error(S, "group size must be a power of two"); 4354 return false; 4355 } 4356 4357 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4358 return true; 4359 } 4360 4361 bool 4362 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4363 using namespace llvm::AMDGPU::Swizzle; 4364 4365 SMLoc S = Parser.getTok().getLoc(); 4366 int64_t GroupSize; 4367 4368 if (!parseSwizzleOperands(1, &GroupSize, 4369 1, 16, "group size must be in the interval [1,16]")) { 4370 return false; 4371 } 4372 if (!isPowerOf2_64(GroupSize)) { 4373 Error(S, "group size must be a power of two"); 4374 return false; 4375 } 4376 4377 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4378 return true; 4379 } 4380 4381 bool 4382 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4383 using namespace llvm::AMDGPU::Swizzle; 4384 4385 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4386 return false; 4387 } 4388 4389 StringRef Ctl; 4390 SMLoc StrLoc = Parser.getTok().getLoc(); 4391 if (!parseString(Ctl)) { 4392 return false; 4393 } 4394 if (Ctl.size() != BITMASK_WIDTH) { 4395 Error(StrLoc, "expected a 5-character mask"); 4396 return false; 4397 } 4398 4399 unsigned AndMask = 0; 4400 unsigned OrMask = 0; 4401 unsigned XorMask = 0; 4402 4403 for (size_t i = 0; i < Ctl.size(); ++i) { 4404 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4405 switch(Ctl[i]) { 4406 default: 4407 Error(StrLoc, "invalid mask"); 4408 return false; 4409 case '0': 4410 break; 4411 case '1': 4412 OrMask |= Mask; 4413 break; 4414 case 'p': 4415 AndMask |= Mask; 4416 break; 4417 case 'i': 4418 AndMask |= Mask; 4419 XorMask |= Mask; 4420 break; 4421 } 4422 } 4423 4424 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4425 return true; 4426 } 4427 4428 bool 4429 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4430 4431 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4432 4433 if (!parseExpr(Imm)) { 4434 return false; 4435 } 4436 if (!isUInt<16>(Imm)) { 4437 Error(OffsetLoc, "expected a 16-bit offset"); 4438 return false; 4439 } 4440 return true; 4441 } 4442 4443 bool 4444 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4445 using namespace llvm::AMDGPU::Swizzle; 4446 4447 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4448 4449 SMLoc ModeLoc = Parser.getTok().getLoc(); 4450 bool Ok = false; 4451 4452 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4453 Ok = parseSwizzleQuadPerm(Imm); 4454 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4455 Ok = parseSwizzleBitmaskPerm(Imm); 4456 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4457 Ok = parseSwizzleBroadcast(Imm); 4458 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4459 Ok = parseSwizzleSwap(Imm); 4460 } else if 
(trySkipId(IdSymbolic[ID_REVERSE])) { 4461 Ok = parseSwizzleReverse(Imm); 4462 } else { 4463 Error(ModeLoc, "expected a swizzle mode"); 4464 } 4465 4466 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4467 } 4468 4469 return false; 4470 } 4471 4472 OperandMatchResultTy 4473 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4474 SMLoc S = Parser.getTok().getLoc(); 4475 int64_t Imm = 0; 4476 4477 if (trySkipId("offset")) { 4478 4479 bool Ok = false; 4480 if (skipToken(AsmToken::Colon, "expected a colon")) { 4481 if (trySkipId("swizzle")) { 4482 Ok = parseSwizzleMacro(Imm); 4483 } else { 4484 Ok = parseSwizzleOffset(Imm); 4485 } 4486 } 4487 4488 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4489 4490 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4491 } else { 4492 // Swizzle "offset" operand is optional. 4493 // If it is omitted, try parsing other optional operands. 4494 return parseOptionalOpr(Operands); 4495 } 4496 } 4497 4498 bool 4499 AMDGPUOperand::isSwizzle() const { 4500 return isImmTy(ImmTySwizzle); 4501 } 4502 4503 //===----------------------------------------------------------------------===// 4504 // sopp branch targets 4505 //===----------------------------------------------------------------------===// 4506 4507 OperandMatchResultTy 4508 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4509 SMLoc S = Parser.getTok().getLoc(); 4510 4511 switch (getLexer().getKind()) { 4512 default: return MatchOperand_ParseFail; 4513 case AsmToken::Integer: { 4514 int64_t Imm; 4515 if (getParser().parseAbsoluteExpression(Imm)) 4516 return MatchOperand_ParseFail; 4517 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4518 return MatchOperand_Success; 4519 } 4520 4521 case AsmToken::Identifier: 4522 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4523 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4524 Parser.getTok().getString()), getContext()), S)); 4525 Parser.Lex(); 4526 return MatchOperand_Success; 4527 } 4528 } 4529 4530 //===----------------------------------------------------------------------===// 4531 // mubuf 4532 //===----------------------------------------------------------------------===// 4533 4534 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4535 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4536 } 4537 4538 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4539 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4540 } 4541 4542 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4543 const OperandVector &Operands, 4544 bool IsAtomic, 4545 bool IsAtomicReturn, 4546 bool IsLds) { 4547 bool IsLdsOpcode = IsLds; 4548 bool HasLdsModifier = false; 4549 OptionalImmIndexMap OptionalIdx; 4550 assert(IsAtomicReturn ? IsAtomic : true); 4551 4552 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4553 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4554 4555 // Add the register arguments 4556 if (Op.isReg()) { 4557 Op.addRegOperands(Inst, 1); 4558 continue; 4559 } 4560 4561 // Handle the case where soffset is an immediate 4562 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4563 Op.addImmOperands(Inst, 1); 4564 continue; 4565 } 4566 4567 HasLdsModifier = Op.isLDS(); 4568 4569 // Handle tokens like 'offen' which are sometimes hard-coded into the 4570 // asm string. There are no MCInst operands for these. 
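// For illustration: in something like "buffer_load_dword v0, v1, s[4:7], s0 offen",
// the trailing "offen" reaches this loop as a token rather than an immediate.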
4571 if (Op.isToken()) { 4572 continue; 4573 } 4574 assert(Op.isImm()); 4575 4576 // Handle optional arguments 4577 OptionalIdx[Op.getImmTy()] = i; 4578 } 4579 4580 // This is a workaround for an llvm quirk which may result in an 4581 // incorrect instruction selection. Lds and non-lds versions of 4582 // MUBUF instructions are identical except that lds versions 4583 // have mandatory 'lds' modifier. However this modifier follows 4584 // optional modifiers and llvm asm matcher regards this 'lds' 4585 // modifier as an optional one. As a result, an lds version 4586 // of opcode may be selected even if it has no 'lds' modifier. 4587 if (IsLdsOpcode && !HasLdsModifier) { 4588 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4589 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4590 Inst.setOpcode(NoLdsOpcode); 4591 IsLdsOpcode = false; 4592 } 4593 } 4594 4595 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4596 if (IsAtomicReturn) { 4597 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4598 Inst.insert(I, *I); 4599 } 4600 4601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4602 if (!IsAtomic) { // glc is hard-coded. 4603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4604 } 4605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4606 4607 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4609 } 4610 } 4611 4612 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4613 OptionalImmIndexMap OptionalIdx; 4614 4615 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4616 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4617 4618 // Add the register arguments 4619 if (Op.isReg()) { 4620 Op.addRegOperands(Inst, 1); 4621 continue; 4622 } 4623 4624 // Handle the case where soffset is an immediate 4625 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4626 Op.addImmOperands(Inst, 1); 4627 continue; 4628 } 4629 4630 // Handle tokens like 'offen' which are sometimes hard-coded into the 4631 // asm string. There are no MCInst operands for these. 
4632 if (Op.isToken()) { 4633 continue; 4634 } 4635 assert(Op.isImm()); 4636 4637 // Handle optional arguments 4638 OptionalIdx[Op.getImmTy()] = i; 4639 } 4640 4641 addOptionalImmOperand(Inst, Operands, OptionalIdx, 4642 AMDGPUOperand::ImmTyOffset); 4643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 4644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4646 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4647 } 4648 4649 //===----------------------------------------------------------------------===// 4650 // mimg 4651 //===----------------------------------------------------------------------===// 4652 4653 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 4654 bool IsAtomic) { 4655 unsigned I = 1; 4656 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4657 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4658 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4659 } 4660 4661 if (IsAtomic) { 4662 // Add src, same as dst 4663 assert(Desc.getNumDefs() == 1); 4664 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 4665 } 4666 4667 OptionalImmIndexMap OptionalIdx; 4668 4669 for (unsigned E = Operands.size(); I != E; ++I) { 4670 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4671 4672 // Add the register arguments 4673 if (Op.isReg()) { 4674 Op.addRegOperands(Inst, 1); 4675 } else if (Op.isImmModifier()) { 4676 OptionalIdx[Op.getImmTy()] = I; 4677 } else { 4678 llvm_unreachable("unexpected operand type"); 4679 } 4680 } 4681 4682 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 4683 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 4684 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 4687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 4689 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 4690 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 4691 } 4692 4693 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 4694 cvtMIMG(Inst, Operands, true); 4695 } 4696 4697 //===----------------------------------------------------------------------===// 4698 // smrd 4699 //===----------------------------------------------------------------------===// 4700 4701 bool AMDGPUOperand::isSMRDOffset8() const { 4702 return isImm() && isUInt<8>(getImm()); 4703 } 4704 4705 bool AMDGPUOperand::isSMRDOffset20() const { 4706 return isImm() && isUInt<20>(getImm()); 4707 } 4708 4709 bool AMDGPUOperand::isSMRDLiteralOffset() const { 4710 // 32-bit literals are only supported on CI and we only want to use them 4711 // when the offset is > 8-bits. 
4712 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 4713 } 4714 4715 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 4716 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4717 } 4718 4719 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 4720 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4721 } 4722 4723 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 4724 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4725 } 4726 4727 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 4728 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4729 } 4730 4731 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 4732 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4733 } 4734 4735 //===----------------------------------------------------------------------===// 4736 // vop3 4737 //===----------------------------------------------------------------------===// 4738 4739 static bool ConvertOmodMul(int64_t &Mul) { 4740 if (Mul != 1 && Mul != 2 && Mul != 4) 4741 return false; 4742 4743 Mul >>= 1; 4744 return true; 4745 } 4746 4747 static bool ConvertOmodDiv(int64_t &Div) { 4748 if (Div == 1) { 4749 Div = 0; 4750 return true; 4751 } 4752 4753 if (Div == 2) { 4754 Div = 3; 4755 return true; 4756 } 4757 4758 return false; 4759 } 4760 4761 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 4762 if (BoundCtrl == 0) { 4763 BoundCtrl = 1; 4764 return true; 4765 } 4766 4767 if (BoundCtrl == -1) { 4768 BoundCtrl = 0; 4769 return true; 4770 } 4771 4772 return false; 4773 } 4774 4775 // Note: the order in this table matches the order of operands in AsmString. 
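// Each entry below is {name, immediate type, is-a-bit flag, optional converter}; bit-style
// operands ("offen", "glc", ...) are parsed by parseNamedBit, value-style ones ("offset",
// "dmask", ...) by parseIntWithPrefix or a dedicated parser (see parseOptionalOpr).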
4776 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 4777 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 4778 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 4779 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 4780 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 4781 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 4782 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 4783 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 4784 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 4785 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 4786 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 4787 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 4788 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 4789 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 4790 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4791 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 4792 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 4793 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 4794 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 4795 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 4796 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 4797 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 4798 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 4799 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4800 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 4801 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 4802 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 4803 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 4804 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 4805 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 4806 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 4807 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 4808 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 4809 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 4810 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 4811 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 4812 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 4813 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 4814 }; 4815 4816 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 4817 unsigned size = Operands.size(); 4818 assert(size > 0); 4819 4820 OperandMatchResultTy res = parseOptionalOpr(Operands); 4821 4822 // This is a hack to enable hardcoded mandatory operands which follow 4823 // optional operands. 4824 // 4825 // Current design assumes that all operands after the first optional operand 4826 // are also optional. However implementation of some instructions violates 4827 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 4828 // 4829 // To alleviate this problem, we have to (implicitly) parse extra operands 4830 // to make sure autogenerated parser of custom operands never hit hardcoded 4831 // mandatory operands. 4832 4833 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 4834 4835 // We have parsed the first optional operand. 4836 // Parse as many operands as necessary to skip all mandatory operands. 
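// For example, flat/global atomics end with a hardcoded "glc"; the lookahead below keeps
// parsing optional operands so the matcher never stops short of that mandatory token.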
4837 4838 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 4839 if (res != MatchOperand_Success || 4840 getLexer().is(AsmToken::EndOfStatement)) break; 4841 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 4842 res = parseOptionalOpr(Operands); 4843 } 4844 } 4845 4846 return res; 4847 } 4848 4849 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 4850 OperandMatchResultTy res; 4851 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 4852 // try to parse any optional operand here 4853 if (Op.IsBit) { 4854 res = parseNamedBit(Op.Name, Operands, Op.Type); 4855 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 4856 res = parseOModOperand(Operands); 4857 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 4858 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 4859 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 4860 res = parseSDWASel(Operands, Op.Name, Op.Type); 4861 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 4862 res = parseSDWADstUnused(Operands); 4863 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 4864 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 4865 Op.Type == AMDGPUOperand::ImmTyNegLo || 4866 Op.Type == AMDGPUOperand::ImmTyNegHi) { 4867 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 4868 Op.ConvertResult); 4869 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 4870 res = parseDfmtNfmt(Operands); 4871 } else { 4872 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 4873 } 4874 if (res != MatchOperand_NoMatch) { 4875 return res; 4876 } 4877 } 4878 return MatchOperand_NoMatch; 4879 } 4880 4881 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 4882 StringRef Name = Parser.getTok().getString(); 4883 if (Name == "mul") { 4884 return parseIntWithPrefix("mul", Operands, 4885 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 4886 } 4887 4888 if (Name == "div") { 4889 return parseIntWithPrefix("div", Operands, 4890 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 4891 } 4892 4893 return MatchOperand_NoMatch; 4894 } 4895 4896 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 4897 cvtVOP3P(Inst, Operands); 4898 4899 int Opc = Inst.getOpcode(); 4900 4901 int SrcNum; 4902 const int Ops[] = { AMDGPU::OpName::src0, 4903 AMDGPU::OpName::src1, 4904 AMDGPU::OpName::src2 }; 4905 for (SrcNum = 0; 4906 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 4907 ++SrcNum); 4908 assert(SrcNum > 0); 4909 4910 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4911 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4912 4913 if ((OpSel & (1 << SrcNum)) != 0) { 4914 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 4915 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 4916 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 4917 } 4918 } 4919 4920 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 4921 // 1. This operand is input modifiers 4922 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 4923 // 2. This is not last operand 4924 && Desc.NumOperands > (OpNum + 1) 4925 // 3. Next operand is register class 4926 && Desc.OpInfo[OpNum + 1].RegClass != -1 4927 // 4. 
Next register is not tied to any other operand 4928 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 4929 } 4930 4931 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 4932 { 4933 OptionalImmIndexMap OptionalIdx; 4934 unsigned Opc = Inst.getOpcode(); 4935 4936 unsigned I = 1; 4937 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4938 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4939 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4940 } 4941 4942 for (unsigned E = Operands.size(); I != E; ++I) { 4943 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4944 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 4945 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 4946 } else if (Op.isInterpSlot() || 4947 Op.isInterpAttr() || 4948 Op.isAttrChan()) { 4949 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 4950 } else if (Op.isImmModifier()) { 4951 OptionalIdx[Op.getImmTy()] = I; 4952 } else { 4953 llvm_unreachable("unhandled operand type"); 4954 } 4955 } 4956 4957 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 4958 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 4959 } 4960 4961 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 4962 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 4963 } 4964 4965 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 4966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 4967 } 4968 } 4969 4970 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 4971 OptionalImmIndexMap &OptionalIdx) { 4972 unsigned Opc = Inst.getOpcode(); 4973 4974 unsigned I = 1; 4975 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4976 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4977 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4978 } 4979 4980 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 4981 // This instruction has src modifiers 4982 for (unsigned E = Operands.size(); I != E; ++I) { 4983 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4984 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 4985 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 4986 } else if (Op.isImmModifier()) { 4987 OptionalIdx[Op.getImmTy()] = I; 4988 } else if (Op.isRegOrImm()) { 4989 Op.addRegOrImmOperands(Inst, 1); 4990 } else { 4991 llvm_unreachable("unhandled operand type"); 4992 } 4993 } 4994 } else { 4995 // No src modifiers 4996 for (unsigned E = Operands.size(); I != E; ++I) { 4997 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4998 if (Op.isMod()) { 4999 OptionalIdx[Op.getImmTy()] = I; 5000 } else { 5001 Op.addRegOrImmOperands(Inst, 1); 5002 } 5003 } 5004 } 5005 5006 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5008 } 5009 5010 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5012 } 5013 5014 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5015 // it has src2 register operand that is tied to dst operand 5016 // we don't allow modifiers for this operand in assembler so src2_modifiers 5017 // should be 0. 
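// For example, "v_mac_f32_e64 v0, v1, v2" carries an implicit src2 tied to the dst (v0);
// the immediate 0 inserted below stands in for the src2_modifiers that were never parsed.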
5018 if (Opc == AMDGPU::V_MAC_F32_e64_si || 5019 Opc == AMDGPU::V_MAC_F32_e64_vi || 5020 Opc == AMDGPU::V_MAC_F16_e64_vi || 5021 Opc == AMDGPU::V_FMAC_F32_e64_vi) { 5022 auto it = Inst.begin(); 5023 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5024 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5025 ++it; 5026 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5027 } 5028 } 5029 5030 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5031 OptionalImmIndexMap OptionalIdx; 5032 cvtVOP3(Inst, Operands, OptionalIdx); 5033 } 5034 5035 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5036 const OperandVector &Operands) { 5037 OptionalImmIndexMap OptIdx; 5038 const int Opc = Inst.getOpcode(); 5039 const MCInstrDesc &Desc = MII.get(Opc); 5040 5041 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5042 5043 cvtVOP3(Inst, Operands, OptIdx); 5044 5045 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5046 assert(!IsPacked); 5047 Inst.addOperand(Inst.getOperand(0)); 5048 } 5049 5050 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5051 // instruction, and then figure out where to actually put the modifiers 5052 5053 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5054 5055 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5056 if (OpSelHiIdx != -1) { 5057 int DefaultVal = IsPacked ? -1 : 0; 5058 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5059 DefaultVal); 5060 } 5061 5062 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5063 if (NegLoIdx != -1) { 5064 assert(IsPacked); 5065 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5066 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5067 } 5068 5069 const int Ops[] = { AMDGPU::OpName::src0, 5070 AMDGPU::OpName::src1, 5071 AMDGPU::OpName::src2 }; 5072 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5073 AMDGPU::OpName::src1_modifiers, 5074 AMDGPU::OpName::src2_modifiers }; 5075 5076 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5077 5078 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5079 unsigned OpSelHi = 0; 5080 unsigned NegLo = 0; 5081 unsigned NegHi = 0; 5082 5083 if (OpSelHiIdx != -1) { 5084 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5085 } 5086 5087 if (NegLoIdx != -1) { 5088 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5089 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5090 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5091 } 5092 5093 for (int J = 0; J < 3; ++J) { 5094 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5095 if (OpIdx == -1) 5096 break; 5097 5098 uint32_t ModVal = 0; 5099 5100 if ((OpSel & (1 << J)) != 0) 5101 ModVal |= SISrcMods::OP_SEL_0; 5102 5103 if ((OpSelHi & (1 << J)) != 0) 5104 ModVal |= SISrcMods::OP_SEL_1; 5105 5106 if ((NegLo & (1 << J)) != 0) 5107 ModVal |= SISrcMods::NEG; 5108 5109 if ((NegHi & (1 << J)) != 0) 5110 ModVal |= SISrcMods::NEG_HI; 5111 5112 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5113 5114 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5115 } 5116 } 5117 5118 //===----------------------------------------------------------------------===// 5119 // dpp 5120 //===----------------------------------------------------------------------===// 5121 5122 bool AMDGPUOperand::isDPPCtrl() const { 5123 using 
namespace AMDGPU::DPP; 5124 5125 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5126 if (result) { 5127 int64_t Imm = getImm(); 5128 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5129 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5130 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5131 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5132 (Imm == DppCtrl::WAVE_SHL1) || 5133 (Imm == DppCtrl::WAVE_ROL1) || 5134 (Imm == DppCtrl::WAVE_SHR1) || 5135 (Imm == DppCtrl::WAVE_ROR1) || 5136 (Imm == DppCtrl::ROW_MIRROR) || 5137 (Imm == DppCtrl::ROW_HALF_MIRROR) || 5138 (Imm == DppCtrl::BCAST15) || 5139 (Imm == DppCtrl::BCAST31); 5140 } 5141 return false; 5142 } 5143 5144 bool AMDGPUOperand::isGPRIdxMode() const { 5145 return isImm() && isUInt<4>(getImm()); 5146 } 5147 5148 bool AMDGPUOperand::isS16Imm() const { 5149 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 5150 } 5151 5152 bool AMDGPUOperand::isU16Imm() const { 5153 return isImm() && isUInt<16>(getImm()); 5154 } 5155 5156 OperandMatchResultTy 5157 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 5158 using namespace AMDGPU::DPP; 5159 5160 SMLoc S = Parser.getTok().getLoc(); 5161 StringRef Prefix; 5162 int64_t Int; 5163 5164 if (getLexer().getKind() == AsmToken::Identifier) { 5165 Prefix = Parser.getTok().getString(); 5166 } else { 5167 return MatchOperand_NoMatch; 5168 } 5169 5170 if (Prefix == "row_mirror") { 5171 Int = DppCtrl::ROW_MIRROR; 5172 Parser.Lex(); 5173 } else if (Prefix == "row_half_mirror") { 5174 Int = DppCtrl::ROW_HALF_MIRROR; 5175 Parser.Lex(); 5176 } else { 5177 // Check to prevent parseDPPCtrlOps from eating invalid tokens 5178 if (Prefix != "quad_perm" 5179 && Prefix != "row_shl" 5180 && Prefix != "row_shr" 5181 && Prefix != "row_ror" 5182 && Prefix != "wave_shl" 5183 && Prefix != "wave_rol" 5184 && Prefix != "wave_shr" 5185 && Prefix != "wave_ror" 5186 && Prefix != "row_bcast") { 5187 return MatchOperand_NoMatch; 5188 } 5189 5190 Parser.Lex(); 5191 if (getLexer().isNot(AsmToken::Colon)) 5192 return MatchOperand_ParseFail; 5193 5194 if (Prefix == "quad_perm") { 5195 // quad_perm:[%d,%d,%d,%d] 5196 Parser.Lex(); 5197 if (getLexer().isNot(AsmToken::LBrac)) 5198 return MatchOperand_ParseFail; 5199 Parser.Lex(); 5200 5201 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 5202 return MatchOperand_ParseFail; 5203 5204 for (int i = 0; i < 3; ++i) { 5205 if (getLexer().isNot(AsmToken::Comma)) 5206 return MatchOperand_ParseFail; 5207 Parser.Lex(); 5208 5209 int64_t Temp; 5210 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 5211 return MatchOperand_ParseFail; 5212 const int shift = i*2 + 2; 5213 Int += (Temp << shift); 5214 } 5215 5216 if (getLexer().isNot(AsmToken::RBrac)) 5217 return MatchOperand_ParseFail; 5218 Parser.Lex(); 5219 } else { 5220 // sel:%d 5221 Parser.Lex(); 5222 if (getParser().parseAbsoluteExpression(Int)) 5223 return MatchOperand_ParseFail; 5224 5225 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 5226 Int |= DppCtrl::ROW_SHL0; 5227 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 5228 Int |= DppCtrl::ROW_SHR0; 5229 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 5230 Int |= DppCtrl::ROW_ROR0; 5231 } else if (Prefix == "wave_shl" && 1 == Int) { 5232 Int = DppCtrl::WAVE_SHL1; 5233 } else if (Prefix == "wave_rol" && 1 == Int) { 5234 Int = DppCtrl::WAVE_ROL1; 5235 } else if (Prefix == "wave_shr" && 1 == 
Int) { 5236 Int = DppCtrl::WAVE_SHR1; 5237 } else if (Prefix == "wave_ror" && 1 == Int) { 5238 Int = DppCtrl::WAVE_ROR1; 5239 } else if (Prefix == "row_bcast") { 5240 if (Int == 15) { 5241 Int = DppCtrl::BCAST15; 5242 } else if (Int == 31) { 5243 Int = DppCtrl::BCAST31; 5244 } else { 5245 return MatchOperand_ParseFail; 5246 } 5247 } else { 5248 return MatchOperand_ParseFail; 5249 } 5250 } 5251 } 5252 5253 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 5254 return MatchOperand_Success; 5255 } 5256 5257 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 5258 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 5259 } 5260 5261 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 5262 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 5263 } 5264 5265 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 5266 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 5267 } 5268 5269 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { 5270 OptionalImmIndexMap OptionalIdx; 5271 5272 unsigned I = 1; 5273 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5274 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5275 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5276 } 5277 5278 // All DPP instructions with at least one source operand have a fake "old" 5279 // source at the beginning that's tied to the dst operand. Handle it here. 5280 if (Desc.getNumOperands() >= 2) 5281 Inst.addOperand(Inst.getOperand(0)); 5282 5283 for (unsigned E = Operands.size(); I != E; ++I) { 5284 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5285 // Add the register arguments 5286 if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { 5287 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 5288 // Skip it. 
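// E.g. in something like "v_add_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3] ...", the
// "vcc" names the implicit carry-out and has no corresponding MCInst operand here.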
5289 continue; 5290 } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5291 Op.addRegWithFPInputModsOperands(Inst, 2); 5292 } else if (Op.isDPPCtrl()) { 5293 Op.addImmOperands(Inst, 1); 5294 } else if (Op.isImm()) { 5295 // Handle optional arguments 5296 OptionalIdx[Op.getImmTy()] = I; 5297 } else { 5298 llvm_unreachable("Invalid operand type"); 5299 } 5300 } 5301 5302 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 5303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 5304 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 5305 } 5306 5307 //===----------------------------------------------------------------------===// 5308 // sdwa 5309 //===----------------------------------------------------------------------===// 5310 5311 OperandMatchResultTy 5312 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 5313 AMDGPUOperand::ImmTy Type) { 5314 using namespace llvm::AMDGPU::SDWA; 5315 5316 SMLoc S = Parser.getTok().getLoc(); 5317 StringRef Value; 5318 OperandMatchResultTy res; 5319 5320 res = parseStringWithPrefix(Prefix, Value); 5321 if (res != MatchOperand_Success) { 5322 return res; 5323 } 5324 5325 int64_t Int; 5326 Int = StringSwitch<int64_t>(Value) 5327 .Case("BYTE_0", SdwaSel::BYTE_0) 5328 .Case("BYTE_1", SdwaSel::BYTE_1) 5329 .Case("BYTE_2", SdwaSel::BYTE_2) 5330 .Case("BYTE_3", SdwaSel::BYTE_3) 5331 .Case("WORD_0", SdwaSel::WORD_0) 5332 .Case("WORD_1", SdwaSel::WORD_1) 5333 .Case("DWORD", SdwaSel::DWORD) 5334 .Default(0xffffffff); 5335 Parser.Lex(); // eat last token 5336 5337 if (Int == 0xffffffff) { 5338 return MatchOperand_ParseFail; 5339 } 5340 5341 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 5342 return MatchOperand_Success; 5343 } 5344 5345 OperandMatchResultTy 5346 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 5347 using namespace llvm::AMDGPU::SDWA; 5348 5349 SMLoc S = Parser.getTok().getLoc(); 5350 StringRef Value; 5351 OperandMatchResultTy res; 5352 5353 res = parseStringWithPrefix("dst_unused", Value); 5354 if (res != MatchOperand_Success) { 5355 return res; 5356 } 5357 5358 int64_t Int; 5359 Int = StringSwitch<int64_t>(Value) 5360 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 5361 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 5362 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 5363 .Default(0xffffffff); 5364 Parser.Lex(); // eat last token 5365 5366 if (Int == 0xffffffff) { 5367 return MatchOperand_ParseFail; 5368 } 5369 5370 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 5371 return MatchOperand_Success; 5372 } 5373 5374 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 5375 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 5376 } 5377 5378 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 5379 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 5380 } 5381 5382 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 5383 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 5384 } 5385 5386 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 5387 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 5388 } 5389 5390 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 5391 uint64_t BasicInstType, bool skipVcc) { 5392 using namespace llvm::AMDGPU::SDWA; 5393 5394 OptionalImmIndexMap OptionalIdx; 5395 bool skippedVcc 
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
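// Sketch of the v_mac special case above, using an assumed (not test-derived)
// input:
//
//   v_mac_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
//
// The assembly names only dst, src0 and src1, so the converted MCInst lacks
// the tied src2 operand; cvtSDWA() therefore re-inserts the destination
// register (v1 here) at the src2 position to satisfy the dst/src2 tie.
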
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks the case where we are given an immediate
  // operand but the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}