//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0;
      Operand |= Neg ? SISrcMods::NEG : 0;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(MVT type) const {
    return isRegKind() || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyHigh: OS << "High"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadata();

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaInfo::IsaVersion ISA =
          AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0 )) {
    return false;
  }

  return true;
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept such literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg();
  else if (AsmParser->isGFX9())
    return isRegKind() || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them.
      // This case should be checked earlier in predicate methods (isLiteralImm()).
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1781 Size = std::min(RegWidth, 4u); 1782 } 1783 if (RegNum % Size != 0) 1784 return false; 1785 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1786 RegNum = RegNum / Size; 1787 int RCID = getRegClass(RegKind, RegWidth); 1788 if (RCID == -1) 1789 return false; 1790 const MCRegisterClass RC = TRI->getRegClass(RCID); 1791 if (RegNum >= RC.getNumRegs()) 1792 return false; 1793 Reg = RC.getRegister(RegNum); 1794 break; 1795 } 1796 1797 default: 1798 llvm_unreachable("unexpected register kind"); 1799 } 1800 1801 if (!subtargetHasRegister(*TRI, Reg)) 1802 return false; 1803 return true; 1804 } 1805 1806 Optional<StringRef> 1807 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1808 switch (RegKind) { 1809 case IS_VGPR: 1810 return StringRef(".amdgcn.next_free_vgpr"); 1811 case IS_SGPR: 1812 return StringRef(".amdgcn.next_free_sgpr"); 1813 default: 1814 return None; 1815 } 1816 } 1817 1818 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1819 auto SymbolName = getGprCountSymbolName(RegKind); 1820 assert(SymbolName && "initializing invalid register kind"); 1821 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1822 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1823 } 1824 1825 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1826 unsigned DwordRegIndex, 1827 unsigned RegWidth) { 1828 // Symbols are only defined for GCN targets 1829 if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6) 1830 return true; 1831 1832 auto SymbolName = getGprCountSymbolName(RegKind); 1833 if (!SymbolName) 1834 return true; 1835 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1836 1837 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1838 int64_t OldCount; 1839 1840 if (!Sym->isVariable()) 1841 return !Error(getParser().getTok().getLoc(), 1842 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1843 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1844 return !Error( 1845 getParser().getTok().getLoc(), 1846 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1847 1848 if (OldCount <= NewMax) 1849 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1850 1851 return true; 1852 } 1853 1854 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1855 const auto &Tok = Parser.getTok(); 1856 SMLoc StartLoc = Tok.getLoc(); 1857 SMLoc EndLoc = Tok.getEndLoc(); 1858 RegisterKind RegKind; 1859 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1860 1861 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1862 return nullptr; 1863 } 1864 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1865 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1866 return nullptr; 1867 } else 1868 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 1869 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 1870 } 1871 1872 bool 1873 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { 1874 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && 1875 (getLexer().getKind() == AsmToken::Integer || 1876 getLexer().getKind() == AsmToken::Real)) { 1877 // This is a workaround for handling operands like these: 1878 // |1.0| 1879 // |-1| 1880 // This syntax is not compatible with syntax of standard 1881 // MC expressions (due to the trailing '|'). 
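  // Note: parsePrimaryExpr() is used below (rather than
  // parseAbsoluteExpression()) because it consumes only the leading primary -
  // the numeric literal - and leaves the trailing '|' for the modifier
  // handling in the caller.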
1882 1883 SMLoc EndLoc; 1884 const MCExpr *Expr; 1885 1886 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 1887 return true; 1888 } 1889 1890 return !Expr->evaluateAsAbsolute(Val); 1891 } 1892 1893 return getParser().parseAbsoluteExpression(Val); 1894 } 1895 1896 OperandMatchResultTy 1897 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { 1898 // TODO: add syntactic sugar for 1/(2*PI) 1899 bool Minus = false; 1900 if (getLexer().getKind() == AsmToken::Minus) { 1901 const AsmToken NextToken = getLexer().peekTok(); 1902 if (!NextToken.is(AsmToken::Integer) && 1903 !NextToken.is(AsmToken::Real)) { 1904 return MatchOperand_NoMatch; 1905 } 1906 Minus = true; 1907 Parser.Lex(); 1908 } 1909 1910 SMLoc S = Parser.getTok().getLoc(); 1911 switch(getLexer().getKind()) { 1912 case AsmToken::Integer: { 1913 int64_t IntVal; 1914 if (parseAbsoluteExpr(IntVal, AbsMod)) 1915 return MatchOperand_ParseFail; 1916 if (Minus) 1917 IntVal *= -1; 1918 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 1919 return MatchOperand_Success; 1920 } 1921 case AsmToken::Real: { 1922 int64_t IntVal; 1923 if (parseAbsoluteExpr(IntVal, AbsMod)) 1924 return MatchOperand_ParseFail; 1925 1926 APFloat F(BitsToDouble(IntVal)); 1927 if (Minus) 1928 F.changeSign(); 1929 Operands.push_back( 1930 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, 1931 AMDGPUOperand::ImmTyNone, true)); 1932 return MatchOperand_Success; 1933 } 1934 default: 1935 return MatchOperand_NoMatch; 1936 } 1937 } 1938 1939 OperandMatchResultTy 1940 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 1941 if (auto R = parseRegister()) { 1942 assert(R->isReg()); 1943 R->Reg.IsForcedVOP3 = isForcedVOP3(); 1944 Operands.push_back(std::move(R)); 1945 return MatchOperand_Success; 1946 } 1947 return MatchOperand_NoMatch; 1948 } 1949 1950 OperandMatchResultTy 1951 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 1952 auto res = parseImm(Operands, AbsMod); 1953 if (res != MatchOperand_NoMatch) { 1954 return res; 1955 } 1956 1957 return parseReg(Operands); 1958 } 1959 1960 OperandMatchResultTy 1961 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 1962 bool AllowImm) { 1963 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 1964 1965 if (getLexer().getKind()== AsmToken::Minus) { 1966 const AsmToken NextToken = getLexer().peekTok(); 1967 1968 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 1969 if (NextToken.is(AsmToken::Minus)) { 1970 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 1971 return MatchOperand_ParseFail; 1972 } 1973 1974 // '-' followed by an integer literal N should be interpreted as integer 1975 // negation rather than a floating-point NEG modifier applied to N. 
1976 // Beside being contr-intuitive, such use of floating-point NEG modifier 1977 // results in different meaning of integer literals used with VOP1/2/C 1978 // and VOP3, for example: 1979 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 1980 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 1981 // Negative fp literals should be handled likewise for unifomtity 1982 if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) { 1983 Parser.Lex(); 1984 Negate = true; 1985 } 1986 } 1987 1988 if (getLexer().getKind() == AsmToken::Identifier && 1989 Parser.getTok().getString() == "neg") { 1990 if (Negate) { 1991 Error(Parser.getTok().getLoc(), "expected register or immediate"); 1992 return MatchOperand_ParseFail; 1993 } 1994 Parser.Lex(); 1995 Negate2 = true; 1996 if (getLexer().isNot(AsmToken::LParen)) { 1997 Error(Parser.getTok().getLoc(), "expected left paren after neg"); 1998 return MatchOperand_ParseFail; 1999 } 2000 Parser.Lex(); 2001 } 2002 2003 if (getLexer().getKind() == AsmToken::Identifier && 2004 Parser.getTok().getString() == "abs") { 2005 Parser.Lex(); 2006 Abs2 = true; 2007 if (getLexer().isNot(AsmToken::LParen)) { 2008 Error(Parser.getTok().getLoc(), "expected left paren after abs"); 2009 return MatchOperand_ParseFail; 2010 } 2011 Parser.Lex(); 2012 } 2013 2014 if (getLexer().getKind() == AsmToken::Pipe) { 2015 if (Abs2) { 2016 Error(Parser.getTok().getLoc(), "expected register or immediate"); 2017 return MatchOperand_ParseFail; 2018 } 2019 Parser.Lex(); 2020 Abs = true; 2021 } 2022 2023 OperandMatchResultTy Res; 2024 if (AllowImm) { 2025 Res = parseRegOrImm(Operands, Abs); 2026 } else { 2027 Res = parseReg(Operands); 2028 } 2029 if (Res != MatchOperand_Success) { 2030 return Res; 2031 } 2032 2033 AMDGPUOperand::Modifiers Mods; 2034 if (Abs) { 2035 if (getLexer().getKind() != AsmToken::Pipe) { 2036 Error(Parser.getTok().getLoc(), "expected vertical bar"); 2037 return MatchOperand_ParseFail; 2038 } 2039 Parser.Lex(); 2040 Mods.Abs = true; 2041 } 2042 if (Abs2) { 2043 if (getLexer().isNot(AsmToken::RParen)) { 2044 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2045 return MatchOperand_ParseFail; 2046 } 2047 Parser.Lex(); 2048 Mods.Abs = true; 2049 } 2050 2051 if (Negate) { 2052 Mods.Neg = true; 2053 } else if (Negate2) { 2054 if (getLexer().isNot(AsmToken::RParen)) { 2055 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2056 return MatchOperand_ParseFail; 2057 } 2058 Parser.Lex(); 2059 Mods.Neg = true; 2060 } 2061 2062 if (Mods.hasFPModifiers()) { 2063 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2064 Op.setModifiers(Mods); 2065 } 2066 return MatchOperand_Success; 2067 } 2068 2069 OperandMatchResultTy 2070 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2071 bool AllowImm) { 2072 bool Sext = false; 2073 2074 if (getLexer().getKind() == AsmToken::Identifier && 2075 Parser.getTok().getString() == "sext") { 2076 Parser.Lex(); 2077 Sext = true; 2078 if (getLexer().isNot(AsmToken::LParen)) { 2079 Error(Parser.getTok().getLoc(), "expected left paren after sext"); 2080 return MatchOperand_ParseFail; 2081 } 2082 Parser.Lex(); 2083 } 2084 2085 OperandMatchResultTy Res; 2086 if (AllowImm) { 2087 Res = parseRegOrImm(Operands); 2088 } else { 2089 Res = parseReg(Operands); 2090 } 2091 if (Res != MatchOperand_Success) { 2092 return Res; 2093 } 2094 2095 AMDGPUOperand::Modifiers Mods; 2096 if (Sext) { 2097 if (getLexer().isNot(AsmToken::RParen)) { 2098 Error(Parser.getTok().getLoc(), "expected closing 
parentheses"); 2099 return MatchOperand_ParseFail; 2100 } 2101 Parser.Lex(); 2102 Mods.Sext = true; 2103 } 2104 2105 if (Mods.hasIntModifiers()) { 2106 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2107 Op.setModifiers(Mods); 2108 } 2109 2110 return MatchOperand_Success; 2111 } 2112 2113 OperandMatchResultTy 2114 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2115 return parseRegOrImmWithFPInputMods(Operands, false); 2116 } 2117 2118 OperandMatchResultTy 2119 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2120 return parseRegOrImmWithIntInputMods(Operands, false); 2121 } 2122 2123 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2124 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2125 if (Reg) { 2126 Operands.push_back(std::move(Reg)); 2127 return MatchOperand_Success; 2128 } 2129 2130 const AsmToken &Tok = Parser.getTok(); 2131 if (Tok.getString() == "off") { 2132 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(), 2133 AMDGPUOperand::ImmTyOff, false)); 2134 Parser.Lex(); 2135 return MatchOperand_Success; 2136 } 2137 2138 return MatchOperand_NoMatch; 2139 } 2140 2141 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2142 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2143 2144 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2145 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2146 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2147 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2148 return Match_InvalidOperand; 2149 2150 if ((TSFlags & SIInstrFlags::VOP3) && 2151 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2152 getForcedEncodingSize() != 64) 2153 return Match_PreferE32; 2154 2155 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2156 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2157 // v_mac_f32/16 allow only dst_sel == DWORD; 2158 auto OpNum = 2159 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2160 const auto &Op = Inst.getOperand(OpNum); 2161 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2162 return Match_InvalidOperand; 2163 } 2164 } 2165 2166 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2167 // FIXME: Produces error without correct column reported. 
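    // Illustrative example: on subtargets without flat instruction offsets,
    // something like
    //   flat_load_dword v0, v[2:3] offset:16
    // has to be rejected here; only a zero (or omitted) offset is acceptable.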
2168     auto OpNum =
2169         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2170     const auto &Op = Inst.getOperand(OpNum);
2171     if (Op.getImm() != 0)
2172       return Match_InvalidOperand;
2173   }
2174
2175   return Match_Success;
2176 }
2177
2178 // Which asm variants we should check
2179 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2180   if (getForcedEncodingSize() == 32) {
2181     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2182     return makeArrayRef(Variants);
2183   }
2184
2185   if (isForcedVOP3()) {
2186     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2187     return makeArrayRef(Variants);
2188   }
2189
2190   if (isForcedSDWA()) {
2191     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2192                                         AMDGPUAsmVariants::SDWA9};
2193     return makeArrayRef(Variants);
2194   }
2195
2196   if (isForcedDPP()) {
2197     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2198     return makeArrayRef(Variants);
2199   }
2200
2201   static const unsigned Variants[] = {
2202     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2203     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2204   };
2205
2206   return makeArrayRef(Variants);
2207 }
2208
2209 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2210   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2211   const unsigned Num = Desc.getNumImplicitUses();
2212   for (unsigned i = 0; i < Num; ++i) {
2213     unsigned Reg = Desc.ImplicitUses[i];
2214     switch (Reg) {
2215     case AMDGPU::FLAT_SCR:
2216     case AMDGPU::VCC:
2217     case AMDGPU::M0:
2218       return Reg;
2219     default:
2220       break;
2221     }
2222   }
2223   return AMDGPU::NoRegister;
2224 }
2225
2226 // NB: This code is correct only when used to check constant
2227 // bus limitations because GFX7 supports no f16 inline constants.
2228 // Note that there are no cases when a GFX7 opcode violates
2229 // constant bus limitations due to the use of an f16 constant.
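// As a reminder (illustrative; the authoritative lists are the
// isInlinableLiteral* helpers in AMDGPUBaseInfo): inline constants such as
// 0, 1, -1, 0.5 or -4.0 - and 1/(2*pi) on subtargets with the inv2pi inline
// immediate - are encoded directly in the instruction and, unlike SGPRs and
// literal constants, do not occupy a constant bus slot.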
2230 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2231 unsigned OpIdx) const { 2232 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2233 2234 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2235 return false; 2236 } 2237 2238 const MCOperand &MO = Inst.getOperand(OpIdx); 2239 2240 int64_t Val = MO.getImm(); 2241 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2242 2243 switch (OpSize) { // expected operand size 2244 case 8: 2245 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2246 case 4: 2247 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2248 case 2: { 2249 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2250 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2251 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2252 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2253 } else { 2254 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2255 } 2256 } 2257 default: 2258 llvm_unreachable("invalid operand size"); 2259 } 2260 } 2261 2262 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2263 const MCOperand &MO = Inst.getOperand(OpIdx); 2264 if (MO.isImm()) { 2265 return !isInlineConstant(Inst, OpIdx); 2266 } 2267 return !MO.isReg() || 2268 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2269 } 2270 2271 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2272 const unsigned Opcode = Inst.getOpcode(); 2273 const MCInstrDesc &Desc = MII.get(Opcode); 2274 unsigned ConstantBusUseCount = 0; 2275 2276 if (Desc.TSFlags & 2277 (SIInstrFlags::VOPC | 2278 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2279 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2280 SIInstrFlags::SDWA)) { 2281 // Check special imm operands (used by madmk, etc) 2282 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2283 ++ConstantBusUseCount; 2284 } 2285 2286 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2287 if (SGPRUsed != AMDGPU::NoRegister) { 2288 ++ConstantBusUseCount; 2289 } 2290 2291 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2292 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2293 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2294 2295 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2296 2297 for (int OpIdx : OpIndices) { 2298 if (OpIdx == -1) break; 2299 2300 const MCOperand &MO = Inst.getOperand(OpIdx); 2301 if (usesConstantBus(Inst, OpIdx)) { 2302 if (MO.isReg()) { 2303 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2304 // Pairs of registers with a partial intersections like these 2305 // s0, s[0:1] 2306 // flat_scratch_lo, flat_scratch 2307 // flat_scratch_lo, flat_scratch_hi 2308 // are theoretically valid but they are disabled anyway. 
2309 // Note that this code mimics SIInstrInfo::verifyInstruction 2310 if (Reg != SGPRUsed) { 2311 ++ConstantBusUseCount; 2312 } 2313 SGPRUsed = Reg; 2314 } else { // Expression or a literal 2315 ++ConstantBusUseCount; 2316 } 2317 } 2318 } 2319 } 2320 2321 return ConstantBusUseCount <= 1; 2322 } 2323 2324 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2325 const unsigned Opcode = Inst.getOpcode(); 2326 const MCInstrDesc &Desc = MII.get(Opcode); 2327 2328 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2329 if (DstIdx == -1 || 2330 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2331 return true; 2332 } 2333 2334 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2335 2336 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2337 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2338 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2339 2340 assert(DstIdx != -1); 2341 const MCOperand &Dst = Inst.getOperand(DstIdx); 2342 assert(Dst.isReg()); 2343 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2344 2345 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2346 2347 for (int SrcIdx : SrcIndices) { 2348 if (SrcIdx == -1) break; 2349 const MCOperand &Src = Inst.getOperand(SrcIdx); 2350 if (Src.isReg()) { 2351 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2352 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2353 return false; 2354 } 2355 } 2356 } 2357 2358 return true; 2359 } 2360 2361 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2362 2363 const unsigned Opc = Inst.getOpcode(); 2364 const MCInstrDesc &Desc = MII.get(Opc); 2365 2366 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2367 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2368 assert(ClampIdx != -1); 2369 return Inst.getOperand(ClampIdx).getImm() == 0; 2370 } 2371 2372 return true; 2373 } 2374 2375 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2376 2377 const unsigned Opc = Inst.getOpcode(); 2378 const MCInstrDesc &Desc = MII.get(Opc); 2379 2380 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2381 return true; 2382 2383 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2384 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2385 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2386 2387 assert(VDataIdx != -1); 2388 assert(DMaskIdx != -1); 2389 assert(TFEIdx != -1); 2390 2391 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2392 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2393 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2394 if (DMask == 0) 2395 DMask = 1; 2396 2397 unsigned DataSize = 2398 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2399 if (hasPackedD16()) { 2400 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2401 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2402 DataSize = (DataSize + 1) / 2; 2403 } 2404 2405 return (VDataSize / 4) == DataSize + TFESize; 2406 } 2407 2408 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2409 2410 const unsigned Opc = Inst.getOpcode(); 2411 const MCInstrDesc &Desc = MII.get(Opc); 2412 2413 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2414 return true; 2415 if (!Desc.mayLoad() || !Desc.mayStore()) 2416 return true; // Not atomic 2417 2418 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2419 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2420 2421 // This is an incomplete check because image_atomic_cmpswap 2422 // may only use 0x3 and 0xf while other atomic operations 2423 // may use 0x1 and 0x3. However these limitations are 2424 // verified when we check that dmask matches dst size. 2425 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2426 } 2427 2428 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2429 2430 const unsigned Opc = Inst.getOpcode(); 2431 const MCInstrDesc &Desc = MII.get(Opc); 2432 2433 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2434 return true; 2435 2436 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2437 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2438 2439 // GATHER4 instructions use dmask in a different fashion compared to 2440 // other MIMG instructions. The only useful DMASK values are 2441 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2442 // (red,red,red,red) etc.) The ISA document doesn't mention 2443 // this. 2444 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2445 } 2446 2447 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2448 2449 const unsigned Opc = Inst.getOpcode(); 2450 const MCInstrDesc &Desc = MII.get(Opc); 2451 2452 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2453 return true; 2454 2455 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2456 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2457 if (isCI() || isSI()) 2458 return false; 2459 } 2460 2461 return true; 2462 } 2463 2464 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2465 const SMLoc &IDLoc) { 2466 if (!validateConstantBusLimitations(Inst)) { 2467 Error(IDLoc, 2468 "invalid operand (violates constant bus restrictions)"); 2469 return false; 2470 } 2471 if (!validateEarlyClobberLimitations(Inst)) { 2472 Error(IDLoc, 2473 "destination must be different than all sources"); 2474 return false; 2475 } 2476 if (!validateIntClampSupported(Inst)) { 2477 Error(IDLoc, 2478 "integer clamping is not supported on this GPU"); 2479 return false; 2480 } 2481 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
2482 if (!validateMIMGD16(Inst)) { 2483 Error(IDLoc, 2484 "d16 modifier is not supported on this GPU"); 2485 return false; 2486 } 2487 if (!validateMIMGDataSize(Inst)) { 2488 Error(IDLoc, 2489 "image data size does not match dmask and tfe"); 2490 return false; 2491 } 2492 if (!validateMIMGAtomicDMask(Inst)) { 2493 Error(IDLoc, 2494 "invalid atomic image dmask"); 2495 return false; 2496 } 2497 if (!validateMIMGGatherDMask(Inst)) { 2498 Error(IDLoc, 2499 "invalid image_gather dmask: only one bit must be set"); 2500 return false; 2501 } 2502 2503 return true; 2504 } 2505 2506 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS, 2507 unsigned VariantID = 0); 2508 2509 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2510 OperandVector &Operands, 2511 MCStreamer &Out, 2512 uint64_t &ErrorInfo, 2513 bool MatchingInlineAsm) { 2514 MCInst Inst; 2515 unsigned Result = Match_Success; 2516 for (auto Variant : getMatchedVariants()) { 2517 uint64_t EI; 2518 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2519 Variant); 2520 // We order match statuses from least to most specific. We use most specific 2521 // status as resulting 2522 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2523 if ((R == Match_Success) || 2524 (R == Match_PreferE32) || 2525 (R == Match_MissingFeature && Result != Match_PreferE32) || 2526 (R == Match_InvalidOperand && Result != Match_MissingFeature 2527 && Result != Match_PreferE32) || 2528 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2529 && Result != Match_MissingFeature 2530 && Result != Match_PreferE32)) { 2531 Result = R; 2532 ErrorInfo = EI; 2533 } 2534 if (R == Match_Success) 2535 break; 2536 } 2537 2538 switch (Result) { 2539 default: break; 2540 case Match_Success: 2541 if (!validateInstruction(Inst, IDLoc)) { 2542 return true; 2543 } 2544 Inst.setLoc(IDLoc); 2545 Out.EmitInstruction(Inst, getSTI()); 2546 return false; 2547 2548 case Match_MissingFeature: 2549 return Error(IDLoc, "instruction not supported on this GPU"); 2550 2551 case Match_MnemonicFail: { 2552 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2553 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2554 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2555 return Error(IDLoc, "invalid instruction" + Suggestion, 2556 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2557 } 2558 2559 case Match_InvalidOperand: { 2560 SMLoc ErrorLoc = IDLoc; 2561 if (ErrorInfo != ~0ULL) { 2562 if (ErrorInfo >= Operands.size()) { 2563 return Error(IDLoc, "too few operands for instruction"); 2564 } 2565 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2566 if (ErrorLoc == SMLoc()) 2567 ErrorLoc = IDLoc; 2568 } 2569 return Error(ErrorLoc, "invalid operand for instruction"); 2570 } 2571 2572 case Match_PreferE32: 2573 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2574 "should be encoded as e32"); 2575 } 2576 llvm_unreachable("Implement any new match types added!"); 2577 } 2578 2579 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2580 int64_t Tmp = -1; 2581 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2582 return true; 2583 } 2584 if (getParser().parseAbsoluteExpression(Tmp)) { 2585 return true; 2586 } 2587 Ret = static_cast<uint32_t>(Tmp); 2588 return false; 2589 } 2590 2591 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2592 uint32_t &Minor) { 2593 if 
(ParseAsAbsoluteExpression(Major)) 2594 return TokError("invalid major version"); 2595 2596 if (getLexer().isNot(AsmToken::Comma)) 2597 return TokError("minor version number required, comma expected"); 2598 Lex(); 2599 2600 if (ParseAsAbsoluteExpression(Minor)) 2601 return TokError("invalid minor version"); 2602 2603 return false; 2604 } 2605 2606 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2607 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2608 return TokError("directive only supported for amdgcn architecture"); 2609 2610 std::string Target; 2611 2612 SMLoc TargetStart = getTok().getLoc(); 2613 if (getParser().parseEscapedString(Target)) 2614 return true; 2615 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2616 2617 std::string ExpectedTarget; 2618 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2619 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2620 2621 if (Target != ExpectedTargetOS.str()) 2622 return getParser().Error(TargetRange.Start, "target must match options", 2623 TargetRange); 2624 2625 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2626 return false; 2627 } 2628 2629 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2630 return getParser().Error(Range.Start, "value out of range", Range); 2631 } 2632 2633 bool AMDGPUAsmParser::calculateGPRBlocks( 2634 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2635 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2636 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2637 unsigned &SGPRBlocks) { 2638 // TODO(scott.linder): These calculations are duplicated from 2639 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2640 IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features); 2641 2642 unsigned NumVGPRs = NextFreeVGPR; 2643 unsigned NumSGPRs = NextFreeSGPR; 2644 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features); 2645 2646 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2647 NumSGPRs > MaxAddressableNumSGPRs) 2648 return OutOfRangeError(SGPRRange); 2649 2650 NumSGPRs += 2651 IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed); 2652 2653 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2654 NumSGPRs > MaxAddressableNumSGPRs) 2655 return OutOfRangeError(SGPRRange); 2656 2657 if (Features.test(FeatureSGPRInitBug)) 2658 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2659 2660 VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs); 2661 SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs); 2662 2663 return false; 2664 } 2665 2666 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2667 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2668 return TokError("directive only supported for amdgcn architecture"); 2669 2670 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2671 return TokError("directive only supported for amdhsa OS"); 2672 2673 StringRef KernelName; 2674 if (getParser().parseIdentifier(KernelName)) 2675 return true; 2676 2677 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2678 2679 StringSet<> Seen; 2680 2681 IsaInfo::IsaVersion IVersion = 2682 IsaInfo::getIsaVersion(getSTI().getFeatureBits()); 2683 2684 SMRange VGPRRange; 2685 uint64_t NextFreeVGPR = 0; 2686 SMRange SGPRRange; 2687 uint64_t NextFreeSGPR = 0; 2688 unsigned UserSGPRCount = 0; 2689 bool ReserveVCC = true; 2690 bool ReserveFlatScr = true; 2691 bool ReserveXNACK = hasXNACK(); 2692 2693 while (true) { 2694 while 
(getLexer().is(AsmToken::EndOfStatement)) 2695 Lex(); 2696 2697 if (getLexer().isNot(AsmToken::Identifier)) 2698 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2699 2700 StringRef ID = getTok().getIdentifier(); 2701 SMRange IDRange = getTok().getLocRange(); 2702 Lex(); 2703 2704 if (ID == ".end_amdhsa_kernel") 2705 break; 2706 2707 if (Seen.find(ID) != Seen.end()) 2708 return TokError(".amdhsa_ directives cannot be repeated"); 2709 Seen.insert(ID); 2710 2711 SMLoc ValStart = getTok().getLoc(); 2712 int64_t IVal; 2713 if (getParser().parseAbsoluteExpression(IVal)) 2714 return true; 2715 SMLoc ValEnd = getTok().getLoc(); 2716 SMRange ValRange = SMRange(ValStart, ValEnd); 2717 2718 if (IVal < 0) 2719 return OutOfRangeError(ValRange); 2720 2721 uint64_t Val = IVal; 2722 2723 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2724 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2725 return OutOfRangeError(RANGE); \ 2726 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2727 2728 if (ID == ".amdhsa_group_segment_fixed_size") { 2729 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2730 return OutOfRangeError(ValRange); 2731 KD.group_segment_fixed_size = Val; 2732 } else if (ID == ".amdhsa_private_segment_fixed_size") { 2733 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2734 return OutOfRangeError(ValRange); 2735 KD.private_segment_fixed_size = Val; 2736 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2737 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2738 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2739 Val, ValRange); 2740 UserSGPRCount++; 2741 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2742 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2743 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2744 ValRange); 2745 UserSGPRCount++; 2746 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2747 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2748 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2749 ValRange); 2750 UserSGPRCount++; 2751 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 2752 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2753 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2754 Val, ValRange); 2755 UserSGPRCount++; 2756 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2757 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2758 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2759 ValRange); 2760 UserSGPRCount++; 2761 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2762 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2763 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2764 ValRange); 2765 UserSGPRCount++; 2766 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2767 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2768 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2769 Val, ValRange); 2770 UserSGPRCount++; 2771 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2772 PARSE_BITS_ENTRY( 2773 KD.compute_pgm_rsrc2, 2774 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2775 ValRange); 2776 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2777 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2778 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 2779 ValRange); 2780 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 2781 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2782 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 2783 ValRange); 2784 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 2785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 
2786 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 2787 ValRange); 2788 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 2789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2790 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 2791 ValRange); 2792 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 2793 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2794 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 2795 ValRange); 2796 } else if (ID == ".amdhsa_next_free_vgpr") { 2797 VGPRRange = ValRange; 2798 NextFreeVGPR = Val; 2799 } else if (ID == ".amdhsa_next_free_sgpr") { 2800 SGPRRange = ValRange; 2801 NextFreeSGPR = Val; 2802 } else if (ID == ".amdhsa_reserve_vcc") { 2803 if (!isUInt<1>(Val)) 2804 return OutOfRangeError(ValRange); 2805 ReserveVCC = Val; 2806 } else if (ID == ".amdhsa_reserve_flat_scratch") { 2807 if (IVersion.Major < 7) 2808 return getParser().Error(IDRange.Start, "directive requires gfx7+", 2809 IDRange); 2810 if (!isUInt<1>(Val)) 2811 return OutOfRangeError(ValRange); 2812 ReserveFlatScr = Val; 2813 } else if (ID == ".amdhsa_reserve_xnack_mask") { 2814 if (IVersion.Major < 8) 2815 return getParser().Error(IDRange.Start, "directive requires gfx8+", 2816 IDRange); 2817 if (!isUInt<1>(Val)) 2818 return OutOfRangeError(ValRange); 2819 ReserveXNACK = Val; 2820 } else if (ID == ".amdhsa_float_round_mode_32") { 2821 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2822 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 2823 } else if (ID == ".amdhsa_float_round_mode_16_64") { 2824 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2825 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 2826 } else if (ID == ".amdhsa_float_denorm_mode_32") { 2827 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2828 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 2829 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 2830 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2831 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 2832 ValRange); 2833 } else if (ID == ".amdhsa_dx10_clamp") { 2834 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2835 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 2836 } else if (ID == ".amdhsa_ieee_mode") { 2837 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 2838 Val, ValRange); 2839 } else if (ID == ".amdhsa_fp16_overflow") { 2840 if (IVersion.Major < 9) 2841 return getParser().Error(IDRange.Start, "directive requires gfx9+", 2842 IDRange); 2843 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 2844 ValRange); 2845 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 2846 PARSE_BITS_ENTRY( 2847 KD.compute_pgm_rsrc2, 2848 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 2849 ValRange); 2850 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 2851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2852 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 2853 Val, ValRange); 2854 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 2855 PARSE_BITS_ENTRY( 2856 KD.compute_pgm_rsrc2, 2857 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 2858 ValRange); 2859 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 2860 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2861 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 2862 Val, ValRange); 2863 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 2864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2865 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 2866 Val, ValRange); 2867 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 2868 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2869 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 2870 Val, ValRange); 2871 } else if (ID == ".amdhsa_exception_int_div_zero") { 2872 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2873 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 2874 Val, ValRange); 2875 } else { 2876 return getParser().Error(IDRange.Start, 2877 "unknown .amdhsa_kernel directive", IDRange); 2878 } 2879 2880 #undef PARSE_BITS_ENTRY 2881 } 2882 2883 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 2884 return TokError(".amdhsa_next_free_vgpr directive is required"); 2885 2886 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 2887 return TokError(".amdhsa_next_free_sgpr directive is required"); 2888 2889 unsigned VGPRBlocks; 2890 unsigned SGPRBlocks; 2891 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 2892 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 2893 SGPRRange, VGPRBlocks, SGPRBlocks)) 2894 return true; 2895 2896 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 2897 VGPRBlocks)) 2898 return OutOfRangeError(VGPRRange); 2899 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 2900 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 2901 2902 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 2903 SGPRBlocks)) 2904 return OutOfRangeError(SGPRRange); 2905 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 2906 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 2907 SGPRBlocks); 2908 2909 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 2910 return TokError("too many user SGPRs enabled"); 2911 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 2912 UserSGPRCount); 2913 2914 getTargetStreamer().EmitAmdhsaKernelDescriptor( 2915 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 2916 ReserveFlatScr, ReserveXNACK); 2917 return false; 2918 } 2919 2920 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 2921 uint32_t Major; 2922 uint32_t Minor; 2923 2924 if (ParseDirectiveMajorMinor(Major, Minor)) 2925 return true; 2926 2927 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 2928 return false; 2929 } 2930 2931 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 2932 uint32_t Major; 2933 uint32_t Minor; 2934 uint32_t Stepping; 2935 StringRef VendorName; 2936 StringRef ArchName; 2937 2938 // If this directive has no arguments, then use the ISA version for the 2939 // targeted GPU. 
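  // An illustrative use of this directive:
  //   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
  // With no operands, the version numbers and the vendor/arch strings are
  // derived from the current subtarget instead.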
2940 if (getLexer().is(AsmToken::EndOfStatement)) { 2941 AMDGPU::IsaInfo::IsaVersion ISA = 2942 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); 2943 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 2944 ISA.Stepping, 2945 "AMD", "AMDGPU"); 2946 return false; 2947 } 2948 2949 if (ParseDirectiveMajorMinor(Major, Minor)) 2950 return true; 2951 2952 if (getLexer().isNot(AsmToken::Comma)) 2953 return TokError("stepping version number required, comma expected"); 2954 Lex(); 2955 2956 if (ParseAsAbsoluteExpression(Stepping)) 2957 return TokError("invalid stepping version"); 2958 2959 if (getLexer().isNot(AsmToken::Comma)) 2960 return TokError("vendor name required, comma expected"); 2961 Lex(); 2962 2963 if (getLexer().isNot(AsmToken::String)) 2964 return TokError("invalid vendor name"); 2965 2966 VendorName = getLexer().getTok().getStringContents(); 2967 Lex(); 2968 2969 if (getLexer().isNot(AsmToken::Comma)) 2970 return TokError("arch name required, comma expected"); 2971 Lex(); 2972 2973 if (getLexer().isNot(AsmToken::String)) 2974 return TokError("invalid arch name"); 2975 2976 ArchName = getLexer().getTok().getStringContents(); 2977 Lex(); 2978 2979 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 2980 VendorName, ArchName); 2981 return false; 2982 } 2983 2984 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 2985 amd_kernel_code_t &Header) { 2986 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 2987 // assembly for backwards compatibility. 2988 if (ID == "max_scratch_backing_memory_byte_size") { 2989 Parser.eatToEndOfStatement(); 2990 return false; 2991 } 2992 2993 SmallString<40> ErrStr; 2994 raw_svector_ostream Err(ErrStr); 2995 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 2996 return TokError(Err.str()); 2997 } 2998 Lex(); 2999 return false; 3000 } 3001 3002 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3003 amd_kernel_code_t Header; 3004 AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits()); 3005 3006 while (true) { 3007 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3008 // will set the current token to EndOfStatement. 
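    // Each iteration then consumes one record of the (roughly) 'name = value'
    // form, e.g.
    //   wavefront_size = 6
    // until .end_amd_kernel_code_t is reached. This is only a sketch;
    // parseAmdKernelCodeField defines the exact grammar and field names.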
3009 while(getLexer().is(AsmToken::EndOfStatement)) 3010 Lex(); 3011 3012 if (getLexer().isNot(AsmToken::Identifier)) 3013 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3014 3015 StringRef ID = getLexer().getTok().getIdentifier(); 3016 Lex(); 3017 3018 if (ID == ".end_amd_kernel_code_t") 3019 break; 3020 3021 if (ParseAMDKernelCodeTValue(ID, Header)) 3022 return true; 3023 } 3024 3025 getTargetStreamer().EmitAMDKernelCodeT(Header); 3026 3027 return false; 3028 } 3029 3030 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3031 if (getLexer().isNot(AsmToken::Identifier)) 3032 return TokError("expected symbol name"); 3033 3034 StringRef KernelName = Parser.getTok().getString(); 3035 3036 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3037 ELF::STT_AMDGPU_HSA_KERNEL); 3038 Lex(); 3039 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3040 KernelScope.initialize(getContext()); 3041 return false; 3042 } 3043 3044 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3045 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3046 return Error(getParser().getTok().getLoc(), 3047 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3048 "architectures"); 3049 } 3050 3051 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3052 3053 std::string ISAVersionStringFromSTI; 3054 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3055 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3056 3057 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3058 return Error(getParser().getTok().getLoc(), 3059 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3060 "arguments specified through the command line"); 3061 } 3062 3063 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3064 Lex(); 3065 3066 return false; 3067 } 3068 3069 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3070 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3071 return Error(getParser().getTok().getLoc(), 3072 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is " 3073 "not available on non-amdhsa OSes")).str()); 3074 } 3075 3076 std::string HSAMetadataString; 3077 raw_string_ostream YamlStream(HSAMetadataString); 3078 3079 getLexer().setSkipSpace(false); 3080 3081 bool FoundEnd = false; 3082 while (!getLexer().is(AsmToken::Eof)) { 3083 while (getLexer().is(AsmToken::Space)) { 3084 YamlStream << getLexer().getTok().getString(); 3085 Lex(); 3086 } 3087 3088 if (getLexer().is(AsmToken::Identifier)) { 3089 StringRef ID = getLexer().getTok().getIdentifier(); 3090 if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) { 3091 Lex(); 3092 FoundEnd = true; 3093 break; 3094 } 3095 } 3096 3097 YamlStream << Parser.parseStringToEndOfStatement() 3098 << getContext().getAsmInfo()->getSeparatorString(); 3099 3100 Parser.eatToEndOfStatement(); 3101 } 3102 3103 getLexer().setSkipSpace(true); 3104 3105 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3106 return TokError(Twine("expected directive ") + 3107 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); 3108 } 3109 3110 YamlStream.flush(); 3111 3112 if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString)) 3113 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3114 3115 return false; 3116 } 3117 3118 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3119 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3120 return Error(getParser().getTok().getLoc(), 3121 (Twine(PALMD::AssemblerDirective) + Twine(" directive 
is " 3122 "not available on non-amdpal OSes")).str()); 3123 } 3124 3125 PALMD::Metadata PALMetadata; 3126 for (;;) { 3127 uint32_t Value; 3128 if (ParseAsAbsoluteExpression(Value)) { 3129 return TokError(Twine("invalid value in ") + 3130 Twine(PALMD::AssemblerDirective)); 3131 } 3132 PALMetadata.push_back(Value); 3133 if (getLexer().isNot(AsmToken::Comma)) 3134 break; 3135 Lex(); 3136 } 3137 getTargetStreamer().EmitPALMetadata(PALMetadata); 3138 return false; 3139 } 3140 3141 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3142 StringRef IDVal = DirectiveID.getString(); 3143 3144 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3145 if (IDVal == ".amdgcn_target") 3146 return ParseDirectiveAMDGCNTarget(); 3147 3148 if (IDVal == ".amdhsa_kernel") 3149 return ParseDirectiveAMDHSAKernel(); 3150 } else { 3151 if (IDVal == ".hsa_code_object_version") 3152 return ParseDirectiveHSACodeObjectVersion(); 3153 3154 if (IDVal == ".hsa_code_object_isa") 3155 return ParseDirectiveHSACodeObjectISA(); 3156 3157 if (IDVal == ".amd_kernel_code_t") 3158 return ParseDirectiveAMDKernelCodeT(); 3159 3160 if (IDVal == ".amdgpu_hsa_kernel") 3161 return ParseDirectiveAMDGPUHsaKernel(); 3162 3163 if (IDVal == ".amd_amdgpu_isa") 3164 return ParseDirectiveISAVersion(); 3165 } 3166 3167 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3168 return ParseDirectiveHSAMetadata(); 3169 3170 if (IDVal == PALMD::AssemblerDirective) 3171 return ParseDirectivePALMetadata(); 3172 3173 return true; 3174 } 3175 3176 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3177 unsigned RegNo) const { 3178 3179 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3180 R.isValid(); ++R) { 3181 if (*R == RegNo) 3182 return isGFX9(); 3183 } 3184 3185 switch (RegNo) { 3186 case AMDGPU::TBA: 3187 case AMDGPU::TBA_LO: 3188 case AMDGPU::TBA_HI: 3189 case AMDGPU::TMA: 3190 case AMDGPU::TMA_LO: 3191 case AMDGPU::TMA_HI: 3192 return !isGFX9(); 3193 case AMDGPU::XNACK_MASK: 3194 case AMDGPU::XNACK_MASK_LO: 3195 case AMDGPU::XNACK_MASK_HI: 3196 return !isCI() && !isSI() && hasXNACK(); 3197 default: 3198 break; 3199 } 3200 3201 if (isCI()) 3202 return true; 3203 3204 if (isSI()) { 3205 // No flat_scr 3206 switch (RegNo) { 3207 case AMDGPU::FLAT_SCR: 3208 case AMDGPU::FLAT_SCR_LO: 3209 case AMDGPU::FLAT_SCR_HI: 3210 return false; 3211 default: 3212 return true; 3213 } 3214 } 3215 3216 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3217 // SI/CI have. 3218 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3219 R.isValid(); ++R) { 3220 if (*R == RegNo) 3221 return false; 3222 } 3223 3224 return true; 3225 } 3226 3227 OperandMatchResultTy 3228 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3229 // Try to parse with a custom parser 3230 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3231 3232 // If we successfully parsed the operand or if there as an error parsing, 3233 // we are done. 3234 // 3235 // If we are parsing after we reach EndOfStatement then this means we 3236 // are appending default values to the Operands list. This is only done 3237 // by custom parser, so we shouldn't continue on to the generic parsing. 
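  // Rough sketch of the fallback chain below: first try a register or an
  // immediate, then a general MC expression, and finally fall back to keeping
  // a bare identifier as a plain token (used for flags like 'gds', as noted
  // further down).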
3238 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3239 getLexer().is(AsmToken::EndOfStatement)) 3240 return ResTy; 3241 3242 ResTy = parseRegOrImm(Operands); 3243 3244 if (ResTy == MatchOperand_Success) 3245 return ResTy; 3246 3247 const auto &Tok = Parser.getTok(); 3248 SMLoc S = Tok.getLoc(); 3249 3250 const MCExpr *Expr = nullptr; 3251 if (!Parser.parseExpression(Expr)) { 3252 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3253 return MatchOperand_Success; 3254 } 3255 3256 // Possibly this is an instruction flag like 'gds'. 3257 if (Tok.getKind() == AsmToken::Identifier) { 3258 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3259 Parser.Lex(); 3260 return MatchOperand_Success; 3261 } 3262 3263 return MatchOperand_NoMatch; 3264 } 3265 3266 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3267 // Clear any forced encodings from the previous instruction. 3268 setForcedEncodingSize(0); 3269 setForcedDPP(false); 3270 setForcedSDWA(false); 3271 3272 if (Name.endswith("_e64")) { 3273 setForcedEncodingSize(64); 3274 return Name.substr(0, Name.size() - 4); 3275 } else if (Name.endswith("_e32")) { 3276 setForcedEncodingSize(32); 3277 return Name.substr(0, Name.size() - 4); 3278 } else if (Name.endswith("_dpp")) { 3279 setForcedDPP(true); 3280 return Name.substr(0, Name.size() - 4); 3281 } else if (Name.endswith("_sdwa")) { 3282 setForcedSDWA(true); 3283 return Name.substr(0, Name.size() - 5); 3284 } 3285 return Name; 3286 } 3287 3288 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3289 StringRef Name, 3290 SMLoc NameLoc, OperandVector &Operands) { 3291 // Add the instruction mnemonic 3292 Name = parseMnemonicSuffix(Name); 3293 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3294 3295 while (!getLexer().is(AsmToken::EndOfStatement)) { 3296 OperandMatchResultTy Res = parseOperand(Operands, Name); 3297 3298 // Eat the comma or space if there is one. 
3299 if (getLexer().is(AsmToken::Comma)) 3300 Parser.Lex(); 3301 3302 switch (Res) { 3303 case MatchOperand_Success: break; 3304 case MatchOperand_ParseFail: 3305 Error(getLexer().getLoc(), "failed parsing operand."); 3306 while (!getLexer().is(AsmToken::EndOfStatement)) { 3307 Parser.Lex(); 3308 } 3309 return true; 3310 case MatchOperand_NoMatch: 3311 Error(getLexer().getLoc(), "not a valid operand."); 3312 while (!getLexer().is(AsmToken::EndOfStatement)) { 3313 Parser.Lex(); 3314 } 3315 return true; 3316 } 3317 } 3318 3319 return false; 3320 } 3321 3322 //===----------------------------------------------------------------------===// 3323 // Utility functions 3324 //===----------------------------------------------------------------------===// 3325 3326 OperandMatchResultTy 3327 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3328 switch(getLexer().getKind()) { 3329 default: return MatchOperand_NoMatch; 3330 case AsmToken::Identifier: { 3331 StringRef Name = Parser.getTok().getString(); 3332 if (!Name.equals(Prefix)) { 3333 return MatchOperand_NoMatch; 3334 } 3335 3336 Parser.Lex(); 3337 if (getLexer().isNot(AsmToken::Colon)) 3338 return MatchOperand_ParseFail; 3339 3340 Parser.Lex(); 3341 3342 bool IsMinus = false; 3343 if (getLexer().getKind() == AsmToken::Minus) { 3344 Parser.Lex(); 3345 IsMinus = true; 3346 } 3347 3348 if (getLexer().isNot(AsmToken::Integer)) 3349 return MatchOperand_ParseFail; 3350 3351 if (getParser().parseAbsoluteExpression(Int)) 3352 return MatchOperand_ParseFail; 3353 3354 if (IsMinus) 3355 Int = -Int; 3356 break; 3357 } 3358 } 3359 return MatchOperand_Success; 3360 } 3361 3362 OperandMatchResultTy 3363 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3364 AMDGPUOperand::ImmTy ImmTy, 3365 bool (*ConvertResult)(int64_t&)) { 3366 SMLoc S = Parser.getTok().getLoc(); 3367 int64_t Value = 0; 3368 3369 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3370 if (Res != MatchOperand_Success) 3371 return Res; 3372 3373 if (ConvertResult && !ConvertResult(Value)) { 3374 return MatchOperand_ParseFail; 3375 } 3376 3377 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3378 return MatchOperand_Success; 3379 } 3380 3381 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3382 const char *Prefix, 3383 OperandVector &Operands, 3384 AMDGPUOperand::ImmTy ImmTy, 3385 bool (*ConvertResult)(int64_t&)) { 3386 StringRef Name = Parser.getTok().getString(); 3387 if (!Name.equals(Prefix)) 3388 return MatchOperand_NoMatch; 3389 3390 Parser.Lex(); 3391 if (getLexer().isNot(AsmToken::Colon)) 3392 return MatchOperand_ParseFail; 3393 3394 Parser.Lex(); 3395 if (getLexer().isNot(AsmToken::LBrac)) 3396 return MatchOperand_ParseFail; 3397 Parser.Lex(); 3398 3399 unsigned Val = 0; 3400 SMLoc S = Parser.getTok().getLoc(); 3401 3402 // FIXME: How to verify the number of elements matches the number of src 3403 // operands? 
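  // Illustrative input for this loop: a prefix operand such as
  //   op_sel:[0,1,1]
  // is folded into Val one bit per element, i.e. Val = 0 | (1 << 1) | (1 << 2) = 6.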
3404 for (int I = 0; I < 4; ++I) { 3405 if (I != 0) { 3406 if (getLexer().is(AsmToken::RBrac)) 3407 break; 3408 3409 if (getLexer().isNot(AsmToken::Comma)) 3410 return MatchOperand_ParseFail; 3411 Parser.Lex(); 3412 } 3413 3414 if (getLexer().isNot(AsmToken::Integer)) 3415 return MatchOperand_ParseFail; 3416 3417 int64_t Op; 3418 if (getParser().parseAbsoluteExpression(Op)) 3419 return MatchOperand_ParseFail; 3420 3421 if (Op != 0 && Op != 1) 3422 return MatchOperand_ParseFail; 3423 Val |= (Op << I); 3424 } 3425 3426 Parser.Lex(); 3427 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3428 return MatchOperand_Success; 3429 } 3430 3431 OperandMatchResultTy 3432 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3433 AMDGPUOperand::ImmTy ImmTy) { 3434 int64_t Bit = 0; 3435 SMLoc S = Parser.getTok().getLoc(); 3436 3437 // We are at the end of the statement, and this is a default argument, so 3438 // use a default value. 3439 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3440 switch(getLexer().getKind()) { 3441 case AsmToken::Identifier: { 3442 StringRef Tok = Parser.getTok().getString(); 3443 if (Tok == Name) { 3444 if (Tok == "r128" && isGFX9()) 3445 Error(S, "r128 modifier is not supported on this GPU"); 3446 if (Tok == "a16" && !isGFX9()) 3447 Error(S, "a16 modifier is not supported on this GPU"); 3448 Bit = 1; 3449 Parser.Lex(); 3450 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3451 Bit = 0; 3452 Parser.Lex(); 3453 } else { 3454 return MatchOperand_NoMatch; 3455 } 3456 break; 3457 } 3458 default: 3459 return MatchOperand_NoMatch; 3460 } 3461 } 3462 3463 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3464 return MatchOperand_Success; 3465 } 3466 3467 static void addOptionalImmOperand( 3468 MCInst& Inst, const OperandVector& Operands, 3469 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3470 AMDGPUOperand::ImmTy ImmT, 3471 int64_t Default = 0) { 3472 auto i = OptionalIdx.find(ImmT); 3473 if (i != OptionalIdx.end()) { 3474 unsigned Idx = i->second; 3475 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3476 } else { 3477 Inst.addOperand(MCOperand::createImm(Default)); 3478 } 3479 } 3480 3481 OperandMatchResultTy 3482 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3483 if (getLexer().isNot(AsmToken::Identifier)) { 3484 return MatchOperand_NoMatch; 3485 } 3486 StringRef Tok = Parser.getTok().getString(); 3487 if (Tok != Prefix) { 3488 return MatchOperand_NoMatch; 3489 } 3490 3491 Parser.Lex(); 3492 if (getLexer().isNot(AsmToken::Colon)) { 3493 return MatchOperand_ParseFail; 3494 } 3495 3496 Parser.Lex(); 3497 if (getLexer().isNot(AsmToken::Identifier)) { 3498 return MatchOperand_ParseFail; 3499 } 3500 3501 Value = Parser.getTok().getString(); 3502 return MatchOperand_Success; 3503 } 3504 3505 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3506 // values to live in a joint format operand in the MCInst encoding. 3507 OperandMatchResultTy 3508 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3509 SMLoc S = Parser.getTok().getLoc(); 3510 int64_t Dfmt = 0, Nfmt = 0; 3511 // dfmt and nfmt can appear in either order, and each is optional. 
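  // For example (illustrative): 'dfmt:4 nfmt:2' and 'nfmt:2 dfmt:4' both yield
  // Format = 4 | (2 << 4) = 0x24; a field that is omitted contributes 0.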
3512 bool GotDfmt = false, GotNfmt = false; 3513 while (!GotDfmt || !GotNfmt) { 3514 if (!GotDfmt) { 3515 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3516 if (Res != MatchOperand_NoMatch) { 3517 if (Res != MatchOperand_Success) 3518 return Res; 3519 if (Dfmt >= 16) { 3520 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3521 return MatchOperand_ParseFail; 3522 } 3523 GotDfmt = true; 3524 Parser.Lex(); 3525 continue; 3526 } 3527 } 3528 if (!GotNfmt) { 3529 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3530 if (Res != MatchOperand_NoMatch) { 3531 if (Res != MatchOperand_Success) 3532 return Res; 3533 if (Nfmt >= 8) { 3534 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3535 return MatchOperand_ParseFail; 3536 } 3537 GotNfmt = true; 3538 Parser.Lex(); 3539 continue; 3540 } 3541 } 3542 break; 3543 } 3544 if (!GotDfmt && !GotNfmt) 3545 return MatchOperand_NoMatch; 3546 auto Format = Dfmt | Nfmt << 4; 3547 Operands.push_back( 3548 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3549 return MatchOperand_Success; 3550 } 3551 3552 //===----------------------------------------------------------------------===// 3553 // ds 3554 //===----------------------------------------------------------------------===// 3555 3556 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3557 const OperandVector &Operands) { 3558 OptionalImmIndexMap OptionalIdx; 3559 3560 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3561 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3562 3563 // Add the register arguments 3564 if (Op.isReg()) { 3565 Op.addRegOperands(Inst, 1); 3566 continue; 3567 } 3568 3569 // Handle optional arguments 3570 OptionalIdx[Op.getImmTy()] = i; 3571 } 3572 3573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3576 3577 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3578 } 3579 3580 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3581 bool IsGdsHardcoded) { 3582 OptionalImmIndexMap OptionalIdx; 3583 3584 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3585 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3586 3587 // Add the register arguments 3588 if (Op.isReg()) { 3589 Op.addRegOperands(Inst, 1); 3590 continue; 3591 } 3592 3593 if (Op.isToken() && Op.getToken() == "gds") { 3594 IsGdsHardcoded = true; 3595 continue; 3596 } 3597 3598 // Handle optional arguments 3599 OptionalIdx[Op.getImmTy()] = i; 3600 } 3601 3602 AMDGPUOperand::ImmTy OffsetType = 3603 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3604 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3605 AMDGPUOperand::ImmTyOffset; 3606 3607 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3608 3609 if (!IsGdsHardcoded) { 3610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3611 } 3612 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3613 } 3614 3615 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3616 OptionalImmIndexMap OptionalIdx; 3617 3618 unsigned OperandIdx[4]; 3619 unsigned EnMask = 0; 3620 int SrcIdx = 0; 3621 3622 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3623 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3624 3625 // Add the register arguments 3626 if (Op.isReg()) { 3627 assert(SrcIdx < 4); 3628 OperandIdx[SrcIdx] = Inst.size(); 3629 Op.addRegOperands(Inst, 1); 3630 ++SrcIdx; 3631 continue; 3632 } 3633 3634 if (Op.isOff()) { 3635 assert(SrcIdx < 4); 3636 OperandIdx[SrcIdx] = Inst.size(); 3637 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3638 ++SrcIdx; 3639 continue; 3640 } 3641 3642 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3643 Op.addImmOperands(Inst, 1); 3644 continue; 3645 } 3646 3647 if (Op.isToken() && Op.getToken() == "done") 3648 continue; 3649 3650 // Handle optional arguments 3651 OptionalIdx[Op.getImmTy()] = i; 3652 } 3653 3654 assert(SrcIdx == 4); 3655 3656 bool Compr = false; 3657 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3658 Compr = true; 3659 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3660 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3661 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3662 } 3663 3664 for (auto i = 0; i < SrcIdx; ++i) { 3665 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3666 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3667 } 3668 } 3669 3670 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3672 3673 Inst.addOperand(MCOperand::createImm(EnMask)); 3674 } 3675 3676 //===----------------------------------------------------------------------===// 3677 // s_waitcnt 3678 //===----------------------------------------------------------------------===// 3679 3680 static bool 3681 encodeCnt( 3682 const AMDGPU::IsaInfo::IsaVersion ISA, 3683 int64_t &IntVal, 3684 int64_t CntVal, 3685 bool Saturate, 3686 unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), 3687 unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) 3688 { 3689 bool Failed = false; 3690 3691 IntVal = encode(ISA, IntVal, CntVal); 3692 if (CntVal != decode(ISA, IntVal)) { 3693 if (Saturate) { 3694 IntVal = encode(ISA, IntVal, -1); 3695 } else { 3696 Failed = true; 3697 } 3698 } 3699 return Failed; 3700 } 3701 3702 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3703 StringRef CntName = Parser.getTok().getString(); 3704 int64_t CntVal; 3705 3706 Parser.Lex(); 3707 if (getLexer().isNot(AsmToken::LParen)) 3708 return true; 3709 3710 Parser.Lex(); 3711 if (getLexer().isNot(AsmToken::Integer)) 3712 return true; 3713 3714 SMLoc ValLoc = Parser.getTok().getLoc(); 3715 if (getParser().parseAbsoluteExpression(CntVal)) 3716 return true; 3717 3718 AMDGPU::IsaInfo::IsaVersion ISA = 3719 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); 3720 3721 bool Failed = true; 3722 bool Sat = CntName.endswith("_sat"); 3723 3724 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 3725 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 3726 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 3727 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 3728 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 3729 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 3730 } 3731 3732 if (Failed) { 3733 Error(ValLoc, "too large value for " + CntName); 3734 return true; 3735 } 3736 3737 if (getLexer().isNot(AsmToken::RParen)) { 3738 return true; 3739 } 3740 3741 Parser.Lex(); 3742 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 3743 const AsmToken NextToken = getLexer().peekTok(); 3744 if (NextToken.is(AsmToken::Identifier)) { 3745 Parser.Lex(); 3746 } 3747 } 3748 3749 return false; 3750 } 3751 3752 OperandMatchResultTy 3753 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 3754 AMDGPU::IsaInfo::IsaVersion ISA = 3755 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); 3756 int64_t Waitcnt = getWaitcntBitMask(ISA); 3757 SMLoc S = Parser.getTok().getLoc(); 3758 3759 switch(getLexer().getKind()) { 3760 default: return MatchOperand_ParseFail; 3761 case AsmToken::Integer: 3762 // The operand can be an integer value. 
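    // For example, a raw mask such as `s_waitcnt 0x070f` (illustrative value)
    // sets every counter field at once; named counters (vmcnt/expcnt/lgkmcnt)
    // take the Identifier path below and go through parseCnt().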
3763 if (getParser().parseAbsoluteExpression(Waitcnt)) 3764 return MatchOperand_ParseFail; 3765 break; 3766 3767 case AsmToken::Identifier: 3768 do { 3769 if (parseCnt(Waitcnt)) 3770 return MatchOperand_ParseFail; 3771 } while(getLexer().isNot(AsmToken::EndOfStatement)); 3772 break; 3773 } 3774 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 3775 return MatchOperand_Success; 3776 } 3777 3778 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 3779 int64_t &Width) { 3780 using namespace llvm::AMDGPU::Hwreg; 3781 3782 if (Parser.getTok().getString() != "hwreg") 3783 return true; 3784 Parser.Lex(); 3785 3786 if (getLexer().isNot(AsmToken::LParen)) 3787 return true; 3788 Parser.Lex(); 3789 3790 if (getLexer().is(AsmToken::Identifier)) { 3791 HwReg.IsSymbolic = true; 3792 HwReg.Id = ID_UNKNOWN_; 3793 const StringRef tok = Parser.getTok().getString(); 3794 int Last = ID_SYMBOLIC_LAST_; 3795 if (isSI() || isCI() || isVI()) 3796 Last = ID_SYMBOLIC_FIRST_GFX9_; 3797 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 3798 if (tok == IdSymbolic[i]) { 3799 HwReg.Id = i; 3800 break; 3801 } 3802 } 3803 Parser.Lex(); 3804 } else { 3805 HwReg.IsSymbolic = false; 3806 if (getLexer().isNot(AsmToken::Integer)) 3807 return true; 3808 if (getParser().parseAbsoluteExpression(HwReg.Id)) 3809 return true; 3810 } 3811 3812 if (getLexer().is(AsmToken::RParen)) { 3813 Parser.Lex(); 3814 return false; 3815 } 3816 3817 // optional params 3818 if (getLexer().isNot(AsmToken::Comma)) 3819 return true; 3820 Parser.Lex(); 3821 3822 if (getLexer().isNot(AsmToken::Integer)) 3823 return true; 3824 if (getParser().parseAbsoluteExpression(Offset)) 3825 return true; 3826 3827 if (getLexer().isNot(AsmToken::Comma)) 3828 return true; 3829 Parser.Lex(); 3830 3831 if (getLexer().isNot(AsmToken::Integer)) 3832 return true; 3833 if (getParser().parseAbsoluteExpression(Width)) 3834 return true; 3835 3836 if (getLexer().isNot(AsmToken::RParen)) 3837 return true; 3838 Parser.Lex(); 3839 3840 return false; 3841 } 3842 3843 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 3844 using namespace llvm::AMDGPU::Hwreg; 3845 3846 int64_t Imm16Val = 0; 3847 SMLoc S = Parser.getTok().getLoc(); 3848 3849 switch(getLexer().getKind()) { 3850 default: return MatchOperand_NoMatch; 3851 case AsmToken::Integer: 3852 // The operand can be an integer value. 3853 if (getParser().parseAbsoluteExpression(Imm16Val)) 3854 return MatchOperand_NoMatch; 3855 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 3856 Error(S, "invalid immediate: only 16-bit values are legal"); 3857 // Do not return error code, but create an imm operand anyway and proceed 3858 // to the next operand, if any. That avoids unneccessary error messages. 
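      // For reference, the accepted 16-bit encoding packs the register id in
      // bits [5:0], the bit offset in bits [10:6], and (width - 1) in bits
      // [15:11]; e.g. hwreg(HW_REG_GPR_ALLOC, 0, 32) corresponds to 0xf805.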
3859 } 3860 break; 3861 3862 case AsmToken::Identifier: { 3863 OperandInfoTy HwReg(ID_UNKNOWN_); 3864 int64_t Offset = OFFSET_DEFAULT_; 3865 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 3866 if (parseHwregConstruct(HwReg, Offset, Width)) 3867 return MatchOperand_ParseFail; 3868 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 3869 if (HwReg.IsSymbolic) 3870 Error(S, "invalid symbolic name of hardware register"); 3871 else 3872 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 3873 } 3874 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 3875 Error(S, "invalid bit offset: only 5-bit values are legal"); 3876 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 3877 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 3878 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 3879 } 3880 break; 3881 } 3882 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 3883 return MatchOperand_Success; 3884 } 3885 3886 bool AMDGPUOperand::isSWaitCnt() const { 3887 return isImm(); 3888 } 3889 3890 bool AMDGPUOperand::isHwreg() const { 3891 return isImmTy(ImmTyHwreg); 3892 } 3893 3894 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 3895 using namespace llvm::AMDGPU::SendMsg; 3896 3897 if (Parser.getTok().getString() != "sendmsg") 3898 return true; 3899 Parser.Lex(); 3900 3901 if (getLexer().isNot(AsmToken::LParen)) 3902 return true; 3903 Parser.Lex(); 3904 3905 if (getLexer().is(AsmToken::Identifier)) { 3906 Msg.IsSymbolic = true; 3907 Msg.Id = ID_UNKNOWN_; 3908 const std::string tok = Parser.getTok().getString(); 3909 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 3910 switch(i) { 3911 default: continue; // Omit gaps. 3912 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 3913 } 3914 if (tok == IdSymbolic[i]) { 3915 Msg.Id = i; 3916 break; 3917 } 3918 } 3919 Parser.Lex(); 3920 } else { 3921 Msg.IsSymbolic = false; 3922 if (getLexer().isNot(AsmToken::Integer)) 3923 return true; 3924 if (getParser().parseAbsoluteExpression(Msg.Id)) 3925 return true; 3926 if (getLexer().is(AsmToken::Integer)) 3927 if (getParser().parseAbsoluteExpression(Msg.Id)) 3928 Msg.Id = ID_UNKNOWN_; 3929 } 3930 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 3931 return false; 3932 3933 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 3934 if (getLexer().isNot(AsmToken::RParen)) 3935 return true; 3936 Parser.Lex(); 3937 return false; 3938 } 3939 3940 if (getLexer().isNot(AsmToken::Comma)) 3941 return true; 3942 Parser.Lex(); 3943 3944 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 3945 Operation.Id = ID_UNKNOWN_; 3946 if (getLexer().is(AsmToken::Identifier)) { 3947 Operation.IsSymbolic = true; 3948 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 3949 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 3950 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 3951 const StringRef Tok = Parser.getTok().getString(); 3952 for (int i = F; i < L; ++i) { 3953 if (Tok == S[i]) { 3954 Operation.Id = i; 3955 break; 3956 } 3957 } 3958 Parser.Lex(); 3959 } else { 3960 Operation.IsSymbolic = false; 3961 if (getLexer().isNot(AsmToken::Integer)) 3962 return true; 3963 if (getParser().parseAbsoluteExpression(Operation.Id)) 3964 return true; 3965 } 3966 3967 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 3968 // Stream id is optional. 3969 if (getLexer().is(AsmToken::RParen)) { 3970 Parser.Lex(); 3971 return false; 3972 } 3973 3974 if (getLexer().isNot(AsmToken::Comma)) 3975 return true; 3976 Parser.Lex(); 3977 3978 if (getLexer().isNot(AsmToken::Integer)) 3979 return true; 3980 if (getParser().parseAbsoluteExpression(StreamId)) 3981 return true; 3982 } 3983 3984 if (getLexer().isNot(AsmToken::RParen)) 3985 return true; 3986 Parser.Lex(); 3987 return false; 3988 } 3989 3990 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 3991 if (getLexer().getKind() != AsmToken::Identifier) 3992 return MatchOperand_NoMatch; 3993 3994 StringRef Str = Parser.getTok().getString(); 3995 int Slot = StringSwitch<int>(Str) 3996 .Case("p10", 0) 3997 .Case("p20", 1) 3998 .Case("p0", 2) 3999 .Default(-1); 4000 4001 SMLoc S = Parser.getTok().getLoc(); 4002 if (Slot == -1) 4003 return MatchOperand_ParseFail; 4004 4005 Parser.Lex(); 4006 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4007 AMDGPUOperand::ImmTyInterpSlot)); 4008 return MatchOperand_Success; 4009 } 4010 4011 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4012 if (getLexer().getKind() != AsmToken::Identifier) 4013 return MatchOperand_NoMatch; 4014 4015 StringRef Str = Parser.getTok().getString(); 4016 if (!Str.startswith("attr")) 4017 return MatchOperand_NoMatch; 4018 4019 StringRef Chan = Str.take_back(2); 4020 int AttrChan = StringSwitch<int>(Chan) 4021 .Case(".x", 0) 4022 .Case(".y", 1) 4023 .Case(".z", 2) 4024 .Case(".w", 3) 4025 .Default(-1); 4026 if (AttrChan == -1) 4027 return MatchOperand_ParseFail; 4028 4029 Str = Str.drop_back(2).drop_front(4); 4030 4031 uint8_t Attr; 4032 if (Str.getAsInteger(10, Attr)) 4033 return MatchOperand_ParseFail; 4034 4035 SMLoc S = Parser.getTok().getLoc(); 4036 Parser.Lex(); 4037 if (Attr > 63) { 4038 Error(S, "out of bounds attr"); 4039 return MatchOperand_Success; 4040 } 4041 4042 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4043 4044 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4045 AMDGPUOperand::ImmTyInterpAttr)); 4046 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4047 AMDGPUOperand::ImmTyAttrChan)); 4048 return MatchOperand_Success; 4049 } 4050 4051 void AMDGPUAsmParser::errorExpTgt() { 4052 Error(Parser.getTok().getLoc(), "invalid exp target"); 4053 } 4054 4055 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4056 uint8_t &Val) { 4057 if (Str == "null") { 4058 Val = 9; 4059 return MatchOperand_Success; 4060 } 4061 4062 if (Str.startswith("mrt")) { 4063 Str = Str.drop_front(3); 4064 if (Str == "z") { // == mrtz 4065 Val = 8; 4066 return MatchOperand_Success; 4067 } 4068 4069 if (Str.getAsInteger(10, Val)) 4070 return MatchOperand_ParseFail; 4071 4072 if (Val > 7) 4073 errorExpTgt(); 4074 4075 return MatchOperand_Success; 4076 } 4077 4078 if (Str.startswith("pos")) { 4079 Str = Str.drop_front(3); 4080 if (Str.getAsInteger(10, Val)) 4081 return MatchOperand_ParseFail; 4082 4083 if (Val > 
3) 4084 errorExpTgt(); 4085 4086 Val += 12; 4087 return MatchOperand_Success; 4088 } 4089 4090 if (Str.startswith("param")) { 4091 Str = Str.drop_front(5); 4092 if (Str.getAsInteger(10, Val)) 4093 return MatchOperand_ParseFail; 4094 4095 if (Val >= 32) 4096 errorExpTgt(); 4097 4098 Val += 32; 4099 return MatchOperand_Success; 4100 } 4101 4102 if (Str.startswith("invalid_target_")) { 4103 Str = Str.drop_front(15); 4104 if (Str.getAsInteger(10, Val)) 4105 return MatchOperand_ParseFail; 4106 4107 errorExpTgt(); 4108 return MatchOperand_Success; 4109 } 4110 4111 return MatchOperand_NoMatch; 4112 } 4113 4114 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4115 uint8_t Val; 4116 StringRef Str = Parser.getTok().getString(); 4117 4118 auto Res = parseExpTgtImpl(Str, Val); 4119 if (Res != MatchOperand_Success) 4120 return Res; 4121 4122 SMLoc S = Parser.getTok().getLoc(); 4123 Parser.Lex(); 4124 4125 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4126 AMDGPUOperand::ImmTyExpTgt)); 4127 return MatchOperand_Success; 4128 } 4129 4130 OperandMatchResultTy 4131 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4132 using namespace llvm::AMDGPU::SendMsg; 4133 4134 int64_t Imm16Val = 0; 4135 SMLoc S = Parser.getTok().getLoc(); 4136 4137 switch(getLexer().getKind()) { 4138 default: 4139 return MatchOperand_NoMatch; 4140 case AsmToken::Integer: 4141 // The operand can be an integer value. 4142 if (getParser().parseAbsoluteExpression(Imm16Val)) 4143 return MatchOperand_NoMatch; 4144 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4145 Error(S, "invalid immediate: only 16-bit values are legal"); 4146 // Do not return error code, but create an imm operand anyway and proceed 4147 // to the next operand, if any. That avoids unneccessary error messages. 4148 } 4149 break; 4150 case AsmToken::Identifier: { 4151 OperandInfoTy Msg(ID_UNKNOWN_); 4152 OperandInfoTy Operation(OP_UNKNOWN_); 4153 int64_t StreamId = STREAM_ID_DEFAULT_; 4154 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4155 return MatchOperand_ParseFail; 4156 do { 4157 // Validate and encode message ID. 4158 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4159 || Msg.Id == ID_SYSMSG)) { 4160 if (Msg.IsSymbolic) 4161 Error(S, "invalid/unsupported symbolic name of message"); 4162 else 4163 Error(S, "invalid/unsupported code of message"); 4164 break; 4165 } 4166 Imm16Val = (Msg.Id << ID_SHIFT_); 4167 // Validate and encode operation ID. 4168 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4169 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4170 if (Operation.IsSymbolic) 4171 Error(S, "invalid symbolic name of GS_OP"); 4172 else 4173 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4174 break; 4175 } 4176 if (Operation.Id == OP_GS_NOP 4177 && Msg.Id != ID_GS_DONE) { 4178 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4179 break; 4180 } 4181 Imm16Val |= (Operation.Id << OP_SHIFT_); 4182 } 4183 if (Msg.Id == ID_SYSMSG) { 4184 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4185 if (Operation.IsSymbolic) 4186 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4187 else 4188 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4189 break; 4190 } 4191 Imm16Val |= (Operation.Id << OP_SHIFT_); 4192 } 4193 // Validate and encode stream ID. 4194 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4195 if (! 
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4196 Error(S, "invalid stream id: only 2-bit values are legal"); 4197 break; 4198 } 4199 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4200 } 4201 } while (false); 4202 } 4203 break; 4204 } 4205 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4206 return MatchOperand_Success; 4207 } 4208 4209 bool AMDGPUOperand::isSendMsg() const { 4210 return isImmTy(ImmTySendMsg); 4211 } 4212 4213 //===----------------------------------------------------------------------===// 4214 // parser helpers 4215 //===----------------------------------------------------------------------===// 4216 4217 bool 4218 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4219 if (getLexer().getKind() == AsmToken::Identifier && 4220 Parser.getTok().getString() == Id) { 4221 Parser.Lex(); 4222 return true; 4223 } 4224 return false; 4225 } 4226 4227 bool 4228 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4229 if (getLexer().getKind() == Kind) { 4230 Parser.Lex(); 4231 return true; 4232 } 4233 return false; 4234 } 4235 4236 bool 4237 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4238 const StringRef ErrMsg) { 4239 if (!trySkipToken(Kind)) { 4240 Error(Parser.getTok().getLoc(), ErrMsg); 4241 return false; 4242 } 4243 return true; 4244 } 4245 4246 bool 4247 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4248 return !getParser().parseAbsoluteExpression(Imm); 4249 } 4250 4251 bool 4252 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4253 SMLoc S = Parser.getTok().getLoc(); 4254 if (getLexer().getKind() == AsmToken::String) { 4255 Val = Parser.getTok().getStringContents(); 4256 Parser.Lex(); 4257 return true; 4258 } else { 4259 Error(S, ErrMsg); 4260 return false; 4261 } 4262 } 4263 4264 //===----------------------------------------------------------------------===// 4265 // swizzle 4266 //===----------------------------------------------------------------------===// 4267 4268 LLVM_READNONE 4269 static unsigned 4270 encodeBitmaskPerm(const unsigned AndMask, 4271 const unsigned OrMask, 4272 const unsigned XorMask) { 4273 using namespace llvm::AMDGPU::Swizzle; 4274 4275 return BITMASK_PERM_ENC | 4276 (AndMask << BITMASK_AND_SHIFT) | 4277 (OrMask << BITMASK_OR_SHIFT) | 4278 (XorMask << BITMASK_XOR_SHIFT); 4279 } 4280 4281 bool 4282 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4283 const unsigned MinVal, 4284 const unsigned MaxVal, 4285 const StringRef ErrMsg) { 4286 for (unsigned i = 0; i < OpNum; ++i) { 4287 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4288 return false; 4289 } 4290 SMLoc ExprLoc = Parser.getTok().getLoc(); 4291 if (!parseExpr(Op[i])) { 4292 return false; 4293 } 4294 if (Op[i] < MinVal || Op[i] > MaxVal) { 4295 Error(ExprLoc, ErrMsg); 4296 return false; 4297 } 4298 } 4299 4300 return true; 4301 } 4302 4303 bool 4304 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4305 using namespace llvm::AMDGPU::Swizzle; 4306 4307 int64_t Lane[LANE_NUM]; 4308 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4309 "expected a 2-bit lane id")) { 4310 Imm = QUAD_PERM_ENC; 4311 for (auto i = 0; i < LANE_NUM; ++i) { 4312 Imm |= Lane[i] << (LANE_SHIFT * i); 4313 } 4314 return true; 4315 } 4316 return false; 4317 } 4318 4319 bool 4320 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4321 using namespace llvm::AMDGPU::Swizzle; 4322 4323 SMLoc S = Parser.getTok().getLoc(); 4324 int64_t GroupSize; 4325 int64_t LaneIdx; 4326 4327 if 
(!parseSwizzleOperands(1, &GroupSize, 4328 2, 32, 4329 "group size must be in the interval [2,32]")) { 4330 return false; 4331 } 4332 if (!isPowerOf2_64(GroupSize)) { 4333 Error(S, "group size must be a power of two"); 4334 return false; 4335 } 4336 if (parseSwizzleOperands(1, &LaneIdx, 4337 0, GroupSize - 1, 4338 "lane id must be in the interval [0,group size - 1]")) { 4339 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4340 return true; 4341 } 4342 return false; 4343 } 4344 4345 bool 4346 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4347 using namespace llvm::AMDGPU::Swizzle; 4348 4349 SMLoc S = Parser.getTok().getLoc(); 4350 int64_t GroupSize; 4351 4352 if (!parseSwizzleOperands(1, &GroupSize, 4353 2, 32, "group size must be in the interval [2,32]")) { 4354 return false; 4355 } 4356 if (!isPowerOf2_64(GroupSize)) { 4357 Error(S, "group size must be a power of two"); 4358 return false; 4359 } 4360 4361 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4362 return true; 4363 } 4364 4365 bool 4366 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4367 using namespace llvm::AMDGPU::Swizzle; 4368 4369 SMLoc S = Parser.getTok().getLoc(); 4370 int64_t GroupSize; 4371 4372 if (!parseSwizzleOperands(1, &GroupSize, 4373 1, 16, "group size must be in the interval [1,16]")) { 4374 return false; 4375 } 4376 if (!isPowerOf2_64(GroupSize)) { 4377 Error(S, "group size must be a power of two"); 4378 return false; 4379 } 4380 4381 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4382 return true; 4383 } 4384 4385 bool 4386 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4387 using namespace llvm::AMDGPU::Swizzle; 4388 4389 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4390 return false; 4391 } 4392 4393 StringRef Ctl; 4394 SMLoc StrLoc = Parser.getTok().getLoc(); 4395 if (!parseString(Ctl)) { 4396 return false; 4397 } 4398 if (Ctl.size() != BITMASK_WIDTH) { 4399 Error(StrLoc, "expected a 5-character mask"); 4400 return false; 4401 } 4402 4403 unsigned AndMask = 0; 4404 unsigned OrMask = 0; 4405 unsigned XorMask = 0; 4406 4407 for (size_t i = 0; i < Ctl.size(); ++i) { 4408 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4409 switch(Ctl[i]) { 4410 default: 4411 Error(StrLoc, "invalid mask"); 4412 return false; 4413 case '0': 4414 break; 4415 case '1': 4416 OrMask |= Mask; 4417 break; 4418 case 'p': 4419 AndMask |= Mask; 4420 break; 4421 case 'i': 4422 AndMask |= Mask; 4423 XorMask |= Mask; 4424 break; 4425 } 4426 } 4427 4428 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4429 return true; 4430 } 4431 4432 bool 4433 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4434 4435 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4436 4437 if (!parseExpr(Imm)) { 4438 return false; 4439 } 4440 if (!isUInt<16>(Imm)) { 4441 Error(OffsetLoc, "expected a 16-bit offset"); 4442 return false; 4443 } 4444 return true; 4445 } 4446 4447 bool 4448 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4449 using namespace llvm::AMDGPU::Swizzle; 4450 4451 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4452 4453 SMLoc ModeLoc = Parser.getTok().getLoc(); 4454 bool Ok = false; 4455 4456 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4457 Ok = parseSwizzleQuadPerm(Imm); 4458 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4459 Ok = parseSwizzleBitmaskPerm(Imm); 4460 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4461 Ok = parseSwizzleBroadcast(Imm); 4462 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4463 Ok = parseSwizzleSwap(Imm); 4464 } else if 
(trySkipId(IdSymbolic[ID_REVERSE])) { 4465 Ok = parseSwizzleReverse(Imm); 4466 } else { 4467 Error(ModeLoc, "expected a swizzle mode"); 4468 } 4469 4470 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4471 } 4472 4473 return false; 4474 } 4475 4476 OperandMatchResultTy 4477 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4478 SMLoc S = Parser.getTok().getLoc(); 4479 int64_t Imm = 0; 4480 4481 if (trySkipId("offset")) { 4482 4483 bool Ok = false; 4484 if (skipToken(AsmToken::Colon, "expected a colon")) { 4485 if (trySkipId("swizzle")) { 4486 Ok = parseSwizzleMacro(Imm); 4487 } else { 4488 Ok = parseSwizzleOffset(Imm); 4489 } 4490 } 4491 4492 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4493 4494 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4495 } else { 4496 // Swizzle "offset" operand is optional. 4497 // If it is omitted, try parsing other optional operands. 4498 return parseOptionalOpr(Operands); 4499 } 4500 } 4501 4502 bool 4503 AMDGPUOperand::isSwizzle() const { 4504 return isImmTy(ImmTySwizzle); 4505 } 4506 4507 //===----------------------------------------------------------------------===// 4508 // sopp branch targets 4509 //===----------------------------------------------------------------------===// 4510 4511 OperandMatchResultTy 4512 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4513 SMLoc S = Parser.getTok().getLoc(); 4514 4515 switch (getLexer().getKind()) { 4516 default: return MatchOperand_ParseFail; 4517 case AsmToken::Integer: { 4518 int64_t Imm; 4519 if (getParser().parseAbsoluteExpression(Imm)) 4520 return MatchOperand_ParseFail; 4521 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4522 return MatchOperand_Success; 4523 } 4524 4525 case AsmToken::Identifier: 4526 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4527 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4528 Parser.getTok().getString()), getContext()), S)); 4529 Parser.Lex(); 4530 return MatchOperand_Success; 4531 } 4532 } 4533 4534 //===----------------------------------------------------------------------===// 4535 // mubuf 4536 //===----------------------------------------------------------------------===// 4537 4538 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4539 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4540 } 4541 4542 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4543 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4544 } 4545 4546 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4547 const OperandVector &Operands, 4548 bool IsAtomic, 4549 bool IsAtomicReturn, 4550 bool IsLds) { 4551 bool IsLdsOpcode = IsLds; 4552 bool HasLdsModifier = false; 4553 OptionalImmIndexMap OptionalIdx; 4554 assert(IsAtomicReturn ? IsAtomic : true); 4555 4556 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4557 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4558 4559 // Add the register arguments 4560 if (Op.isReg()) { 4561 Op.addRegOperands(Inst, 1); 4562 continue; 4563 } 4564 4565 // Handle the case where soffset is an immediate 4566 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4567 Op.addImmOperands(Inst, 1); 4568 continue; 4569 } 4570 4571 HasLdsModifier = Op.isLDS(); 4572 4573 // Handle tokens like 'offen' which are sometimes hard-coded into the 4574 // asm string. There are no MCInst operands for these. 
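    // For example, in something like
    //   buffer_load_dword v1, v2, s[4:7], s1 offen offset:4095 glc slc
    // 'offen' is such a hard-coded token, while offset/glc/slc are the
    // optional immediates collected into OptionalIdx below.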
4575 if (Op.isToken()) { 4576 continue; 4577 } 4578 assert(Op.isImm()); 4579 4580 // Handle optional arguments 4581 OptionalIdx[Op.getImmTy()] = i; 4582 } 4583 4584 // This is a workaround for an llvm quirk which may result in an 4585 // incorrect instruction selection. Lds and non-lds versions of 4586 // MUBUF instructions are identical except that lds versions 4587 // have mandatory 'lds' modifier. However this modifier follows 4588 // optional modifiers and llvm asm matcher regards this 'lds' 4589 // modifier as an optional one. As a result, an lds version 4590 // of opcode may be selected even if it has no 'lds' modifier. 4591 if (IsLdsOpcode && !HasLdsModifier) { 4592 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4593 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4594 Inst.setOpcode(NoLdsOpcode); 4595 IsLdsOpcode = false; 4596 } 4597 } 4598 4599 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4600 if (IsAtomicReturn) { 4601 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4602 Inst.insert(I, *I); 4603 } 4604 4605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4606 if (!IsAtomic) { // glc is hard-coded. 4607 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4608 } 4609 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4610 4611 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4612 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4613 } 4614 } 4615 4616 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4617 OptionalImmIndexMap OptionalIdx; 4618 4619 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4620 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4621 4622 // Add the register arguments 4623 if (Op.isReg()) { 4624 Op.addRegOperands(Inst, 1); 4625 continue; 4626 } 4627 4628 // Handle the case where soffset is an immediate 4629 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4630 Op.addImmOperands(Inst, 1); 4631 continue; 4632 } 4633 4634 // Handle tokens like 'offen' which are sometimes hard-coded into the 4635 // asm string. There are no MCInst operands for these. 
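    // As with MUBUF above, tokens get no MCInst operands here; note that the
    // joint dfmt/nfmt value from parseDfmtNfmt() arrives as an ImmTyFORMAT
    // immediate and is re-emitted in its fixed position after this loop.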
4636 if (Op.isToken()) { 4637 continue; 4638 } 4639 assert(Op.isImm()); 4640 4641 // Handle optional arguments 4642 OptionalIdx[Op.getImmTy()] = i; 4643 } 4644 4645 addOptionalImmOperand(Inst, Operands, OptionalIdx, 4646 AMDGPUOperand::ImmTyOffset); 4647 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 4648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4651 } 4652 4653 //===----------------------------------------------------------------------===// 4654 // mimg 4655 //===----------------------------------------------------------------------===// 4656 4657 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 4658 bool IsAtomic) { 4659 unsigned I = 1; 4660 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4661 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4662 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4663 } 4664 4665 if (IsAtomic) { 4666 // Add src, same as dst 4667 assert(Desc.getNumDefs() == 1); 4668 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 4669 } 4670 4671 OptionalImmIndexMap OptionalIdx; 4672 4673 for (unsigned E = Operands.size(); I != E; ++I) { 4674 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4675 4676 // Add the register arguments 4677 if (Op.isReg()) { 4678 Op.addRegOperands(Inst, 1); 4679 } else if (Op.isImmModifier()) { 4680 OptionalIdx[Op.getImmTy()] = I; 4681 } else { 4682 llvm_unreachable("unexpected operand type"); 4683 } 4684 } 4685 4686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 4687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 4688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4689 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4690 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 4691 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 4693 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 4694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 4695 } 4696 4697 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 4698 cvtMIMG(Inst, Operands, true); 4699 } 4700 4701 //===----------------------------------------------------------------------===// 4702 // smrd 4703 //===----------------------------------------------------------------------===// 4704 4705 bool AMDGPUOperand::isSMRDOffset8() const { 4706 return isImm() && isUInt<8>(getImm()); 4707 } 4708 4709 bool AMDGPUOperand::isSMRDOffset20() const { 4710 return isImm() && isUInt<20>(getImm()); 4711 } 4712 4713 bool AMDGPUOperand::isSMRDLiteralOffset() const { 4714 // 32-bit literals are only supported on CI and we only want to use them 4715 // when the offset is > 8-bits. 
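  // In other words, accept immediates that need more than 8 bits but still
  // fit in 32 bits.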
4716 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 4717 } 4718 4719 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 4720 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4721 } 4722 4723 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 4724 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4725 } 4726 4727 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 4728 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4729 } 4730 4731 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 4732 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4733 } 4734 4735 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 4736 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4737 } 4738 4739 //===----------------------------------------------------------------------===// 4740 // vop3 4741 //===----------------------------------------------------------------------===// 4742 4743 static bool ConvertOmodMul(int64_t &Mul) { 4744 if (Mul != 1 && Mul != 2 && Mul != 4) 4745 return false; 4746 4747 Mul >>= 1; 4748 return true; 4749 } 4750 4751 static bool ConvertOmodDiv(int64_t &Div) { 4752 if (Div == 1) { 4753 Div = 0; 4754 return true; 4755 } 4756 4757 if (Div == 2) { 4758 Div = 3; 4759 return true; 4760 } 4761 4762 return false; 4763 } 4764 4765 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 4766 if (BoundCtrl == 0) { 4767 BoundCtrl = 1; 4768 return true; 4769 } 4770 4771 if (BoundCtrl == -1) { 4772 BoundCtrl = 0; 4773 return true; 4774 } 4775 4776 return false; 4777 } 4778 4779 // Note: the order in this table matches the order of operands in AsmString. 
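// Each entry is {name, immediate type, is-bit-flag, optional converter}. Bit
// flags such as "glc" are written as bare tokens (or negated "noglc"-style
// tokens handled by parseNamedBit), while value operands such as "offset" use
// the "name:value" form and may post-process the parsed integer with a
// converter like ConvertOmodMul.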
4780 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 4781 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 4782 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 4783 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 4784 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 4785 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 4786 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 4787 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 4788 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 4789 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 4790 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 4791 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 4792 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 4793 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 4794 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4795 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 4796 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 4797 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 4798 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 4799 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 4800 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 4801 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 4802 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 4803 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4804 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 4805 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 4806 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 4807 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 4808 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 4809 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 4810 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 4811 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 4812 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 4813 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 4814 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 4815 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 4816 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 4817 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 4818 }; 4819 4820 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 4821 unsigned size = Operands.size(); 4822 assert(size > 0); 4823 4824 OperandMatchResultTy res = parseOptionalOpr(Operands); 4825 4826 // This is a hack to enable hardcoded mandatory operands which follow 4827 // optional operands. 4828 // 4829 // Current design assumes that all operands after the first optional operand 4830 // are also optional. However implementation of some instructions violates 4831 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 4832 // 4833 // To alleviate this problem, we have to (implicitly) parse extra operands 4834 // to make sure autogenerated parser of custom operands never hit hardcoded 4835 // mandatory operands. 4836 4837 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 4838 4839 // We have parsed the first optional operand. 4840 // Parse as many operands as necessary to skip all mandatory operands. 
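  // For example, in a returning atomic such as
  //   flat_atomic_swap v0, v[1:2], v3 glc slc
  // the hard-coded 'glc' follows the optional operands, so a few extra
  // lookahead iterations (bounded by MAX_OPR_LOOKAHEAD) are taken here.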
4841 4842 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 4843 if (res != MatchOperand_Success || 4844 getLexer().is(AsmToken::EndOfStatement)) break; 4845 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 4846 res = parseOptionalOpr(Operands); 4847 } 4848 } 4849 4850 return res; 4851 } 4852 4853 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 4854 OperandMatchResultTy res; 4855 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 4856 // try to parse any optional operand here 4857 if (Op.IsBit) { 4858 res = parseNamedBit(Op.Name, Operands, Op.Type); 4859 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 4860 res = parseOModOperand(Operands); 4861 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 4862 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 4863 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 4864 res = parseSDWASel(Operands, Op.Name, Op.Type); 4865 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 4866 res = parseSDWADstUnused(Operands); 4867 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 4868 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 4869 Op.Type == AMDGPUOperand::ImmTyNegLo || 4870 Op.Type == AMDGPUOperand::ImmTyNegHi) { 4871 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 4872 Op.ConvertResult); 4873 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 4874 res = parseDfmtNfmt(Operands); 4875 } else { 4876 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 4877 } 4878 if (res != MatchOperand_NoMatch) { 4879 return res; 4880 } 4881 } 4882 return MatchOperand_NoMatch; 4883 } 4884 4885 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 4886 StringRef Name = Parser.getTok().getString(); 4887 if (Name == "mul") { 4888 return parseIntWithPrefix("mul", Operands, 4889 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 4890 } 4891 4892 if (Name == "div") { 4893 return parseIntWithPrefix("div", Operands, 4894 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 4895 } 4896 4897 return MatchOperand_NoMatch; 4898 } 4899 4900 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 4901 cvtVOP3P(Inst, Operands); 4902 4903 int Opc = Inst.getOpcode(); 4904 4905 int SrcNum; 4906 const int Ops[] = { AMDGPU::OpName::src0, 4907 AMDGPU::OpName::src1, 4908 AMDGPU::OpName::src2 }; 4909 for (SrcNum = 0; 4910 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 4911 ++SrcNum); 4912 assert(SrcNum > 0); 4913 4914 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4915 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4916 4917 if ((OpSel & (1 << SrcNum)) != 0) { 4918 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 4919 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 4920 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 4921 } 4922 } 4923 4924 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 4925 // 1. This operand is input modifiers 4926 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 4927 // 2. This is not last operand 4928 && Desc.NumOperands > (OpNum + 1) 4929 // 3. Next operand is register class 4930 && Desc.OpInfo[OpNum + 1].RegClass != -1 4931 // 4. 
Next register is not tied to any other operand 4932 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 4933 } 4934 4935 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 4936 { 4937 OptionalImmIndexMap OptionalIdx; 4938 unsigned Opc = Inst.getOpcode(); 4939 4940 unsigned I = 1; 4941 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4942 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4943 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4944 } 4945 4946 for (unsigned E = Operands.size(); I != E; ++I) { 4947 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4948 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 4949 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 4950 } else if (Op.isInterpSlot() || 4951 Op.isInterpAttr() || 4952 Op.isAttrChan()) { 4953 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 4954 } else if (Op.isImmModifier()) { 4955 OptionalIdx[Op.getImmTy()] = I; 4956 } else { 4957 llvm_unreachable("unhandled operand type"); 4958 } 4959 } 4960 4961 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 4962 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 4963 } 4964 4965 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 4966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 4967 } 4968 4969 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 4970 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 4971 } 4972 } 4973 4974 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 4975 OptionalImmIndexMap &OptionalIdx) { 4976 unsigned Opc = Inst.getOpcode(); 4977 4978 unsigned I = 1; 4979 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4980 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4981 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4982 } 4983 4984 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 4985 // This instruction has src modifiers 4986 for (unsigned E = Operands.size(); I != E; ++I) { 4987 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4988 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 4989 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 4990 } else if (Op.isImmModifier()) { 4991 OptionalIdx[Op.getImmTy()] = I; 4992 } else if (Op.isRegOrImm()) { 4993 Op.addRegOrImmOperands(Inst, 1); 4994 } else { 4995 llvm_unreachable("unhandled operand type"); 4996 } 4997 } 4998 } else { 4999 // No src modifiers 5000 for (unsigned E = Operands.size(); I != E; ++I) { 5001 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5002 if (Op.isMod()) { 5003 OptionalIdx[Op.getImmTy()] = I; 5004 } else { 5005 Op.addRegOrImmOperands(Inst, 1); 5006 } 5007 } 5008 } 5009 5010 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5012 } 5013 5014 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5015 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5016 } 5017 5018 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5019 // it has src2 register operand that is tied to dst operand 5020 // we don't allow modifiers for this operand in assembler so src2_modifiers 5021 // should be 0. 
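  // For example, for
  //   v_mac_f32_e64 v0, v1, v2
  // the parsed operands stop at src1; the code below inserts a zero
  // src2_modifiers immediate and then a copy of the dst register as the tied
  // src2 operand.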
5022 if (Opc == AMDGPU::V_MAC_F32_e64_si || 5023 Opc == AMDGPU::V_MAC_F32_e64_vi || 5024 Opc == AMDGPU::V_MAC_F16_e64_vi || 5025 Opc == AMDGPU::V_FMAC_F32_e64_vi) { 5026 auto it = Inst.begin(); 5027 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5028 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5029 ++it; 5030 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5031 } 5032 } 5033 5034 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5035 OptionalImmIndexMap OptionalIdx; 5036 cvtVOP3(Inst, Operands, OptionalIdx); 5037 } 5038 5039 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5040 const OperandVector &Operands) { 5041 OptionalImmIndexMap OptIdx; 5042 const int Opc = Inst.getOpcode(); 5043 const MCInstrDesc &Desc = MII.get(Opc); 5044 5045 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5046 5047 cvtVOP3(Inst, Operands, OptIdx); 5048 5049 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5050 assert(!IsPacked); 5051 Inst.addOperand(Inst.getOperand(0)); 5052 } 5053 5054 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5055 // instruction, and then figure out where to actually put the modifiers 5056 5057 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5058 5059 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5060 if (OpSelHiIdx != -1) { 5061 int DefaultVal = IsPacked ? -1 : 0; 5062 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5063 DefaultVal); 5064 } 5065 5066 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5067 if (NegLoIdx != -1) { 5068 assert(IsPacked); 5069 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5070 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5071 } 5072 5073 const int Ops[] = { AMDGPU::OpName::src0, 5074 AMDGPU::OpName::src1, 5075 AMDGPU::OpName::src2 }; 5076 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5077 AMDGPU::OpName::src1_modifiers, 5078 AMDGPU::OpName::src2_modifiers }; 5079 5080 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5081 5082 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5083 unsigned OpSelHi = 0; 5084 unsigned NegLo = 0; 5085 unsigned NegHi = 0; 5086 5087 if (OpSelHiIdx != -1) { 5088 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5089 } 5090 5091 if (NegLoIdx != -1) { 5092 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5093 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5094 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5095 } 5096 5097 for (int J = 0; J < 3; ++J) { 5098 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5099 if (OpIdx == -1) 5100 break; 5101 5102 uint32_t ModVal = 0; 5103 5104 if ((OpSel & (1 << J)) != 0) 5105 ModVal |= SISrcMods::OP_SEL_0; 5106 5107 if ((OpSelHi & (1 << J)) != 0) 5108 ModVal |= SISrcMods::OP_SEL_1; 5109 5110 if ((NegLo & (1 << J)) != 0) 5111 ModVal |= SISrcMods::NEG; 5112 5113 if ((NegHi & (1 << J)) != 0) 5114 ModVal |= SISrcMods::NEG_HI; 5115 5116 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5117 5118 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5119 } 5120 } 5121 5122 //===----------------------------------------------------------------------===// 5123 // dpp 5124 //===----------------------------------------------------------------------===// 5125 5126 bool AMDGPUOperand::isDPPCtrl() const { 5127 using 
namespace AMDGPU::DPP; 5128 5129 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5130 if (result) { 5131 int64_t Imm = getImm(); 5132 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5133 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5134 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5135 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5136 (Imm == DppCtrl::WAVE_SHL1) || 5137 (Imm == DppCtrl::WAVE_ROL1) || 5138 (Imm == DppCtrl::WAVE_SHR1) || 5139 (Imm == DppCtrl::WAVE_ROR1) || 5140 (Imm == DppCtrl::ROW_MIRROR) || 5141 (Imm == DppCtrl::ROW_HALF_MIRROR) || 5142 (Imm == DppCtrl::BCAST15) || 5143 (Imm == DppCtrl::BCAST31); 5144 } 5145 return false; 5146 } 5147 5148 bool AMDGPUOperand::isGPRIdxMode() const { 5149 return isImm() && isUInt<4>(getImm()); 5150 } 5151 5152 bool AMDGPUOperand::isS16Imm() const { 5153 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 5154 } 5155 5156 bool AMDGPUOperand::isU16Imm() const { 5157 return isImm() && isUInt<16>(getImm()); 5158 } 5159 5160 OperandMatchResultTy 5161 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 5162 using namespace AMDGPU::DPP; 5163 5164 SMLoc S = Parser.getTok().getLoc(); 5165 StringRef Prefix; 5166 int64_t Int; 5167 5168 if (getLexer().getKind() == AsmToken::Identifier) { 5169 Prefix = Parser.getTok().getString(); 5170 } else { 5171 return MatchOperand_NoMatch; 5172 } 5173 5174 if (Prefix == "row_mirror") { 5175 Int = DppCtrl::ROW_MIRROR; 5176 Parser.Lex(); 5177 } else if (Prefix == "row_half_mirror") { 5178 Int = DppCtrl::ROW_HALF_MIRROR; 5179 Parser.Lex(); 5180 } else { 5181 // Check to prevent parseDPPCtrlOps from eating invalid tokens 5182 if (Prefix != "quad_perm" 5183 && Prefix != "row_shl" 5184 && Prefix != "row_shr" 5185 && Prefix != "row_ror" 5186 && Prefix != "wave_shl" 5187 && Prefix != "wave_rol" 5188 && Prefix != "wave_shr" 5189 && Prefix != "wave_ror" 5190 && Prefix != "row_bcast") { 5191 return MatchOperand_NoMatch; 5192 } 5193 5194 Parser.Lex(); 5195 if (getLexer().isNot(AsmToken::Colon)) 5196 return MatchOperand_ParseFail; 5197 5198 if (Prefix == "quad_perm") { 5199 // quad_perm:[%d,%d,%d,%d] 5200 Parser.Lex(); 5201 if (getLexer().isNot(AsmToken::LBrac)) 5202 return MatchOperand_ParseFail; 5203 Parser.Lex(); 5204 5205 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 5206 return MatchOperand_ParseFail; 5207 5208 for (int i = 0; i < 3; ++i) { 5209 if (getLexer().isNot(AsmToken::Comma)) 5210 return MatchOperand_ParseFail; 5211 Parser.Lex(); 5212 5213 int64_t Temp; 5214 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 5215 return MatchOperand_ParseFail; 5216 const int shift = i*2 + 2; 5217 Int += (Temp << shift); 5218 } 5219 5220 if (getLexer().isNot(AsmToken::RBrac)) 5221 return MatchOperand_ParseFail; 5222 Parser.Lex(); 5223 } else { 5224 // sel:%d 5225 Parser.Lex(); 5226 if (getParser().parseAbsoluteExpression(Int)) 5227 return MatchOperand_ParseFail; 5228 5229 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 5230 Int |= DppCtrl::ROW_SHL0; 5231 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 5232 Int |= DppCtrl::ROW_SHR0; 5233 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 5234 Int |= DppCtrl::ROW_ROR0; 5235 } else if (Prefix == "wave_shl" && 1 == Int) { 5236 Int = DppCtrl::WAVE_SHL1; 5237 } else if (Prefix == "wave_rol" && 1 == Int) { 5238 Int = DppCtrl::WAVE_ROL1; 5239 } else if (Prefix == "wave_shr" && 1 == 
Int) { 5240 Int = DppCtrl::WAVE_SHR1; 5241 } else if (Prefix == "wave_ror" && 1 == Int) { 5242 Int = DppCtrl::WAVE_ROR1; 5243 } else if (Prefix == "row_bcast") { 5244 if (Int == 15) { 5245 Int = DppCtrl::BCAST15; 5246 } else if (Int == 31) { 5247 Int = DppCtrl::BCAST31; 5248 } else { 5249 return MatchOperand_ParseFail; 5250 } 5251 } else { 5252 return MatchOperand_ParseFail; 5253 } 5254 } 5255 } 5256 5257 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 5258 return MatchOperand_Success; 5259 } 5260 5261 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 5262 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 5263 } 5264 5265 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 5266 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 5267 } 5268 5269 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 5270 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 5271 } 5272 5273 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { 5274 OptionalImmIndexMap OptionalIdx; 5275 5276 unsigned I = 1; 5277 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5278 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5279 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5280 } 5281 5282 // All DPP instructions with at least one source operand have a fake "old" 5283 // source at the beginning that's tied to the dst operand. Handle it here. 5284 if (Desc.getNumOperands() >= 2) 5285 Inst.addOperand(Inst.getOperand(0)); 5286 5287 for (unsigned E = Operands.size(); I != E; ++I) { 5288 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5289 // Add the register arguments 5290 if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { 5291 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 5292 // Skip it. 
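      // (For example, the explicit 'vcc' operands in something like
      //  v_addc_u32_dpp v1, vcc, v2, v3, vcc quad_perm:[0,1,2,3] row_mask:0xf
      //  are simply dropped here rather than added to the MCInst.)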
5293 continue; 5294 } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5295 Op.addRegWithFPInputModsOperands(Inst, 2); 5296 } else if (Op.isDPPCtrl()) { 5297 Op.addImmOperands(Inst, 1); 5298 } else if (Op.isImm()) { 5299 // Handle optional arguments 5300 OptionalIdx[Op.getImmTy()] = I; 5301 } else { 5302 llvm_unreachable("Invalid operand type"); 5303 } 5304 } 5305 5306 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 5307 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 5308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 5309 } 5310 5311 //===----------------------------------------------------------------------===// 5312 // sdwa 5313 //===----------------------------------------------------------------------===// 5314 5315 OperandMatchResultTy 5316 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 5317 AMDGPUOperand::ImmTy Type) { 5318 using namespace llvm::AMDGPU::SDWA; 5319 5320 SMLoc S = Parser.getTok().getLoc(); 5321 StringRef Value; 5322 OperandMatchResultTy res; 5323 5324 res = parseStringWithPrefix(Prefix, Value); 5325 if (res != MatchOperand_Success) { 5326 return res; 5327 } 5328 5329 int64_t Int; 5330 Int = StringSwitch<int64_t>(Value) 5331 .Case("BYTE_0", SdwaSel::BYTE_0) 5332 .Case("BYTE_1", SdwaSel::BYTE_1) 5333 .Case("BYTE_2", SdwaSel::BYTE_2) 5334 .Case("BYTE_3", SdwaSel::BYTE_3) 5335 .Case("WORD_0", SdwaSel::WORD_0) 5336 .Case("WORD_1", SdwaSel::WORD_1) 5337 .Case("DWORD", SdwaSel::DWORD) 5338 .Default(0xffffffff); 5339 Parser.Lex(); // eat last token 5340 5341 if (Int == 0xffffffff) { 5342 return MatchOperand_ParseFail; 5343 } 5344 5345 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 5346 return MatchOperand_Success; 5347 } 5348 5349 OperandMatchResultTy 5350 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 5351 using namespace llvm::AMDGPU::SDWA; 5352 5353 SMLoc S = Parser.getTok().getLoc(); 5354 StringRef Value; 5355 OperandMatchResultTy res; 5356 5357 res = parseStringWithPrefix("dst_unused", Value); 5358 if (res != MatchOperand_Success) { 5359 return res; 5360 } 5361 5362 int64_t Int; 5363 Int = StringSwitch<int64_t>(Value) 5364 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 5365 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 5366 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 5367 .Default(0xffffffff); 5368 Parser.Lex(); // eat last token 5369 5370 if (Int == 0xffffffff) { 5371 return MatchOperand_ParseFail; 5372 } 5373 5374 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 5375 return MatchOperand_Success; 5376 } 5377 5378 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 5379 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 5380 } 5381 5382 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 5383 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 5384 } 5385 5386 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 5387 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 5388 } 5389 5390 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 5391 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 5392 } 5393 5394 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 5395 uint64_t BasicInstType, bool skipVcc) { 5396 using namespace llvm::AMDGPU::SDWA; 5397 5398 OptionalImmIndexMap OptionalIdx; 5399 bool skippedVcc 
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
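// Convert a parsed sdwa instruction into an MCInst. Explicit operands are
// added first; any sdwa modifiers the user omitted (clamp, omod, dst_sel,
// dst_unused, src0_sel, src1_sel, depending on the encoding) are then
// appended with default values via addOptionalImmOperand. When skipVcc is
// set, a "vcc" register written in the source is dropped in the positions
// described by the comments below. As an illustrative example,
// "v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0" still receives dst_unused,
// src0_sel and clamp operands in the resulting MCInst.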
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa instructions use the "vcc"
      // token as dst. Skip it if it is the 2nd operand
      // (e.g. v_add_i32_sdwa v1, vcc, v2, v3) or the 4th operand
      // (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get a corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}