//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

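    // For example, a source operand written as "-|v0|" is parsed with both Neg
    // and Abs set, so getFPModifiersOperand() below yields
    // SISrcMods::NEG | SISrcMods::ABS, while a plain "v0" leaves all flags
    // clear and the returned operand is 0.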
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32);
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32);
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
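      // Illustrative example: assuming a gfx900 target with code object v3,
      // the branch below defines .amdgcn.gfx_generation_number = 9,
      // .amdgcn.gfx_generation_minor = 0 and .amdgcn.gfx_generation_stepping = 0;
      // otherwise the equivalent .option.machine_version_* symbols are defined
      // instead.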
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the operand's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

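// Illustrative behavior of isSafeTruncation() below: a value passes if it fits
// either as an unsigned or as a signed integer of the given width, so for
// Size == 16 both 0xffff and -1 are accepted while 0x1ffff is rejected.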
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zero, but we accept such
    // literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

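  // Illustrative example: an fp literal such as 1.0 used with a v2f16 operand
  // ends up as the f16 encoding 0x3C00 in the low half (high half zero), and
  // the lossless-conversion check below is what accepts or rejects it.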
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the semantics implied by the operand type.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

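// Illustrative mapping for getRegClass() above: a register range written as
// v[4:7] has RegWidth 4 and therefore selects VReg_128RegClassID, while s[0:1]
// (RegWidth 2) selects SGPR_64RegClassID.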
.Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1728 .Case("vcc_lo", AMDGPU::VCC_LO) 1729 .Case("vcc_hi", AMDGPU::VCC_HI) 1730 .Case("exec_lo", AMDGPU::EXEC_LO) 1731 .Case("exec_hi", AMDGPU::EXEC_HI) 1732 .Case("tma_lo", AMDGPU::TMA_LO) 1733 .Case("tma_hi", AMDGPU::TMA_HI) 1734 .Case("tba_lo", AMDGPU::TBA_LO) 1735 .Case("tba_hi", AMDGPU::TBA_HI) 1736 .Case("null", AMDGPU::SGPR_NULL) 1737 .Default(0); 1738 } 1739 1740 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1741 SMLoc &EndLoc) { 1742 auto R = parseRegister(); 1743 if (!R) return true; 1744 assert(R->isReg()); 1745 RegNo = R->getReg(); 1746 StartLoc = R->getStartLoc(); 1747 EndLoc = R->getEndLoc(); 1748 return false; 1749 } 1750 1751 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1752 RegisterKind RegKind, unsigned Reg1, 1753 unsigned RegNum) { 1754 switch (RegKind) { 1755 case IS_SPECIAL: 1756 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1757 Reg = AMDGPU::EXEC; 1758 RegWidth = 2; 1759 return true; 1760 } 1761 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1762 Reg = AMDGPU::FLAT_SCR; 1763 RegWidth = 2; 1764 return true; 1765 } 1766 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1767 Reg = AMDGPU::XNACK_MASK; 1768 RegWidth = 2; 1769 return true; 1770 } 1771 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1772 Reg = AMDGPU::VCC; 1773 RegWidth = 2; 1774 return true; 1775 } 1776 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1777 Reg = AMDGPU::TBA; 1778 RegWidth = 2; 1779 return true; 1780 } 1781 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1782 Reg = AMDGPU::TMA; 1783 RegWidth = 2; 1784 return true; 1785 } 1786 return false; 1787 case IS_VGPR: 1788 case IS_SGPR: 1789 case IS_TTMP: 1790 if (Reg1 != Reg + RegWidth) { 1791 return false; 1792 } 1793 RegWidth++; 1794 return true; 1795 default: 1796 llvm_unreachable("unexpected register kind"); 1797 } 1798 } 1799 1800 static const StringRef Registers[] = { 1801 { "v" }, 1802 { "s" }, 1803 { "ttmp" }, 1804 }; 1805 1806 bool 1807 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1808 const AsmToken &NextToken) const { 1809 1810 // A list of consecutive registers: [s0,s1,s2,s3] 1811 if (Token.is(AsmToken::LBrac)) 1812 return true; 1813 1814 if (!Token.is(AsmToken::Identifier)) 1815 return false; 1816 1817 // A single register like s0 or a range of registers like s[0:1] 1818 1819 StringRef RegName = Token.getString(); 1820 1821 for (StringRef Reg : Registers) { 1822 if (RegName.startswith(Reg)) { 1823 if (Reg.size() < RegName.size()) { 1824 unsigned RegNum; 1825 // A single register with an index: rXX 1826 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1827 return true; 1828 } else { 1829 // A range of registers: r[XX:YY]. 
1830 if (NextToken.is(AsmToken::LBrac)) 1831 return true; 1832 } 1833 } 1834 } 1835 1836 return getSpecialRegForName(RegName); 1837 } 1838 1839 bool 1840 AMDGPUAsmParser::isRegister() 1841 { 1842 return isRegister(getToken(), peekToken()); 1843 } 1844 1845 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1846 unsigned &RegNum, unsigned &RegWidth, 1847 unsigned *DwordRegIndex) { 1848 if (DwordRegIndex) { *DwordRegIndex = 0; } 1849 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1850 if (getLexer().is(AsmToken::Identifier)) { 1851 StringRef RegName = Parser.getTok().getString(); 1852 if ((Reg = getSpecialRegForName(RegName))) { 1853 Parser.Lex(); 1854 RegKind = IS_SPECIAL; 1855 } else { 1856 unsigned RegNumIndex = 0; 1857 if (RegName[0] == 'v') { 1858 RegNumIndex = 1; 1859 RegKind = IS_VGPR; 1860 } else if (RegName[0] == 's') { 1861 RegNumIndex = 1; 1862 RegKind = IS_SGPR; 1863 } else if (RegName.startswith("ttmp")) { 1864 RegNumIndex = strlen("ttmp"); 1865 RegKind = IS_TTMP; 1866 } else { 1867 return false; 1868 } 1869 if (RegName.size() > RegNumIndex) { 1870 // Single 32-bit register: vXX. 1871 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1872 return false; 1873 Parser.Lex(); 1874 RegWidth = 1; 1875 } else { 1876 // Range of registers: v[XX:YY]. ":YY" is optional. 1877 Parser.Lex(); 1878 int64_t RegLo, RegHi; 1879 if (getLexer().isNot(AsmToken::LBrac)) 1880 return false; 1881 Parser.Lex(); 1882 1883 if (getParser().parseAbsoluteExpression(RegLo)) 1884 return false; 1885 1886 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1887 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1888 return false; 1889 Parser.Lex(); 1890 1891 if (isRBrace) { 1892 RegHi = RegLo; 1893 } else { 1894 if (getParser().parseAbsoluteExpression(RegHi)) 1895 return false; 1896 1897 if (getLexer().isNot(AsmToken::RBrac)) 1898 return false; 1899 Parser.Lex(); 1900 } 1901 RegNum = (unsigned) RegLo; 1902 RegWidth = (RegHi - RegLo) + 1; 1903 } 1904 } 1905 } else if (getLexer().is(AsmToken::LBrac)) { 1906 // List of consecutive registers: [s0,s1,s2,s3] 1907 Parser.Lex(); 1908 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1909 return false; 1910 if (RegWidth != 1) 1911 return false; 1912 RegisterKind RegKind1; 1913 unsigned Reg1, RegNum1, RegWidth1; 1914 do { 1915 if (getLexer().is(AsmToken::Comma)) { 1916 Parser.Lex(); 1917 } else if (getLexer().is(AsmToken::RBrac)) { 1918 Parser.Lex(); 1919 break; 1920 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1921 if (RegWidth1 != 1) { 1922 return false; 1923 } 1924 if (RegKind1 != RegKind) { 1925 return false; 1926 } 1927 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1928 return false; 1929 } 1930 } else { 1931 return false; 1932 } 1933 } while (true); 1934 } else { 1935 return false; 1936 } 1937 switch (RegKind) { 1938 case IS_SPECIAL: 1939 RegNum = 0; 1940 RegWidth = 1; 1941 break; 1942 case IS_VGPR: 1943 case IS_SGPR: 1944 case IS_TTMP: 1945 { 1946 unsigned Size = 1; 1947 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1948 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
1949 Size = std::min(RegWidth, 4u); 1950 } 1951 if (RegNum % Size != 0) 1952 return false; 1953 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1954 RegNum = RegNum / Size; 1955 int RCID = getRegClass(RegKind, RegWidth); 1956 if (RCID == -1) 1957 return false; 1958 const MCRegisterClass RC = TRI->getRegClass(RCID); 1959 if (RegNum >= RC.getNumRegs()) 1960 return false; 1961 Reg = RC.getRegister(RegNum); 1962 break; 1963 } 1964 1965 default: 1966 llvm_unreachable("unexpected register kind"); 1967 } 1968 1969 if (!subtargetHasRegister(*TRI, Reg)) 1970 return false; 1971 return true; 1972 } 1973 1974 Optional<StringRef> 1975 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1976 switch (RegKind) { 1977 case IS_VGPR: 1978 return StringRef(".amdgcn.next_free_vgpr"); 1979 case IS_SGPR: 1980 return StringRef(".amdgcn.next_free_sgpr"); 1981 default: 1982 return None; 1983 } 1984 } 1985 1986 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1987 auto SymbolName = getGprCountSymbolName(RegKind); 1988 assert(SymbolName && "initializing invalid register kind"); 1989 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1990 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1991 } 1992 1993 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1994 unsigned DwordRegIndex, 1995 unsigned RegWidth) { 1996 // Symbols are only defined for GCN targets 1997 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1998 return true; 1999 2000 auto SymbolName = getGprCountSymbolName(RegKind); 2001 if (!SymbolName) 2002 return true; 2003 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2004 2005 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2006 int64_t OldCount; 2007 2008 if (!Sym->isVariable()) 2009 return !Error(getParser().getTok().getLoc(), 2010 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2011 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2012 return !Error( 2013 getParser().getTok().getLoc(), 2014 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2015 2016 if (OldCount <= NewMax) 2017 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2018 2019 return true; 2020 } 2021 2022 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2023 const auto &Tok = Parser.getTok(); 2024 SMLoc StartLoc = Tok.getLoc(); 2025 SMLoc EndLoc = Tok.getEndLoc(); 2026 RegisterKind RegKind; 2027 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2028 2029 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2030 //FIXME: improve error messages (bug 41303). 2031 Error(StartLoc, "not a valid operand."); 2032 return nullptr; 2033 } 2034 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2035 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2036 return nullptr; 2037 } else 2038 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2039 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2040 } 2041 2042 bool 2043 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) { 2044 if (HasSP3AbsModifier) { 2045 // This is a workaround for handling expressions 2046 // as arguments of SP3 'abs' modifier, for example: 2047 // |1.0| 2048 // |-1| 2049 // |1+x| 2050 // This syntax is not compatible with syntax of standard 2051 // MC expressions (due to the trailing '|'). 
2052 2053 SMLoc EndLoc; 2054 const MCExpr *Expr; 2055 SMLoc StartLoc = getLoc(); 2056 2057 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 2058 return true; 2059 } 2060 2061 if (!Expr->evaluateAsAbsolute(Val)) 2062 return Error(StartLoc, "expected absolute expression"); 2063 2064 return false; 2065 } 2066 2067 return getParser().parseAbsoluteExpression(Val); 2068 } 2069 2070 OperandMatchResultTy 2071 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2072 // TODO: add syntactic sugar for 1/(2*PI) 2073 2074 const auto& Tok = getToken(); 2075 const auto& NextTok = peekToken(); 2076 bool IsReal = Tok.is(AsmToken::Real); 2077 SMLoc S = Tok.getLoc(); 2078 bool Negate = false; 2079 2080 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2081 lex(); 2082 IsReal = true; 2083 Negate = true; 2084 } 2085 2086 if (IsReal) { 2087 // Floating-point expressions are not supported. 2088 // Can only allow floating-point literals with an 2089 // optional sign. 2090 2091 StringRef Num = getTokenStr(); 2092 lex(); 2093 2094 APFloat RealVal(APFloat::IEEEdouble()); 2095 auto roundMode = APFloat::rmNearestTiesToEven; 2096 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2097 return MatchOperand_ParseFail; 2098 } 2099 if (Negate) 2100 RealVal.changeSign(); 2101 2102 Operands.push_back( 2103 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2104 AMDGPUOperand::ImmTyNone, true)); 2105 2106 return MatchOperand_Success; 2107 2108 // FIXME: Should enable arbitrary expressions here 2109 } else if (Tok.is(AsmToken::Integer) || 2110 (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){ 2111 2112 int64_t IntVal; 2113 if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier)) 2114 return MatchOperand_ParseFail; 2115 2116 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2117 return MatchOperand_Success; 2118 } 2119 2120 return MatchOperand_NoMatch; 2121 } 2122 2123 OperandMatchResultTy 2124 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2125 if (!isRegister()) 2126 return MatchOperand_NoMatch; 2127 2128 if (auto R = parseRegister()) { 2129 assert(R->isReg()); 2130 Operands.push_back(std::move(R)); 2131 return MatchOperand_Success; 2132 } 2133 return MatchOperand_ParseFail; 2134 } 2135 2136 OperandMatchResultTy 2137 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2138 auto res = parseReg(Operands); 2139 return (res == MatchOperand_NoMatch)? 2140 parseImm(Operands, HasSP3AbsMod) : 2141 res; 2142 } 2143 2144 // Check if the current token is an SP3 'neg' modifier. 2145 // Currently this modifier is allowed in the following context: 2146 // 2147 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2148 // 2. Before an 'abs' modifier: -abs(...) 2149 // 3. Before an SP3 'abs' modifier: -|...| 2150 // 2151 // In all other cases "-" is handled as a part 2152 // of an expression that follows the sign. 2153 // 2154 // Note: When "-" is followed by an integer literal, 2155 // this is interpreted as integer negation rather 2156 // than a floating-point NEG modifier applied to N. 
2157 // Besides being counter-intuitive, such use of a floating-point
2158 // NEG modifier would have resulted in different meanings
2159 // of integer literals used with VOP1/2/C and VOP3,
2160 // for example:
2161 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2162 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2163 // Negative fp literals with a preceding "-" are
2164 // handled likewise, for uniformity.
2165 //
2166 bool
2167 AMDGPUAsmParser::parseSP3NegModifier() {
2168
2169 AsmToken NextToken[2];
2170 peekTokens(NextToken);
2171
2172 if (isToken(AsmToken::Minus) &&
2173 (isRegister(NextToken[0], NextToken[1]) ||
2174 NextToken[0].is(AsmToken::Pipe) ||
2175 isId(NextToken[0], "abs"))) {
2176 lex();
2177 return true;
2178 }
2179
2180 return false;
2181 }
2182
2183 OperandMatchResultTy
2184 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2185 bool AllowImm) {
2186 bool Neg, SP3Neg;
2187 bool Abs, SP3Abs;
2188 SMLoc Loc;
2189
2190 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2191 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2192 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2193 return MatchOperand_ParseFail;
2194 }
2195
2196 SP3Neg = parseSP3NegModifier();
2197
2198 Loc = getLoc();
2199 Neg = trySkipId("neg");
2200 if (Neg && SP3Neg) {
2201 Error(Loc, "expected register or immediate");
2202 return MatchOperand_ParseFail;
2203 }
2204 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2205 return MatchOperand_ParseFail;
2206
2207 Abs = trySkipId("abs");
2208 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2209 return MatchOperand_ParseFail;
2210
2211 Loc = getLoc();
2212 SP3Abs = trySkipToken(AsmToken::Pipe);
2213 if (Abs && SP3Abs) {
2214 Error(Loc, "expected register or immediate");
2215 return MatchOperand_ParseFail;
2216 }
2217
2218 OperandMatchResultTy Res;
2219 if (AllowImm) {
2220 Res = parseRegOrImm(Operands, SP3Abs);
2221 } else {
2222 Res = parseReg(Operands);
2223 }
2224 if (Res != MatchOperand_Success) {
2225 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2226 }
2227
2228 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2229 return MatchOperand_ParseFail;
2230 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2231 return MatchOperand_ParseFail;
2232 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2233 return MatchOperand_ParseFail;
2234
2235 AMDGPUOperand::Modifiers Mods;
2236 Mods.Abs = Abs || SP3Abs;
2237 Mods.Neg = Neg || SP3Neg;
2238
2239 if (Mods.hasFPModifiers()) {
2240 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2241 Op.setModifiers(Mods);
2242 }
2243 return MatchOperand_Success;
2244 }
2245
2246 OperandMatchResultTy
2247 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2248 bool AllowImm) {
2249 bool Sext = trySkipId("sext");
2250 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2251 return MatchOperand_ParseFail;
2252
2253 OperandMatchResultTy Res;
2254 if (AllowImm) {
2255 Res = parseRegOrImm(Operands);
2256 } else {
2257 Res = parseReg(Operands);
2258 }
2259 if (Res != MatchOperand_Success) {
2260 return Sext?
MatchOperand_ParseFail : Res; 2261 } 2262 2263 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2264 return MatchOperand_ParseFail; 2265 2266 AMDGPUOperand::Modifiers Mods; 2267 Mods.Sext = Sext; 2268 2269 if (Mods.hasIntModifiers()) { 2270 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2271 Op.setModifiers(Mods); 2272 } 2273 2274 return MatchOperand_Success; 2275 } 2276 2277 OperandMatchResultTy 2278 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2279 return parseRegOrImmWithFPInputMods(Operands, false); 2280 } 2281 2282 OperandMatchResultTy 2283 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2284 return parseRegOrImmWithIntInputMods(Operands, false); 2285 } 2286 2287 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2288 auto Loc = getLoc(); 2289 if (trySkipId("off")) { 2290 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2291 AMDGPUOperand::ImmTyOff, false)); 2292 return MatchOperand_Success; 2293 } 2294 2295 if (!isRegister()) 2296 return MatchOperand_NoMatch; 2297 2298 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2299 if (Reg) { 2300 Operands.push_back(std::move(Reg)); 2301 return MatchOperand_Success; 2302 } 2303 2304 return MatchOperand_ParseFail; 2305 2306 } 2307 2308 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2309 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2310 2311 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2312 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2313 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2314 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2315 return Match_InvalidOperand; 2316 2317 if ((TSFlags & SIInstrFlags::VOP3) && 2318 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2319 getForcedEncodingSize() != 64) 2320 return Match_PreferE32; 2321 2322 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2323 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2324 // v_mac_f32/16 allow only dst_sel == DWORD; 2325 auto OpNum = 2326 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2327 const auto &Op = Inst.getOperand(OpNum); 2328 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2329 return Match_InvalidOperand; 2330 } 2331 } 2332 2333 if (TSFlags & SIInstrFlags::FLAT) { 2334 // FIXME: Produces error without correct column reported. 2335 auto Opcode = Inst.getOpcode(); 2336 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 2337 2338 const auto &Op = Inst.getOperand(OpNum); 2339 if (!hasFlatOffsets() && Op.getImm() != 0) 2340 return Match_InvalidOperand; 2341 2342 // GFX10: Address offset is 12-bit signed byte offset. Must be positive for 2343 // FLAT segment. For FLAT segment MSB is ignored and forced to zero. 
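// The checks below follow this rule: instructions flagged IsNonFlatSeg
// (presumably global/scratch) take a signed 12-bit offset, while plain
// flat instructions take an unsigned 11-bit offset.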
2344 if (isGFX10()) {
2345 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2346 if (!isInt<12>(Op.getImm()))
2347 return Match_InvalidOperand;
2348 } else {
2349 if (!isUInt<11>(Op.getImm()))
2350 return Match_InvalidOperand;
2351 }
2352 }
2353 }
2354
2355 return Match_Success;
2356 }
2357
2358 // Which asm variants we should check.
2359 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2360 if (getForcedEncodingSize() == 32) {
2361 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2362 return makeArrayRef(Variants);
2363 }
2364
2365 if (isForcedVOP3()) {
2366 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2367 return makeArrayRef(Variants);
2368 }
2369
2370 if (isForcedSDWA()) {
2371 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2372 AMDGPUAsmVariants::SDWA9};
2373 return makeArrayRef(Variants);
2374 }
2375
2376 if (isForcedDPP()) {
2377 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2378 return makeArrayRef(Variants);
2379 }
2380
2381 static const unsigned Variants[] = {
2382 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2383 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2384 };
2385
2386 return makeArrayRef(Variants);
2387 }
2388
2389 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2390 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2391 const unsigned Num = Desc.getNumImplicitUses();
2392 for (unsigned i = 0; i < Num; ++i) {
2393 unsigned Reg = Desc.ImplicitUses[i];
2394 switch (Reg) {
2395 case AMDGPU::FLAT_SCR:
2396 case AMDGPU::VCC:
2397 case AMDGPU::VCC_LO:
2398 case AMDGPU::VCC_HI:
2399 case AMDGPU::M0:
2400 case AMDGPU::SGPR_NULL:
2401 return Reg;
2402 default:
2403 break;
2404 }
2405 }
2406 return AMDGPU::NoRegister;
2407 }
2408
2409 // NB: This code is correct only when used to check constant
2410 // bus limitations because GFX7 supports no f16 inline constants.
2411 // Note that there are no cases where a GFX7 opcode violates
2412 // constant bus limitations due to the use of an f16 constant.
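// For example (illustrative): in "v_add_f32 v0, 1.0, v1" the 1.0 is an
// inline constant, whereas 1.5 is not inlinable and would be encoded as a
// 32-bit literal, which the constant bus check below counts.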
2413 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2414 unsigned OpIdx) const { 2415 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2416 2417 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2418 return false; 2419 } 2420 2421 const MCOperand &MO = Inst.getOperand(OpIdx); 2422 2423 int64_t Val = MO.getImm(); 2424 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2425 2426 switch (OpSize) { // expected operand size 2427 case 8: 2428 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2429 case 4: 2430 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2431 case 2: { 2432 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2433 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2434 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2435 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2436 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2437 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2438 } else { 2439 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2440 } 2441 } 2442 default: 2443 llvm_unreachable("invalid operand size"); 2444 } 2445 } 2446 2447 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2448 const MCOperand &MO = Inst.getOperand(OpIdx); 2449 if (MO.isImm()) { 2450 return !isInlineConstant(Inst, OpIdx); 2451 } 2452 return !MO.isReg() || 2453 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2454 } 2455 2456 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2457 const unsigned Opcode = Inst.getOpcode(); 2458 const MCInstrDesc &Desc = MII.get(Opcode); 2459 unsigned ConstantBusUseCount = 0; 2460 unsigned NumLiterals = 0; 2461 unsigned LiteralSize; 2462 2463 if (Desc.TSFlags & 2464 (SIInstrFlags::VOPC | 2465 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2466 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2467 SIInstrFlags::SDWA)) { 2468 // Check special imm operands (used by madmk, etc) 2469 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2470 ++ConstantBusUseCount; 2471 } 2472 2473 SmallDenseSet<unsigned> SGPRsUsed; 2474 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2475 if (SGPRUsed != AMDGPU::NoRegister) { 2476 SGPRsUsed.insert(SGPRUsed); 2477 ++ConstantBusUseCount; 2478 } 2479 2480 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2481 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2482 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2483 2484 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2485 2486 for (int OpIdx : OpIndices) { 2487 if (OpIdx == -1) break; 2488 2489 const MCOperand &MO = Inst.getOperand(OpIdx); 2490 if (usesConstantBus(Inst, OpIdx)) { 2491 if (MO.isReg()) { 2492 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2493 // Pairs of registers with a partial intersections like these 2494 // s0, s[0:1] 2495 // flat_scratch_lo, flat_scratch 2496 // flat_scratch_lo, flat_scratch_hi 2497 // are theoretically valid but they are disabled anyway. 2498 // Note that this code mimics SIInstrInfo::verifyInstruction 2499 if (!SGPRsUsed.count(Reg)) { 2500 SGPRsUsed.insert(Reg); 2501 ++ConstantBusUseCount; 2502 } 2503 SGPRUsed = Reg; 2504 } else { // Expression or a literal 2505 2506 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2507 continue; // special operand like VINTERP attr_chan 2508 2509 // An instruction may use only one literal. 
2510 // This has been validated on the previous step. 2511 // See validateVOP3Literal. 2512 // This literal may be used as more than one operand. 2513 // If all these operands are of the same size, 2514 // this literal counts as one scalar value. 2515 // Otherwise it counts as 2 scalar values. 2516 // See "GFX10 Shader Programming", section 3.6.2.3. 2517 2518 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2519 if (Size < 4) Size = 4; 2520 2521 if (NumLiterals == 0) { 2522 NumLiterals = 1; 2523 LiteralSize = Size; 2524 } else if (LiteralSize != Size) { 2525 NumLiterals = 2; 2526 } 2527 } 2528 } 2529 } 2530 } 2531 ConstantBusUseCount += NumLiterals; 2532 2533 if (isGFX10()) 2534 return ConstantBusUseCount <= 2; 2535 2536 return ConstantBusUseCount <= 1; 2537 } 2538 2539 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2540 const unsigned Opcode = Inst.getOpcode(); 2541 const MCInstrDesc &Desc = MII.get(Opcode); 2542 2543 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2544 if (DstIdx == -1 || 2545 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2546 return true; 2547 } 2548 2549 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2550 2551 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2552 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2553 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2554 2555 assert(DstIdx != -1); 2556 const MCOperand &Dst = Inst.getOperand(DstIdx); 2557 assert(Dst.isReg()); 2558 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2559 2560 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2561 2562 for (int SrcIdx : SrcIndices) { 2563 if (SrcIdx == -1) break; 2564 const MCOperand &Src = Inst.getOperand(SrcIdx); 2565 if (Src.isReg()) { 2566 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2567 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2568 return false; 2569 } 2570 } 2571 } 2572 2573 return true; 2574 } 2575 2576 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2577 2578 const unsigned Opc = Inst.getOpcode(); 2579 const MCInstrDesc &Desc = MII.get(Opc); 2580 2581 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2582 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2583 assert(ClampIdx != -1); 2584 return Inst.getOperand(ClampIdx).getImm() == 0; 2585 } 2586 2587 return true; 2588 } 2589 2590 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2591 2592 const unsigned Opc = Inst.getOpcode(); 2593 const MCInstrDesc &Desc = MII.get(Opc); 2594 2595 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2596 return true; 2597 2598 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2599 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2600 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2601 2602 assert(VDataIdx != -1); 2603 assert(DMaskIdx != -1); 2604 assert(TFEIdx != -1); 2605 2606 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2607 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2608 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2609 if (DMask == 0) 2610 DMask = 1; 2611 2612 unsigned DataSize = 2613 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2614 if (hasPackedD16()) { 2615 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2616 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2617 DataSize = (DataSize + 1) / 2; 2618 } 2619 2620 return (VDataSize / 4) == DataSize + TFESize; 2621 } 2622 2623 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2624 const unsigned Opc = Inst.getOpcode(); 2625 const MCInstrDesc &Desc = MII.get(Opc); 2626 2627 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2628 return true; 2629 2630 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2631 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2632 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2633 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2634 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2635 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2636 2637 assert(VAddr0Idx != -1); 2638 assert(SrsrcIdx != -1); 2639 assert(DimIdx != -1); 2640 assert(SrsrcIdx > VAddr0Idx); 2641 2642 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2643 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2644 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2645 unsigned VAddrSize = 2646 IsNSA ? SrsrcIdx - VAddr0Idx 2647 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2648 2649 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2650 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2651 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2652 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2653 if (!IsNSA) { 2654 if (AddrSize > 8) 2655 AddrSize = 16; 2656 else if (AddrSize > 4) 2657 AddrSize = 8; 2658 } 2659 2660 return VAddrSize == AddrSize; 2661 } 2662 2663 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2664 2665 const unsigned Opc = Inst.getOpcode(); 2666 const MCInstrDesc &Desc = MII.get(Opc); 2667 2668 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2669 return true; 2670 if (!Desc.mayLoad() || !Desc.mayStore()) 2671 return true; // Not atomic 2672 2673 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2674 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2675 2676 // This is an incomplete check because image_atomic_cmpswap 2677 // may only use 0x3 and 0xf while other atomic operations 2678 // may use 0x1 and 0x3. However these limitations are 2679 // verified when we check that dmask matches dst size. 2680 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2681 } 2682 2683 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2684 2685 const unsigned Opc = Inst.getOpcode(); 2686 const MCInstrDesc &Desc = MII.get(Opc); 2687 2688 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2689 return true; 2690 2691 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2692 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2693 2694 // GATHER4 instructions use dmask in a different fashion compared to 2695 // other MIMG instructions. The only useful DMASK values are 2696 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2697 // (red,red,red,red) etc.) The ISA document doesn't mention 2698 // this. 
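// For example (illustrative): dmask:0x1 gathers the red component of four
// texels, while a combined mask such as dmask:0x3 is rejected below.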
2699 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2700 } 2701 2702 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2703 2704 const unsigned Opc = Inst.getOpcode(); 2705 const MCInstrDesc &Desc = MII.get(Opc); 2706 2707 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2708 return true; 2709 2710 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2711 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2712 if (isCI() || isSI()) 2713 return false; 2714 } 2715 2716 return true; 2717 } 2718 2719 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2720 const unsigned Opc = Inst.getOpcode(); 2721 const MCInstrDesc &Desc = MII.get(Opc); 2722 2723 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2724 return true; 2725 2726 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2727 if (DimIdx < 0) 2728 return true; 2729 2730 long Imm = Inst.getOperand(DimIdx).getImm(); 2731 if (Imm < 0 || Imm >= 8) 2732 return false; 2733 2734 return true; 2735 } 2736 2737 static bool IsRevOpcode(const unsigned Opcode) 2738 { 2739 switch (Opcode) { 2740 case AMDGPU::V_SUBREV_F32_e32: 2741 case AMDGPU::V_SUBREV_F32_e64: 2742 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2743 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2744 case AMDGPU::V_SUBREV_F32_e32_vi: 2745 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2746 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 2747 case AMDGPU::V_SUBREV_F32_e64_vi: 2748 2749 case AMDGPU::V_SUBREV_I32_e32: 2750 case AMDGPU::V_SUBREV_I32_e64: 2751 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 2752 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 2753 2754 case AMDGPU::V_SUBBREV_U32_e32: 2755 case AMDGPU::V_SUBBREV_U32_e64: 2756 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 2757 case AMDGPU::V_SUBBREV_U32_e32_vi: 2758 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 2759 case AMDGPU::V_SUBBREV_U32_e64_vi: 2760 2761 case AMDGPU::V_SUBREV_U32_e32: 2762 case AMDGPU::V_SUBREV_U32_e64: 2763 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2764 case AMDGPU::V_SUBREV_U32_e32_vi: 2765 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2766 case AMDGPU::V_SUBREV_U32_e64_vi: 2767 2768 case AMDGPU::V_SUBREV_F16_e32: 2769 case AMDGPU::V_SUBREV_F16_e64: 2770 case AMDGPU::V_SUBREV_F16_e32_gfx10: 2771 case AMDGPU::V_SUBREV_F16_e32_vi: 2772 case AMDGPU::V_SUBREV_F16_e64_gfx10: 2773 case AMDGPU::V_SUBREV_F16_e64_vi: 2774 2775 case AMDGPU::V_SUBREV_U16_e32: 2776 case AMDGPU::V_SUBREV_U16_e64: 2777 case AMDGPU::V_SUBREV_U16_e32_vi: 2778 case AMDGPU::V_SUBREV_U16_e64_vi: 2779 2780 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2781 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 2782 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2783 2784 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2785 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2786 2787 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 2788 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 2789 2790 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 2791 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 2792 2793 case AMDGPU::V_LSHRREV_B32_e32: 2794 case AMDGPU::V_LSHRREV_B32_e64: 2795 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 2796 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 2797 case AMDGPU::V_LSHRREV_B32_e32_vi: 2798 case AMDGPU::V_LSHRREV_B32_e64_vi: 2799 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 2800 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 2801 2802 case AMDGPU::V_ASHRREV_I32_e32: 2803 case AMDGPU::V_ASHRREV_I32_e64: 2804 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 2805 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 2806 case AMDGPU::V_ASHRREV_I32_e32_vi: 2807 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 2808 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 
2809 case AMDGPU::V_ASHRREV_I32_e64_vi: 2810 2811 case AMDGPU::V_LSHLREV_B32_e32: 2812 case AMDGPU::V_LSHLREV_B32_e64: 2813 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 2814 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 2815 case AMDGPU::V_LSHLREV_B32_e32_vi: 2816 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 2817 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 2818 case AMDGPU::V_LSHLREV_B32_e64_vi: 2819 2820 case AMDGPU::V_LSHLREV_B16_e32: 2821 case AMDGPU::V_LSHLREV_B16_e64: 2822 case AMDGPU::V_LSHLREV_B16_e32_vi: 2823 case AMDGPU::V_LSHLREV_B16_e64_vi: 2824 case AMDGPU::V_LSHLREV_B16_gfx10: 2825 2826 case AMDGPU::V_LSHRREV_B16_e32: 2827 case AMDGPU::V_LSHRREV_B16_e64: 2828 case AMDGPU::V_LSHRREV_B16_e32_vi: 2829 case AMDGPU::V_LSHRREV_B16_e64_vi: 2830 case AMDGPU::V_LSHRREV_B16_gfx10: 2831 2832 case AMDGPU::V_ASHRREV_I16_e32: 2833 case AMDGPU::V_ASHRREV_I16_e64: 2834 case AMDGPU::V_ASHRREV_I16_e32_vi: 2835 case AMDGPU::V_ASHRREV_I16_e64_vi: 2836 case AMDGPU::V_ASHRREV_I16_gfx10: 2837 2838 case AMDGPU::V_LSHLREV_B64: 2839 case AMDGPU::V_LSHLREV_B64_gfx10: 2840 case AMDGPU::V_LSHLREV_B64_vi: 2841 2842 case AMDGPU::V_LSHRREV_B64: 2843 case AMDGPU::V_LSHRREV_B64_gfx10: 2844 case AMDGPU::V_LSHRREV_B64_vi: 2845 2846 case AMDGPU::V_ASHRREV_I64: 2847 case AMDGPU::V_ASHRREV_I64_gfx10: 2848 case AMDGPU::V_ASHRREV_I64_vi: 2849 2850 case AMDGPU::V_PK_LSHLREV_B16: 2851 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 2852 case AMDGPU::V_PK_LSHLREV_B16_vi: 2853 2854 case AMDGPU::V_PK_LSHRREV_B16: 2855 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 2856 case AMDGPU::V_PK_LSHRREV_B16_vi: 2857 case AMDGPU::V_PK_ASHRREV_I16: 2858 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 2859 case AMDGPU::V_PK_ASHRREV_I16_vi: 2860 return true; 2861 default: 2862 return false; 2863 } 2864 } 2865 2866 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2867 2868 using namespace SIInstrFlags; 2869 const unsigned Opcode = Inst.getOpcode(); 2870 const MCInstrDesc &Desc = MII.get(Opcode); 2871 2872 // lds_direct register is defined so that it can be used 2873 // with 9-bit operands only. Ignore encodings which do not accept these. 2874 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2875 return true; 2876 2877 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2878 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2879 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2880 2881 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2882 2883 // lds_direct cannot be specified as either src1 or src2. 2884 for (int SrcIdx : SrcIndices) { 2885 if (SrcIdx == -1) break; 2886 const MCOperand &Src = Inst.getOperand(SrcIdx); 2887 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2888 return false; 2889 } 2890 } 2891 2892 if (Src0Idx == -1) 2893 return true; 2894 2895 const MCOperand &Src = Inst.getOperand(Src0Idx); 2896 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2897 return true; 2898 2899 // lds_direct is specified as src0. Check additional limitations. 
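// Specifically, the check below rejects lds_direct as src0 for SDWA
// encodings and for "rev" opcodes (whose operands are effectively swapped
// relative to the base operation).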
2900 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2901 } 2902 2903 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2904 unsigned Opcode = Inst.getOpcode(); 2905 const MCInstrDesc &Desc = MII.get(Opcode); 2906 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2907 return true; 2908 2909 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2910 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2911 2912 const int OpIndices[] = { Src0Idx, Src1Idx }; 2913 2914 unsigned NumLiterals = 0; 2915 uint32_t LiteralValue; 2916 2917 for (int OpIdx : OpIndices) { 2918 if (OpIdx == -1) break; 2919 2920 const MCOperand &MO = Inst.getOperand(OpIdx); 2921 if (MO.isImm() && 2922 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2923 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2924 !isInlineConstant(Inst, OpIdx)) { 2925 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2926 if (NumLiterals == 0 || LiteralValue != Value) { 2927 LiteralValue = Value; 2928 ++NumLiterals; 2929 } 2930 } 2931 } 2932 2933 return NumLiterals <= 1; 2934 } 2935 2936 // VOP3 literal is only allowed in GFX10+ and only one can be used 2937 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 2938 unsigned Opcode = Inst.getOpcode(); 2939 const MCInstrDesc &Desc = MII.get(Opcode); 2940 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 2941 return true; 2942 2943 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2944 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2945 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2946 2947 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2948 2949 unsigned NumLiterals = 0; 2950 uint32_t LiteralValue; 2951 2952 for (int OpIdx : OpIndices) { 2953 if (OpIdx == -1) break; 2954 2955 const MCOperand &MO = Inst.getOperand(OpIdx); 2956 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 2957 continue; 2958 2959 if (!isInlineConstant(Inst, OpIdx)) { 2960 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2961 if (NumLiterals == 0 || LiteralValue != Value) { 2962 LiteralValue = Value; 2963 ++NumLiterals; 2964 } 2965 } 2966 } 2967 2968 return !NumLiterals || 2969 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 2970 } 2971 2972 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2973 const SMLoc &IDLoc) { 2974 if (!validateLdsDirect(Inst)) { 2975 Error(IDLoc, 2976 "invalid use of lds_direct"); 2977 return false; 2978 } 2979 if (!validateSOPLiteral(Inst)) { 2980 Error(IDLoc, 2981 "only one literal operand is allowed"); 2982 return false; 2983 } 2984 if (!validateVOP3Literal(Inst)) { 2985 Error(IDLoc, 2986 "invalid literal operand"); 2987 return false; 2988 } 2989 if (!validateConstantBusLimitations(Inst)) { 2990 Error(IDLoc, 2991 "invalid operand (violates constant bus restrictions)"); 2992 return false; 2993 } 2994 if (!validateEarlyClobberLimitations(Inst)) { 2995 Error(IDLoc, 2996 "destination must be different than all sources"); 2997 return false; 2998 } 2999 if (!validateIntClampSupported(Inst)) { 3000 Error(IDLoc, 3001 "integer clamping is not supported on this GPU"); 3002 return false; 3003 } 3004 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
3005 if (!validateMIMGD16(Inst)) { 3006 Error(IDLoc, 3007 "d16 modifier is not supported on this GPU"); 3008 return false; 3009 } 3010 if (!validateMIMGDim(Inst)) { 3011 Error(IDLoc, "dim modifier is required on this GPU"); 3012 return false; 3013 } 3014 if (!validateMIMGDataSize(Inst)) { 3015 Error(IDLoc, 3016 "image data size does not match dmask and tfe"); 3017 return false; 3018 } 3019 if (!validateMIMGAddrSize(Inst)) { 3020 Error(IDLoc, 3021 "image address size does not match dim and a16"); 3022 return false; 3023 } 3024 if (!validateMIMGAtomicDMask(Inst)) { 3025 Error(IDLoc, 3026 "invalid atomic image dmask"); 3027 return false; 3028 } 3029 if (!validateMIMGGatherDMask(Inst)) { 3030 Error(IDLoc, 3031 "invalid image_gather dmask: only one bit must be set"); 3032 return false; 3033 } 3034 3035 return true; 3036 } 3037 3038 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3039 const FeatureBitset &FBS, 3040 unsigned VariantID = 0); 3041 3042 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3043 OperandVector &Operands, 3044 MCStreamer &Out, 3045 uint64_t &ErrorInfo, 3046 bool MatchingInlineAsm) { 3047 MCInst Inst; 3048 unsigned Result = Match_Success; 3049 for (auto Variant : getMatchedVariants()) { 3050 uint64_t EI; 3051 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3052 Variant); 3053 // We order match statuses from least to most specific. We use most specific 3054 // status as resulting 3055 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3056 if ((R == Match_Success) || 3057 (R == Match_PreferE32) || 3058 (R == Match_MissingFeature && Result != Match_PreferE32) || 3059 (R == Match_InvalidOperand && Result != Match_MissingFeature 3060 && Result != Match_PreferE32) || 3061 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3062 && Result != Match_MissingFeature 3063 && Result != Match_PreferE32)) { 3064 Result = R; 3065 ErrorInfo = EI; 3066 } 3067 if (R == Match_Success) 3068 break; 3069 } 3070 3071 switch (Result) { 3072 default: break; 3073 case Match_Success: 3074 if (!validateInstruction(Inst, IDLoc)) { 3075 return true; 3076 } 3077 Inst.setLoc(IDLoc); 3078 Out.EmitInstruction(Inst, getSTI()); 3079 return false; 3080 3081 case Match_MissingFeature: 3082 return Error(IDLoc, "instruction not supported on this GPU"); 3083 3084 case Match_MnemonicFail: { 3085 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3086 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3087 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3088 return Error(IDLoc, "invalid instruction" + Suggestion, 3089 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3090 } 3091 3092 case Match_InvalidOperand: { 3093 SMLoc ErrorLoc = IDLoc; 3094 if (ErrorInfo != ~0ULL) { 3095 if (ErrorInfo >= Operands.size()) { 3096 return Error(IDLoc, "too few operands for instruction"); 3097 } 3098 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3099 if (ErrorLoc == SMLoc()) 3100 ErrorLoc = IDLoc; 3101 } 3102 return Error(ErrorLoc, "invalid operand for instruction"); 3103 } 3104 3105 case Match_PreferE32: 3106 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3107 "should be encoded as e32"); 3108 } 3109 llvm_unreachable("Implement any new match types added!"); 3110 } 3111 3112 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3113 int64_t Tmp = -1; 3114 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3115 return true; 
3116 } 3117 if (getParser().parseAbsoluteExpression(Tmp)) { 3118 return true; 3119 } 3120 Ret = static_cast<uint32_t>(Tmp); 3121 return false; 3122 } 3123 3124 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3125 uint32_t &Minor) { 3126 if (ParseAsAbsoluteExpression(Major)) 3127 return TokError("invalid major version"); 3128 3129 if (getLexer().isNot(AsmToken::Comma)) 3130 return TokError("minor version number required, comma expected"); 3131 Lex(); 3132 3133 if (ParseAsAbsoluteExpression(Minor)) 3134 return TokError("invalid minor version"); 3135 3136 return false; 3137 } 3138 3139 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3140 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3141 return TokError("directive only supported for amdgcn architecture"); 3142 3143 std::string Target; 3144 3145 SMLoc TargetStart = getTok().getLoc(); 3146 if (getParser().parseEscapedString(Target)) 3147 return true; 3148 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3149 3150 std::string ExpectedTarget; 3151 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3152 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3153 3154 if (Target != ExpectedTargetOS.str()) 3155 return getParser().Error(TargetRange.Start, "target must match options", 3156 TargetRange); 3157 3158 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3159 return false; 3160 } 3161 3162 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3163 return getParser().Error(Range.Start, "value out of range", Range); 3164 } 3165 3166 bool AMDGPUAsmParser::calculateGPRBlocks( 3167 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3168 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 3169 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 3170 unsigned &SGPRBlocks) { 3171 // TODO(scott.linder): These calculations are duplicated from 3172 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
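// As a rough sketch (illustrative; IsaInfo::getNumVGPRBlocks and
// getNumSGPRBlocks are authoritative): a "block" is the allocation granule,
// so the encoded value is approximately
//   alignTo(std::max(1u, NumRegs), Granule) / Granule - 1.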
3173 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3174 3175 unsigned NumVGPRs = NextFreeVGPR; 3176 unsigned NumSGPRs = NextFreeSGPR; 3177 3178 if (Version.Major >= 10) 3179 NumSGPRs = 0; 3180 else { 3181 unsigned MaxAddressableNumSGPRs = 3182 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3183 3184 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3185 NumSGPRs > MaxAddressableNumSGPRs) 3186 return OutOfRangeError(SGPRRange); 3187 3188 NumSGPRs += 3189 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3190 3191 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3192 NumSGPRs > MaxAddressableNumSGPRs) 3193 return OutOfRangeError(SGPRRange); 3194 3195 if (Features.test(FeatureSGPRInitBug)) 3196 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3197 } 3198 3199 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 3200 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3201 3202 return false; 3203 } 3204 3205 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3206 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3207 return TokError("directive only supported for amdgcn architecture"); 3208 3209 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3210 return TokError("directive only supported for amdhsa OS"); 3211 3212 StringRef KernelName; 3213 if (getParser().parseIdentifier(KernelName)) 3214 return true; 3215 3216 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3217 3218 StringSet<> Seen; 3219 3220 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3221 3222 SMRange VGPRRange; 3223 uint64_t NextFreeVGPR = 0; 3224 SMRange SGPRRange; 3225 uint64_t NextFreeSGPR = 0; 3226 unsigned UserSGPRCount = 0; 3227 bool ReserveVCC = true; 3228 bool ReserveFlatScr = true; 3229 bool ReserveXNACK = hasXNACK(); 3230 3231 while (true) { 3232 while (getLexer().is(AsmToken::EndOfStatement)) 3233 Lex(); 3234 3235 if (getLexer().isNot(AsmToken::Identifier)) 3236 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3237 3238 StringRef ID = getTok().getIdentifier(); 3239 SMRange IDRange = getTok().getLocRange(); 3240 Lex(); 3241 3242 if (ID == ".end_amdhsa_kernel") 3243 break; 3244 3245 if (Seen.find(ID) != Seen.end()) 3246 return TokError(".amdhsa_ directives cannot be repeated"); 3247 Seen.insert(ID); 3248 3249 SMLoc ValStart = getTok().getLoc(); 3250 int64_t IVal; 3251 if (getParser().parseAbsoluteExpression(IVal)) 3252 return true; 3253 SMLoc ValEnd = getTok().getLoc(); 3254 SMRange ValRange = SMRange(ValStart, ValEnd); 3255 3256 if (IVal < 0) 3257 return OutOfRangeError(ValRange); 3258 3259 uint64_t Val = IVal; 3260 3261 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3262 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3263 return OutOfRangeError(RANGE); \ 3264 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3265 3266 if (ID == ".amdhsa_group_segment_fixed_size") { 3267 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3268 return OutOfRangeError(ValRange); 3269 KD.group_segment_fixed_size = Val; 3270 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3271 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3272 return OutOfRangeError(ValRange); 3273 KD.private_segment_fixed_size = Val; 3274 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3275 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3276 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3277 Val, ValRange); 3278 UserSGPRCount += 4; 3279 } else if (ID == 
".amdhsa_user_sgpr_dispatch_ptr") { 3280 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3281 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3282 ValRange); 3283 UserSGPRCount += 2; 3284 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3285 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3286 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3287 ValRange); 3288 UserSGPRCount += 2; 3289 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3290 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3291 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3292 Val, ValRange); 3293 UserSGPRCount += 2; 3294 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3295 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3296 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3297 ValRange); 3298 UserSGPRCount += 2; 3299 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3300 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3301 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3302 ValRange); 3303 UserSGPRCount += 2; 3304 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3305 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3306 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3307 Val, ValRange); 3308 UserSGPRCount += 1; 3309 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3310 PARSE_BITS_ENTRY( 3311 KD.compute_pgm_rsrc2, 3312 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3313 ValRange); 3314 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3315 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3316 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3317 ValRange); 3318 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3319 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3320 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3321 ValRange); 3322 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3323 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3324 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3325 ValRange); 3326 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3327 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3328 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3329 ValRange); 3330 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3331 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3332 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3333 ValRange); 3334 } else if (ID == ".amdhsa_next_free_vgpr") { 3335 VGPRRange = ValRange; 3336 NextFreeVGPR = Val; 3337 } else if (ID == ".amdhsa_next_free_sgpr") { 3338 SGPRRange = ValRange; 3339 NextFreeSGPR = Val; 3340 } else if (ID == ".amdhsa_reserve_vcc") { 3341 if (!isUInt<1>(Val)) 3342 return OutOfRangeError(ValRange); 3343 ReserveVCC = Val; 3344 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3345 if (IVersion.Major < 7) 3346 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3347 IDRange); 3348 if (!isUInt<1>(Val)) 3349 return OutOfRangeError(ValRange); 3350 ReserveFlatScr = Val; 3351 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3352 if (IVersion.Major < 8) 3353 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3354 IDRange); 3355 if (!isUInt<1>(Val)) 3356 return OutOfRangeError(ValRange); 3357 ReserveXNACK = Val; 3358 } else if (ID == ".amdhsa_float_round_mode_32") { 3359 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3360 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3361 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3362 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3363 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3364 } else if (ID == 
".amdhsa_float_denorm_mode_32") { 3365 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3366 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3367 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3368 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3369 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3370 ValRange); 3371 } else if (ID == ".amdhsa_dx10_clamp") { 3372 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3373 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3374 } else if (ID == ".amdhsa_ieee_mode") { 3375 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3376 Val, ValRange); 3377 } else if (ID == ".amdhsa_fp16_overflow") { 3378 if (IVersion.Major < 9) 3379 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3380 IDRange); 3381 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3382 ValRange); 3383 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3384 if (IVersion.Major < 10) 3385 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3386 IDRange); 3387 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3388 ValRange); 3389 } else if (ID == ".amdhsa_memory_ordered") { 3390 if (IVersion.Major < 10) 3391 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3392 IDRange); 3393 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3394 ValRange); 3395 } else if (ID == ".amdhsa_forward_progress") { 3396 if (IVersion.Major < 10) 3397 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3398 IDRange); 3399 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3400 ValRange); 3401 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3402 PARSE_BITS_ENTRY( 3403 KD.compute_pgm_rsrc2, 3404 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3405 ValRange); 3406 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3407 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3408 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3409 Val, ValRange); 3410 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3411 PARSE_BITS_ENTRY( 3412 KD.compute_pgm_rsrc2, 3413 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3414 ValRange); 3415 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3416 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3417 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3418 Val, ValRange); 3419 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3420 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3421 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3422 Val, ValRange); 3423 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3424 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3425 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3426 Val, ValRange); 3427 } else if (ID == ".amdhsa_exception_int_div_zero") { 3428 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3429 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3430 Val, ValRange); 3431 } else { 3432 return getParser().Error(IDRange.Start, 3433 "unknown .amdhsa_kernel directive", IDRange); 3434 } 3435 3436 #undef PARSE_BITS_ENTRY 3437 } 3438 3439 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3440 return TokError(".amdhsa_next_free_vgpr directive is required"); 3441 3442 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3443 return TokError(".amdhsa_next_free_sgpr directive is required"); 3444 3445 unsigned VGPRBlocks; 3446 unsigned SGPRBlocks; 3447 if (calculateGPRBlocks(getFeatureBits(), 
ReserveVCC, ReserveFlatScr, 3448 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3449 SGPRRange, VGPRBlocks, SGPRBlocks)) 3450 return true; 3451 3452 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3453 VGPRBlocks)) 3454 return OutOfRangeError(VGPRRange); 3455 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3456 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3457 3458 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3459 SGPRBlocks)) 3460 return OutOfRangeError(SGPRRange); 3461 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3462 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3463 SGPRBlocks); 3464 3465 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3466 return TokError("too many user SGPRs enabled"); 3467 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3468 UserSGPRCount); 3469 3470 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3471 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3472 ReserveFlatScr, ReserveXNACK); 3473 return false; 3474 } 3475 3476 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3477 uint32_t Major; 3478 uint32_t Minor; 3479 3480 if (ParseDirectiveMajorMinor(Major, Minor)) 3481 return true; 3482 3483 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3484 return false; 3485 } 3486 3487 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3488 uint32_t Major; 3489 uint32_t Minor; 3490 uint32_t Stepping; 3491 StringRef VendorName; 3492 StringRef ArchName; 3493 3494 // If this directive has no arguments, then use the ISA version for the 3495 // targeted GPU. 3496 if (getLexer().is(AsmToken::EndOfStatement)) { 3497 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3498 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3499 ISA.Stepping, 3500 "AMD", "AMDGPU"); 3501 return false; 3502 } 3503 3504 if (ParseDirectiveMajorMinor(Major, Minor)) 3505 return true; 3506 3507 if (getLexer().isNot(AsmToken::Comma)) 3508 return TokError("stepping version number required, comma expected"); 3509 Lex(); 3510 3511 if (ParseAsAbsoluteExpression(Stepping)) 3512 return TokError("invalid stepping version"); 3513 3514 if (getLexer().isNot(AsmToken::Comma)) 3515 return TokError("vendor name required, comma expected"); 3516 Lex(); 3517 3518 if (getLexer().isNot(AsmToken::String)) 3519 return TokError("invalid vendor name"); 3520 3521 VendorName = getLexer().getTok().getStringContents(); 3522 Lex(); 3523 3524 if (getLexer().isNot(AsmToken::Comma)) 3525 return TokError("arch name required, comma expected"); 3526 Lex(); 3527 3528 if (getLexer().isNot(AsmToken::String)) 3529 return TokError("invalid arch name"); 3530 3531 ArchName = getLexer().getTok().getStringContents(); 3532 Lex(); 3533 3534 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3535 VendorName, ArchName); 3536 return false; 3537 } 3538 3539 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3540 amd_kernel_code_t &Header) { 3541 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3542 // assembly for backwards compatibility. 
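  // (More generally, each field in the .amd_kernel_code_t block arrives here as
  // an "ID = value" statement, e.g. "enable_wgp_mode = 1"; the value itself is
  // parsed by parseAmdKernelCodeField() below, and a few GFX10-only fields get
  // additional target checks further down in this function.)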
3543 if (ID == "max_scratch_backing_memory_byte_size") { 3544 Parser.eatToEndOfStatement(); 3545 return false; 3546 } 3547 3548 SmallString<40> ErrStr; 3549 raw_svector_ostream Err(ErrStr); 3550 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3551 return TokError(Err.str()); 3552 } 3553 Lex(); 3554 3555 if (ID == "enable_wgp_mode") { 3556 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3557 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3558 } 3559 3560 if (ID == "enable_mem_ordered") { 3561 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3562 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3563 } 3564 3565 if (ID == "enable_fwd_progress") { 3566 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3567 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3568 } 3569 3570 return false; 3571 } 3572 3573 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3574 amd_kernel_code_t Header; 3575 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3576 3577 while (true) { 3578 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3579 // will set the current token to EndOfStatement. 3580 while(getLexer().is(AsmToken::EndOfStatement)) 3581 Lex(); 3582 3583 if (getLexer().isNot(AsmToken::Identifier)) 3584 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3585 3586 StringRef ID = getLexer().getTok().getIdentifier(); 3587 Lex(); 3588 3589 if (ID == ".end_amd_kernel_code_t") 3590 break; 3591 3592 if (ParseAMDKernelCodeTValue(ID, Header)) 3593 return true; 3594 } 3595 3596 getTargetStreamer().EmitAMDKernelCodeT(Header); 3597 3598 return false; 3599 } 3600 3601 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3602 if (getLexer().isNot(AsmToken::Identifier)) 3603 return TokError("expected symbol name"); 3604 3605 StringRef KernelName = Parser.getTok().getString(); 3606 3607 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3608 ELF::STT_AMDGPU_HSA_KERNEL); 3609 Lex(); 3610 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3611 KernelScope.initialize(getContext()); 3612 return false; 3613 } 3614 3615 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3616 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3617 return Error(getParser().getTok().getLoc(), 3618 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3619 "architectures"); 3620 } 3621 3622 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3623 3624 std::string ISAVersionStringFromSTI; 3625 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3626 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3627 3628 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3629 return Error(getParser().getTok().getLoc(), 3630 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3631 "arguments specified through the command line"); 3632 } 3633 3634 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3635 Lex(); 3636 3637 return false; 3638 } 3639 3640 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3641 const char *AssemblerDirectiveBegin; 3642 const char *AssemblerDirectiveEnd; 3643 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3644 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3645 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3646 HSAMD::V3::AssemblerDirectiveEnd) 3647 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3648 HSAMD::AssemblerDirectiveEnd); 3649 3650 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3651 return Error(getParser().getTok().getLoc(), 3652 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3653 "not available on non-amdhsa OSes")).str()); 3654 } 3655 3656 std::string HSAMetadataString; 3657 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3658 HSAMetadataString)) 3659 return true; 3660 3661 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3662 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3663 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3664 } else { 3665 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3666 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3667 } 3668 3669 return false; 3670 } 3671 3672 /// Common code to parse out a block of text (typically YAML) between start and 3673 /// end directives. 3674 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3675 const char *AssemblerDirectiveEnd, 3676 std::string &CollectString) { 3677 3678 raw_string_ostream CollectStream(CollectString); 3679 3680 getLexer().setSkipSpace(false); 3681 3682 bool FoundEnd = false; 3683 while (!getLexer().is(AsmToken::Eof)) { 3684 while (getLexer().is(AsmToken::Space)) { 3685 CollectStream << getLexer().getTok().getString(); 3686 Lex(); 3687 } 3688 3689 if (getLexer().is(AsmToken::Identifier)) { 3690 StringRef ID = getLexer().getTok().getIdentifier(); 3691 if (ID == AssemblerDirectiveEnd) { 3692 Lex(); 3693 FoundEnd = true; 3694 break; 3695 } 3696 } 3697 3698 CollectStream << Parser.parseStringToEndOfStatement() 3699 << getContext().getAsmInfo()->getSeparatorString(); 3700 3701 Parser.eatToEndOfStatement(); 3702 } 3703 3704 getLexer().setSkipSpace(true); 3705 3706 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3707 return TokError(Twine("expected directive ") + 3708 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3709 } 3710 3711 CollectStream.flush(); 3712 return false; 3713 } 3714 3715 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3716 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3717 std::string String; 3718 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3719 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3720 return true; 3721 3722 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3723 if (!PALMetadata->setFromString(String)) 3724 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3725 return false; 3726 } 3727 3728 /// Parse the assembler directive for old linear-format PAL metadata. 
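/// The expected operand is a flat, comma-separated list containing an even
/// number of integer values: a register id followed by the value to write,
/// repeated as needed, e.g. (register ids purely illustrative)
/// "0x2c0a, 0x0, 0x2c0b, 0x42". Each pair is forwarded to
/// PALMetadata->setRegister(Key, Value) below.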
3729 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3730 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3731 return Error(getParser().getTok().getLoc(), 3732 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3733 "not available on non-amdpal OSes")).str()); 3734 } 3735 3736 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3737 PALMetadata->setLegacy(); 3738 for (;;) { 3739 uint32_t Key, Value; 3740 if (ParseAsAbsoluteExpression(Key)) { 3741 return TokError(Twine("invalid value in ") + 3742 Twine(PALMD::AssemblerDirective)); 3743 } 3744 if (getLexer().isNot(AsmToken::Comma)) { 3745 return TokError(Twine("expected an even number of values in ") + 3746 Twine(PALMD::AssemblerDirective)); 3747 } 3748 Lex(); 3749 if (ParseAsAbsoluteExpression(Value)) { 3750 return TokError(Twine("invalid value in ") + 3751 Twine(PALMD::AssemblerDirective)); 3752 } 3753 PALMetadata->setRegister(Key, Value); 3754 if (getLexer().isNot(AsmToken::Comma)) 3755 break; 3756 Lex(); 3757 } 3758 return false; 3759 } 3760 3761 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3762 StringRef IDVal = DirectiveID.getString(); 3763 3764 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3765 if (IDVal == ".amdgcn_target") 3766 return ParseDirectiveAMDGCNTarget(); 3767 3768 if (IDVal == ".amdhsa_kernel") 3769 return ParseDirectiveAMDHSAKernel(); 3770 3771 // TODO: Restructure/combine with PAL metadata directive. 3772 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3773 return ParseDirectiveHSAMetadata(); 3774 } else { 3775 if (IDVal == ".hsa_code_object_version") 3776 return ParseDirectiveHSACodeObjectVersion(); 3777 3778 if (IDVal == ".hsa_code_object_isa") 3779 return ParseDirectiveHSACodeObjectISA(); 3780 3781 if (IDVal == ".amd_kernel_code_t") 3782 return ParseDirectiveAMDKernelCodeT(); 3783 3784 if (IDVal == ".amdgpu_hsa_kernel") 3785 return ParseDirectiveAMDGPUHsaKernel(); 3786 3787 if (IDVal == ".amd_amdgpu_isa") 3788 return ParseDirectiveISAVersion(); 3789 3790 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3791 return ParseDirectiveHSAMetadata(); 3792 } 3793 3794 if (IDVal == PALMD::AssemblerDirectiveBegin) 3795 return ParseDirectivePALMetadataBegin(); 3796 3797 if (IDVal == PALMD::AssemblerDirective) 3798 return ParseDirectivePALMetadata(); 3799 3800 return true; 3801 } 3802 3803 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3804 unsigned RegNo) const { 3805 3806 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3807 R.isValid(); ++R) { 3808 if (*R == RegNo) 3809 return isGFX9() || isGFX10(); 3810 } 3811 3812 // GFX10 has 2 more SGPRs 104 and 105. 3813 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 3814 R.isValid(); ++R) { 3815 if (*R == RegNo) 3816 return hasSGPR104_SGPR105(); 3817 } 3818 3819 switch (RegNo) { 3820 case AMDGPU::TBA: 3821 case AMDGPU::TBA_LO: 3822 case AMDGPU::TBA_HI: 3823 case AMDGPU::TMA: 3824 case AMDGPU::TMA_LO: 3825 case AMDGPU::TMA_HI: 3826 return !isGFX9() && !isGFX10(); 3827 case AMDGPU::XNACK_MASK: 3828 case AMDGPU::XNACK_MASK_LO: 3829 case AMDGPU::XNACK_MASK_HI: 3830 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 3831 case AMDGPU::SGPR_NULL: 3832 return isGFX10(); 3833 default: 3834 break; 3835 } 3836 3837 if (isInlineValue(RegNo)) 3838 return !isCI() && !isSI() && !isVI(); 3839 3840 if (isCI()) 3841 return true; 3842 3843 if (isSI() || isGFX10()) { 3844 // No flat_scr on SI. 
3845 // On GFX10 flat scratch is not a valid register operand and can only be 3846 // accessed with s_setreg/s_getreg. 3847 switch (RegNo) { 3848 case AMDGPU::FLAT_SCR: 3849 case AMDGPU::FLAT_SCR_LO: 3850 case AMDGPU::FLAT_SCR_HI: 3851 return false; 3852 default: 3853 return true; 3854 } 3855 } 3856 3857 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3858 // SI/CI have. 3859 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3860 R.isValid(); ++R) { 3861 if (*R == RegNo) 3862 return hasSGPR102_SGPR103(); 3863 } 3864 3865 return true; 3866 } 3867 3868 OperandMatchResultTy 3869 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 3870 OperandMode Mode) { 3871 // Try to parse with a custom parser 3872 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3873 3874 // If we successfully parsed the operand or if there as an error parsing, 3875 // we are done. 3876 // 3877 // If we are parsing after we reach EndOfStatement then this means we 3878 // are appending default values to the Operands list. This is only done 3879 // by custom parser, so we shouldn't continue on to the generic parsing. 3880 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3881 getLexer().is(AsmToken::EndOfStatement)) 3882 return ResTy; 3883 3884 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 3885 unsigned Prefix = Operands.size(); 3886 SMLoc LBraceLoc = getTok().getLoc(); 3887 Parser.Lex(); // eat the '[' 3888 3889 for (;;) { 3890 ResTy = parseReg(Operands); 3891 if (ResTy != MatchOperand_Success) 3892 return ResTy; 3893 3894 if (getLexer().is(AsmToken::RBrac)) 3895 break; 3896 3897 if (getLexer().isNot(AsmToken::Comma)) 3898 return MatchOperand_ParseFail; 3899 Parser.Lex(); 3900 } 3901 3902 if (Operands.size() - Prefix > 1) { 3903 Operands.insert(Operands.begin() + Prefix, 3904 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 3905 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 3906 getTok().getLoc())); 3907 } 3908 3909 Parser.Lex(); // eat the ']' 3910 return MatchOperand_Success; 3911 } 3912 3913 ResTy = parseRegOrImm(Operands); 3914 3915 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) 3916 return ResTy; 3917 3918 const auto &Tok = Parser.getTok(); 3919 SMLoc S = Tok.getLoc(); 3920 3921 const MCExpr *Expr = nullptr; 3922 if (!Parser.parseExpression(Expr)) { 3923 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3924 return MatchOperand_Success; 3925 } 3926 3927 // Possibly this is an instruction flag like 'gds'. 3928 if (Tok.getKind() == AsmToken::Identifier) { 3929 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3930 Parser.Lex(); 3931 return MatchOperand_Success; 3932 } 3933 3934 return MatchOperand_NoMatch; 3935 } 3936 3937 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3938 // Clear any forced encodings from the previous instruction. 
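  // After the reset below, a recognized suffix selects the encoding for the
  // current instruction: e.g. "v_add_f32_e64" forces the 64-bit (VOP3)
  // encoding and the bare mnemonic "v_add_f32" is returned to the matcher,
  // while "_dpp" and "_sdwa" force those variants instead.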
3939 setForcedEncodingSize(0); 3940 setForcedDPP(false); 3941 setForcedSDWA(false); 3942 3943 if (Name.endswith("_e64")) { 3944 setForcedEncodingSize(64); 3945 return Name.substr(0, Name.size() - 4); 3946 } else if (Name.endswith("_e32")) { 3947 setForcedEncodingSize(32); 3948 return Name.substr(0, Name.size() - 4); 3949 } else if (Name.endswith("_dpp")) { 3950 setForcedDPP(true); 3951 return Name.substr(0, Name.size() - 4); 3952 } else if (Name.endswith("_sdwa")) { 3953 setForcedSDWA(true); 3954 return Name.substr(0, Name.size() - 5); 3955 } 3956 return Name; 3957 } 3958 3959 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3960 StringRef Name, 3961 SMLoc NameLoc, OperandVector &Operands) { 3962 // Add the instruction mnemonic 3963 Name = parseMnemonicSuffix(Name); 3964 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3965 3966 bool IsMIMG = Name.startswith("image_"); 3967 3968 while (!getLexer().is(AsmToken::EndOfStatement)) { 3969 OperandMode Mode = OperandMode_Default; 3970 if (IsMIMG && isGFX10() && Operands.size() == 2) 3971 Mode = OperandMode_NSA; 3972 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 3973 3974 // Eat the comma or space if there is one. 3975 if (getLexer().is(AsmToken::Comma)) 3976 Parser.Lex(); 3977 3978 switch (Res) { 3979 case MatchOperand_Success: break; 3980 case MatchOperand_ParseFail: 3981 Error(getLexer().getLoc(), "failed parsing operand."); 3982 while (!getLexer().is(AsmToken::EndOfStatement)) { 3983 Parser.Lex(); 3984 } 3985 return true; 3986 case MatchOperand_NoMatch: 3987 Error(getLexer().getLoc(), "not a valid operand."); 3988 while (!getLexer().is(AsmToken::EndOfStatement)) { 3989 Parser.Lex(); 3990 } 3991 return true; 3992 } 3993 } 3994 3995 return false; 3996 } 3997 3998 //===----------------------------------------------------------------------===// 3999 // Utility functions 4000 //===----------------------------------------------------------------------===// 4001 4002 OperandMatchResultTy 4003 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 4004 switch(getLexer().getKind()) { 4005 default: return MatchOperand_NoMatch; 4006 case AsmToken::Identifier: { 4007 StringRef Name = Parser.getTok().getString(); 4008 if (!Name.equals(Prefix)) { 4009 return MatchOperand_NoMatch; 4010 } 4011 4012 Parser.Lex(); 4013 if (getLexer().isNot(AsmToken::Colon)) 4014 return MatchOperand_ParseFail; 4015 4016 Parser.Lex(); 4017 4018 bool IsMinus = false; 4019 if (getLexer().getKind() == AsmToken::Minus) { 4020 Parser.Lex(); 4021 IsMinus = true; 4022 } 4023 4024 if (getLexer().isNot(AsmToken::Integer)) 4025 return MatchOperand_ParseFail; 4026 4027 if (getParser().parseAbsoluteExpression(Int)) 4028 return MatchOperand_ParseFail; 4029 4030 if (IsMinus) 4031 Int = -Int; 4032 break; 4033 } 4034 } 4035 return MatchOperand_Success; 4036 } 4037 4038 OperandMatchResultTy 4039 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4040 AMDGPUOperand::ImmTy ImmTy, 4041 bool (*ConvertResult)(int64_t&)) { 4042 SMLoc S = Parser.getTok().getLoc(); 4043 int64_t Value = 0; 4044 4045 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4046 if (Res != MatchOperand_Success) 4047 return Res; 4048 4049 if (ConvertResult && !ConvertResult(Value)) { 4050 return MatchOperand_ParseFail; 4051 } 4052 4053 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4054 return MatchOperand_Success; 4055 } 4056 4057 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 
4058 const char *Prefix, 4059 OperandVector &Operands, 4060 AMDGPUOperand::ImmTy ImmTy, 4061 bool (*ConvertResult)(int64_t&)) { 4062 StringRef Name = Parser.getTok().getString(); 4063 if (!Name.equals(Prefix)) 4064 return MatchOperand_NoMatch; 4065 4066 Parser.Lex(); 4067 if (getLexer().isNot(AsmToken::Colon)) 4068 return MatchOperand_ParseFail; 4069 4070 Parser.Lex(); 4071 if (getLexer().isNot(AsmToken::LBrac)) 4072 return MatchOperand_ParseFail; 4073 Parser.Lex(); 4074 4075 unsigned Val = 0; 4076 SMLoc S = Parser.getTok().getLoc(); 4077 4078 // FIXME: How to verify the number of elements matches the number of src 4079 // operands? 4080 for (int I = 0; I < 4; ++I) { 4081 if (I != 0) { 4082 if (getLexer().is(AsmToken::RBrac)) 4083 break; 4084 4085 if (getLexer().isNot(AsmToken::Comma)) 4086 return MatchOperand_ParseFail; 4087 Parser.Lex(); 4088 } 4089 4090 if (getLexer().isNot(AsmToken::Integer)) 4091 return MatchOperand_ParseFail; 4092 4093 int64_t Op; 4094 if (getParser().parseAbsoluteExpression(Op)) 4095 return MatchOperand_ParseFail; 4096 4097 if (Op != 0 && Op != 1) 4098 return MatchOperand_ParseFail; 4099 Val |= (Op << I); 4100 } 4101 4102 Parser.Lex(); 4103 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4104 return MatchOperand_Success; 4105 } 4106 4107 OperandMatchResultTy 4108 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4109 AMDGPUOperand::ImmTy ImmTy) { 4110 int64_t Bit = 0; 4111 SMLoc S = Parser.getTok().getLoc(); 4112 4113 // We are at the end of the statement, and this is a default argument, so 4114 // use a default value. 4115 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4116 switch(getLexer().getKind()) { 4117 case AsmToken::Identifier: { 4118 StringRef Tok = Parser.getTok().getString(); 4119 if (Tok == Name) { 4120 if (Tok == "r128" && isGFX9()) 4121 Error(S, "r128 modifier is not supported on this GPU"); 4122 if (Tok == "a16" && !isGFX9()) 4123 Error(S, "a16 modifier is not supported on this GPU"); 4124 Bit = 1; 4125 Parser.Lex(); 4126 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4127 Bit = 0; 4128 Parser.Lex(); 4129 } else { 4130 return MatchOperand_NoMatch; 4131 } 4132 break; 4133 } 4134 default: 4135 return MatchOperand_NoMatch; 4136 } 4137 } 4138 4139 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4140 return MatchOperand_ParseFail; 4141 4142 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4143 return MatchOperand_Success; 4144 } 4145 4146 static void addOptionalImmOperand( 4147 MCInst& Inst, const OperandVector& Operands, 4148 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4149 AMDGPUOperand::ImmTy ImmT, 4150 int64_t Default = 0) { 4151 auto i = OptionalIdx.find(ImmT); 4152 if (i != OptionalIdx.end()) { 4153 unsigned Idx = i->second; 4154 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4155 } else { 4156 Inst.addOperand(MCOperand::createImm(Default)); 4157 } 4158 } 4159 4160 OperandMatchResultTy 4161 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4162 if (getLexer().isNot(AsmToken::Identifier)) { 4163 return MatchOperand_NoMatch; 4164 } 4165 StringRef Tok = Parser.getTok().getString(); 4166 if (Tok != Prefix) { 4167 return MatchOperand_NoMatch; 4168 } 4169 4170 Parser.Lex(); 4171 if (getLexer().isNot(AsmToken::Colon)) { 4172 return MatchOperand_ParseFail; 4173 } 4174 4175 Parser.Lex(); 4176 if (getLexer().isNot(AsmToken::Identifier)) { 4177 return MatchOperand_ParseFail; 4178 } 4179 4180 Value = Parser.getTok().getString(); 4181 
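  // Note: on success the value identifier is left as the current token (there
  // is no Lex() before the return below); callers are expected to consume it.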
return MatchOperand_Success; 4182 } 4183 4184 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4185 // values to live in a joint format operand in the MCInst encoding. 4186 OperandMatchResultTy 4187 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4188 SMLoc S = Parser.getTok().getLoc(); 4189 int64_t Dfmt = 0, Nfmt = 0; 4190 // dfmt and nfmt can appear in either order, and each is optional. 4191 bool GotDfmt = false, GotNfmt = false; 4192 while (!GotDfmt || !GotNfmt) { 4193 if (!GotDfmt) { 4194 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4195 if (Res != MatchOperand_NoMatch) { 4196 if (Res != MatchOperand_Success) 4197 return Res; 4198 if (Dfmt >= 16) { 4199 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4200 return MatchOperand_ParseFail; 4201 } 4202 GotDfmt = true; 4203 Parser.Lex(); 4204 continue; 4205 } 4206 } 4207 if (!GotNfmt) { 4208 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4209 if (Res != MatchOperand_NoMatch) { 4210 if (Res != MatchOperand_Success) 4211 return Res; 4212 if (Nfmt >= 8) { 4213 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4214 return MatchOperand_ParseFail; 4215 } 4216 GotNfmt = true; 4217 Parser.Lex(); 4218 continue; 4219 } 4220 } 4221 break; 4222 } 4223 if (!GotDfmt && !GotNfmt) 4224 return MatchOperand_NoMatch; 4225 auto Format = Dfmt | Nfmt << 4; 4226 Operands.push_back( 4227 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4228 return MatchOperand_Success; 4229 } 4230 4231 //===----------------------------------------------------------------------===// 4232 // ds 4233 //===----------------------------------------------------------------------===// 4234 4235 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4236 const OperandVector &Operands) { 4237 OptionalImmIndexMap OptionalIdx; 4238 4239 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4240 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4241 4242 // Add the register arguments 4243 if (Op.isReg()) { 4244 Op.addRegOperands(Inst, 1); 4245 continue; 4246 } 4247 4248 // Handle optional arguments 4249 OptionalIdx[Op.getImmTy()] = i; 4250 } 4251 4252 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4253 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4254 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4255 4256 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4257 } 4258 4259 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4260 bool IsGdsHardcoded) { 4261 OptionalImmIndexMap OptionalIdx; 4262 4263 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4264 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4265 4266 // Add the register arguments 4267 if (Op.isReg()) { 4268 Op.addRegOperands(Inst, 1); 4269 continue; 4270 } 4271 4272 if (Op.isToken() && Op.getToken() == "gds") { 4273 IsGdsHardcoded = true; 4274 continue; 4275 } 4276 4277 // Handle optional arguments 4278 OptionalIdx[Op.getImmTy()] = i; 4279 } 4280 4281 AMDGPUOperand::ImmTy OffsetType = 4282 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4283 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4284 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4285 AMDGPUOperand::ImmTyOffset; 4286 4287 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4288 4289 if (!IsGdsHardcoded) { 4290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4291 } 4292 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4293 } 4294 4295 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4296 OptionalImmIndexMap OptionalIdx; 4297 4298 unsigned OperandIdx[4]; 4299 unsigned EnMask = 0; 4300 int SrcIdx = 0; 4301 4302 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4303 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4304 4305 // Add the register arguments 4306 if (Op.isReg()) { 4307 assert(SrcIdx < 4); 4308 OperandIdx[SrcIdx] = Inst.size(); 4309 Op.addRegOperands(Inst, 1); 4310 ++SrcIdx; 4311 continue; 4312 } 4313 4314 if (Op.isOff()) { 4315 assert(SrcIdx < 4); 4316 OperandIdx[SrcIdx] = Inst.size(); 4317 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4318 ++SrcIdx; 4319 continue; 4320 } 4321 4322 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4323 Op.addImmOperands(Inst, 1); 4324 continue; 4325 } 4326 4327 if (Op.isToken() && Op.getToken() == "done") 4328 continue; 4329 4330 // Handle optional arguments 4331 OptionalIdx[Op.getImmTy()] = i; 4332 } 4333 4334 assert(SrcIdx == 4); 4335 4336 bool Compr = false; 4337 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4338 Compr = true; 4339 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4340 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4341 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4342 } 4343 4344 for (auto i = 0; i < SrcIdx; ++i) { 4345 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4346 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4347 } 4348 } 4349 4350 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4351 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4352 4353 Inst.addOperand(MCOperand::createImm(EnMask)); 4354 } 4355 4356 //===----------------------------------------------------------------------===// 4357 // s_waitcnt 4358 //===----------------------------------------------------------------------===// 4359 4360 static bool 4361 encodeCnt( 4362 const AMDGPU::IsaVersion ISA, 4363 int64_t &IntVal, 4364 int64_t CntVal, 4365 bool Saturate, 4366 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4367 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4368 { 4369 bool Failed = false; 4370 4371 IntVal = encode(ISA, IntVal, CntVal); 4372 if (CntVal != decode(ISA, IntVal)) { 4373 if (Saturate) { 4374 IntVal = encode(ISA, IntVal, -1); 4375 } else { 4376 Failed = true; 4377 } 4378 } 4379 return Failed; 4380 } 4381 4382 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4383 StringRef CntName = Parser.getTok().getString(); 4384 int64_t CntVal; 4385 4386 Parser.Lex(); 4387 if (getLexer().isNot(AsmToken::LParen)) 4388 return true; 4389 4390 Parser.Lex(); 4391 if (getLexer().isNot(AsmToken::Integer)) 4392 return true; 4393 4394 SMLoc ValLoc = Parser.getTok().getLoc(); 4395 if (getParser().parseAbsoluteExpression(CntVal)) 4396 return true; 4397 4398 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4399 4400 bool Failed = true; 4401 bool Sat = CntName.endswith("_sat"); 4402 4403 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4404 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4405 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4406 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4407 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4408 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4409 } 4410 4411 if (Failed) { 4412 Error(ValLoc, "too large value for " + CntName); 4413 return true; 4414 } 4415 4416 if (getLexer().isNot(AsmToken::RParen)) { 4417 return true; 4418 } 4419 4420 Parser.Lex(); 4421 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 4422 const AsmToken NextToken = getLexer().peekTok(); 4423 if (NextToken.is(AsmToken::Identifier)) { 4424 Parser.Lex(); 4425 } 4426 } 4427 4428 return false; 4429 } 4430 4431 OperandMatchResultTy 4432 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4433 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4434 int64_t Waitcnt = getWaitcntBitMask(ISA); 4435 SMLoc S = Parser.getTok().getLoc(); 4436 4437 switch(getLexer().getKind()) { 4438 default: return MatchOperand_ParseFail; 4439 case AsmToken::Integer: 4440 // The operand can be an integer value. 
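    // (e.g. "s_waitcnt 0x0" supplies the raw encoding directly; the symbolic
    // form, such as "s_waitcnt vmcnt(0) & lgkmcnt(0)", is handled by the
    // Identifier case below via parseCnt().)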
4441 if (getParser().parseAbsoluteExpression(Waitcnt)) 4442 return MatchOperand_ParseFail; 4443 break; 4444 4445 case AsmToken::Identifier: 4446 do { 4447 if (parseCnt(Waitcnt)) 4448 return MatchOperand_ParseFail; 4449 } while(getLexer().isNot(AsmToken::EndOfStatement)); 4450 break; 4451 } 4452 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4453 return MatchOperand_Success; 4454 } 4455 4456 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 4457 int64_t &Width) { 4458 using namespace llvm::AMDGPU::Hwreg; 4459 4460 if (Parser.getTok().getString() != "hwreg") 4461 return true; 4462 Parser.Lex(); 4463 4464 if (getLexer().isNot(AsmToken::LParen)) 4465 return true; 4466 Parser.Lex(); 4467 4468 if (getLexer().is(AsmToken::Identifier)) { 4469 HwReg.IsSymbolic = true; 4470 HwReg.Id = ID_UNKNOWN_; 4471 const StringRef tok = Parser.getTok().getString(); 4472 int Last = ID_SYMBOLIC_LAST_; 4473 if (isSI() || isCI() || isVI()) 4474 Last = ID_SYMBOLIC_FIRST_GFX9_; 4475 else if (isGFX9()) 4476 Last = ID_SYMBOLIC_FIRST_GFX10_; 4477 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 4478 if (tok == IdSymbolic[i]) { 4479 HwReg.Id = i; 4480 break; 4481 } 4482 } 4483 Parser.Lex(); 4484 } else { 4485 HwReg.IsSymbolic = false; 4486 if (getLexer().isNot(AsmToken::Integer)) 4487 return true; 4488 if (getParser().parseAbsoluteExpression(HwReg.Id)) 4489 return true; 4490 } 4491 4492 if (getLexer().is(AsmToken::RParen)) { 4493 Parser.Lex(); 4494 return false; 4495 } 4496 4497 // optional params 4498 if (getLexer().isNot(AsmToken::Comma)) 4499 return true; 4500 Parser.Lex(); 4501 4502 if (getLexer().isNot(AsmToken::Integer)) 4503 return true; 4504 if (getParser().parseAbsoluteExpression(Offset)) 4505 return true; 4506 4507 if (getLexer().isNot(AsmToken::Comma)) 4508 return true; 4509 Parser.Lex(); 4510 4511 if (getLexer().isNot(AsmToken::Integer)) 4512 return true; 4513 if (getParser().parseAbsoluteExpression(Width)) 4514 return true; 4515 4516 if (getLexer().isNot(AsmToken::RParen)) 4517 return true; 4518 Parser.Lex(); 4519 4520 return false; 4521 } 4522 4523 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4524 using namespace llvm::AMDGPU::Hwreg; 4525 4526 int64_t Imm16Val = 0; 4527 SMLoc S = Parser.getTok().getLoc(); 4528 4529 switch(getLexer().getKind()) { 4530 default: return MatchOperand_NoMatch; 4531 case AsmToken::Integer: 4532 // The operand can be an integer value. 4533 if (getParser().parseAbsoluteExpression(Imm16Val)) 4534 return MatchOperand_NoMatch; 4535 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4536 Error(S, "invalid immediate: only 16-bit values are legal"); 4537 // Do not return error code, but create an imm operand anyway and proceed 4538 // to the next operand, if any. That avoids unneccessary error messages. 
4539 } 4540 break; 4541 4542 case AsmToken::Identifier: { 4543 OperandInfoTy HwReg(ID_UNKNOWN_); 4544 int64_t Offset = OFFSET_DEFAULT_; 4545 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4546 if (parseHwregConstruct(HwReg, Offset, Width)) 4547 return MatchOperand_ParseFail; 4548 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4549 if (HwReg.IsSymbolic) 4550 Error(S, "invalid symbolic name of hardware register"); 4551 else 4552 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4553 } 4554 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4555 Error(S, "invalid bit offset: only 5-bit values are legal"); 4556 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4557 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4558 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4559 } 4560 break; 4561 } 4562 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4563 return MatchOperand_Success; 4564 } 4565 4566 bool AMDGPUOperand::isSWaitCnt() const { 4567 return isImm(); 4568 } 4569 4570 bool AMDGPUOperand::isHwreg() const { 4571 return isImmTy(ImmTyHwreg); 4572 } 4573 4574 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4575 using namespace llvm::AMDGPU::SendMsg; 4576 4577 if (Parser.getTok().getString() != "sendmsg") 4578 return true; 4579 Parser.Lex(); 4580 4581 if (getLexer().isNot(AsmToken::LParen)) 4582 return true; 4583 Parser.Lex(); 4584 4585 if (getLexer().is(AsmToken::Identifier)) { 4586 Msg.IsSymbolic = true; 4587 Msg.Id = ID_UNKNOWN_; 4588 const std::string tok = Parser.getTok().getString(); 4589 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4590 switch(i) { 4591 default: continue; // Omit gaps. 4592 case ID_GS_ALLOC_REQ: 4593 if (isSI() || isCI() || isVI()) 4594 continue; 4595 break; 4596 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: 4597 case ID_SYSMSG: break; 4598 } 4599 if (tok == IdSymbolic[i]) { 4600 Msg.Id = i; 4601 break; 4602 } 4603 } 4604 Parser.Lex(); 4605 } else { 4606 Msg.IsSymbolic = false; 4607 if (getLexer().isNot(AsmToken::Integer)) 4608 return true; 4609 if (getParser().parseAbsoluteExpression(Msg.Id)) 4610 return true; 4611 if (getLexer().is(AsmToken::Integer)) 4612 if (getParser().parseAbsoluteExpression(Msg.Id)) 4613 Msg.Id = ID_UNKNOWN_; 4614 } 4615 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4616 return false; 4617 4618 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4619 if (getLexer().isNot(AsmToken::RParen)) 4620 return true; 4621 Parser.Lex(); 4622 return false; 4623 } 4624 4625 if (getLexer().isNot(AsmToken::Comma)) 4626 return true; 4627 Parser.Lex(); 4628 4629 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4630 Operation.Id = ID_UNKNOWN_; 4631 if (getLexer().is(AsmToken::Identifier)) { 4632 Operation.IsSymbolic = true; 4633 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4634 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4635 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4636 const StringRef Tok = Parser.getTok().getString(); 4637 for (int i = F; i < L; ++i) { 4638 if (Tok == S[i]) { 4639 Operation.Id = i; 4640 break; 4641 } 4642 } 4643 Parser.Lex(); 4644 } else { 4645 Operation.IsSymbolic = false; 4646 if (getLexer().isNot(AsmToken::Integer)) 4647 return true; 4648 if (getParser().parseAbsoluteExpression(Operation.Id)) 4649 return true; 4650 } 4651 4652 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4653 // Stream id is optional. 4654 if (getLexer().is(AsmToken::RParen)) { 4655 Parser.Lex(); 4656 return false; 4657 } 4658 4659 if (getLexer().isNot(AsmToken::Comma)) 4660 return true; 4661 Parser.Lex(); 4662 4663 if (getLexer().isNot(AsmToken::Integer)) 4664 return true; 4665 if (getParser().parseAbsoluteExpression(StreamId)) 4666 return true; 4667 } 4668 4669 if (getLexer().isNot(AsmToken::RParen)) 4670 return true; 4671 Parser.Lex(); 4672 return false; 4673 } 4674 4675 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4676 if (getLexer().getKind() != AsmToken::Identifier) 4677 return MatchOperand_NoMatch; 4678 4679 StringRef Str = Parser.getTok().getString(); 4680 int Slot = StringSwitch<int>(Str) 4681 .Case("p10", 0) 4682 .Case("p20", 1) 4683 .Case("p0", 2) 4684 .Default(-1); 4685 4686 SMLoc S = Parser.getTok().getLoc(); 4687 if (Slot == -1) 4688 return MatchOperand_ParseFail; 4689 4690 Parser.Lex(); 4691 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4692 AMDGPUOperand::ImmTyInterpSlot)); 4693 return MatchOperand_Success; 4694 } 4695 4696 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4697 if (getLexer().getKind() != AsmToken::Identifier) 4698 return MatchOperand_NoMatch; 4699 4700 StringRef Str = Parser.getTok().getString(); 4701 if (!Str.startswith("attr")) 4702 return MatchOperand_NoMatch; 4703 4704 StringRef Chan = Str.take_back(2); 4705 int AttrChan = StringSwitch<int>(Chan) 4706 .Case(".x", 0) 4707 .Case(".y", 1) 4708 .Case(".z", 2) 4709 .Case(".w", 3) 4710 .Default(-1); 4711 if (AttrChan == -1) 4712 return MatchOperand_ParseFail; 4713 4714 Str = Str.drop_back(2).drop_front(4); 4715 4716 uint8_t Attr; 4717 if (Str.getAsInteger(10, Attr)) 4718 return MatchOperand_ParseFail; 4719 4720 SMLoc S = Parser.getTok().getLoc(); 4721 Parser.Lex(); 4722 if (Attr > 63) { 4723 Error(S, "out of bounds attr"); 4724 return MatchOperand_Success; 4725 } 4726 4727 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4728 4729 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4730 AMDGPUOperand::ImmTyInterpAttr)); 4731 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4732 AMDGPUOperand::ImmTyAttrChan)); 4733 return MatchOperand_Success; 4734 } 4735 4736 void AMDGPUAsmParser::errorExpTgt() { 4737 Error(Parser.getTok().getLoc(), "invalid exp target"); 4738 } 4739 4740 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4741 uint8_t &Val) { 4742 if (Str == "null") { 4743 Val = 9; 4744 return MatchOperand_Success; 4745 } 4746 4747 if (Str.startswith("mrt")) { 4748 Str = Str.drop_front(3); 4749 if (Str == "z") { // == mrtz 4750 Val = 8; 4751 return MatchOperand_Success; 4752 } 4753 4754 if (Str.getAsInteger(10, Val)) 4755 return MatchOperand_ParseFail; 4756 4757 if (Val > 7) 4758 errorExpTgt(); 4759 4760 return MatchOperand_Success; 4761 } 4762 4763 if (Str.startswith("pos")) { 4764 Str = Str.drop_front(3); 4765 if (Str.getAsInteger(10, Val)) 4766 return MatchOperand_ParseFail; 4767 4768 if (Val > 4 
|| (Val == 4 && !isGFX10())) 4769 errorExpTgt(); 4770 4771 Val += 12; 4772 return MatchOperand_Success; 4773 } 4774 4775 if (isGFX10() && Str == "prim") { 4776 Val = 20; 4777 return MatchOperand_Success; 4778 } 4779 4780 if (Str.startswith("param")) { 4781 Str = Str.drop_front(5); 4782 if (Str.getAsInteger(10, Val)) 4783 return MatchOperand_ParseFail; 4784 4785 if (Val >= 32) 4786 errorExpTgt(); 4787 4788 Val += 32; 4789 return MatchOperand_Success; 4790 } 4791 4792 if (Str.startswith("invalid_target_")) { 4793 Str = Str.drop_front(15); 4794 if (Str.getAsInteger(10, Val)) 4795 return MatchOperand_ParseFail; 4796 4797 errorExpTgt(); 4798 return MatchOperand_Success; 4799 } 4800 4801 return MatchOperand_NoMatch; 4802 } 4803 4804 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4805 uint8_t Val; 4806 StringRef Str = Parser.getTok().getString(); 4807 4808 auto Res = parseExpTgtImpl(Str, Val); 4809 if (Res != MatchOperand_Success) 4810 return Res; 4811 4812 SMLoc S = Parser.getTok().getLoc(); 4813 Parser.Lex(); 4814 4815 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4816 AMDGPUOperand::ImmTyExpTgt)); 4817 return MatchOperand_Success; 4818 } 4819 4820 OperandMatchResultTy 4821 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4822 using namespace llvm::AMDGPU::SendMsg; 4823 4824 int64_t Imm16Val = 0; 4825 SMLoc S = Parser.getTok().getLoc(); 4826 4827 switch(getLexer().getKind()) { 4828 default: 4829 return MatchOperand_NoMatch; 4830 case AsmToken::Integer: 4831 // The operand can be an integer value. 4832 if (getParser().parseAbsoluteExpression(Imm16Val)) 4833 return MatchOperand_NoMatch; 4834 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4835 Error(S, "invalid immediate: only 16-bit values are legal"); 4836 // Do not return error code, but create an imm operand anyway and proceed 4837 // to the next operand, if any. That avoids unneccessary error messages. 4838 } 4839 break; 4840 case AsmToken::Identifier: { 4841 OperandInfoTy Msg(ID_UNKNOWN_); 4842 OperandInfoTy Operation(OP_UNKNOWN_); 4843 int64_t StreamId = STREAM_ID_DEFAULT_; 4844 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4845 return MatchOperand_ParseFail; 4846 do { 4847 // Validate and encode message ID. 4848 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4849 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI()) 4850 || Msg.Id == ID_SYSMSG)) { 4851 if (Msg.IsSymbolic) 4852 Error(S, "invalid/unsupported symbolic name of message"); 4853 else 4854 Error(S, "invalid/unsupported code of message"); 4855 break; 4856 } 4857 Imm16Val = (Msg.Id << ID_SHIFT_); 4858 // Validate and encode operation ID. 4859 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4860 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4861 if (Operation.IsSymbolic) 4862 Error(S, "invalid symbolic name of GS_OP"); 4863 else 4864 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4865 break; 4866 } 4867 if (Operation.Id == OP_GS_NOP 4868 && Msg.Id != ID_GS_DONE) { 4869 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4870 break; 4871 } 4872 Imm16Val |= (Operation.Id << OP_SHIFT_); 4873 } 4874 if (Msg.Id == ID_SYSMSG) { 4875 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4876 if (Operation.IsSymbolic) 4877 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4878 else 4879 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4880 break; 4881 } 4882 Imm16Val |= (Operation.Id << OP_SHIFT_); 4883 } 4884 // Validate and encode stream ID. 
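      // (A stream id is only accepted for GS/GS_DONE operations other than
      // NOP; the full symbolic operand then looks like, e.g.,
      // "sendmsg(MSG_GS, GS_OP_EMIT, 1)", with the symbolic names taken from
      // AMDGPUAsmUtils.)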
4885 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4886 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4887 Error(S, "invalid stream id: only 2-bit values are legal"); 4888 break; 4889 } 4890 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4891 } 4892 } while (false); 4893 } 4894 break; 4895 } 4896 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4897 return MatchOperand_Success; 4898 } 4899 4900 bool AMDGPUOperand::isSendMsg() const { 4901 return isImmTy(ImmTySendMsg); 4902 } 4903 4904 //===----------------------------------------------------------------------===// 4905 // parser helpers 4906 //===----------------------------------------------------------------------===// 4907 4908 bool 4909 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 4910 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 4911 } 4912 4913 bool 4914 AMDGPUAsmParser::isId(const StringRef Id) const { 4915 return isId(getToken(), Id); 4916 } 4917 4918 bool 4919 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 4920 return getTokenKind() == Kind; 4921 } 4922 4923 bool 4924 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4925 if (isId(Id)) { 4926 lex(); 4927 return true; 4928 } 4929 return false; 4930 } 4931 4932 bool 4933 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4934 if (isToken(Kind)) { 4935 lex(); 4936 return true; 4937 } 4938 return false; 4939 } 4940 4941 bool 4942 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4943 const StringRef ErrMsg) { 4944 if (!trySkipToken(Kind)) { 4945 Error(getLoc(), ErrMsg); 4946 return false; 4947 } 4948 return true; 4949 } 4950 4951 bool 4952 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4953 return !getParser().parseAbsoluteExpression(Imm); 4954 } 4955 4956 bool 4957 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4958 if (isToken(AsmToken::String)) { 4959 Val = getToken().getStringContents(); 4960 lex(); 4961 return true; 4962 } else { 4963 Error(getLoc(), ErrMsg); 4964 return false; 4965 } 4966 } 4967 4968 AsmToken 4969 AMDGPUAsmParser::getToken() const { 4970 return Parser.getTok(); 4971 } 4972 4973 AsmToken 4974 AMDGPUAsmParser::peekToken() { 4975 return getLexer().peekTok(); 4976 } 4977 4978 void 4979 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 4980 auto TokCount = getLexer().peekTokens(Tokens); 4981 4982 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 4983 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 4984 } 4985 4986 AsmToken::TokenKind 4987 AMDGPUAsmParser::getTokenKind() const { 4988 return getLexer().getKind(); 4989 } 4990 4991 SMLoc 4992 AMDGPUAsmParser::getLoc() const { 4993 return getToken().getLoc(); 4994 } 4995 4996 StringRef 4997 AMDGPUAsmParser::getTokenStr() const { 4998 return getToken().getString(); 4999 } 5000 5001 void 5002 AMDGPUAsmParser::lex() { 5003 Parser.Lex(); 5004 } 5005 5006 //===----------------------------------------------------------------------===// 5007 // swizzle 5008 //===----------------------------------------------------------------------===// 5009 5010 LLVM_READNONE 5011 static unsigned 5012 encodeBitmaskPerm(const unsigned AndMask, 5013 const unsigned OrMask, 5014 const unsigned XorMask) { 5015 using namespace llvm::AMDGPU::Swizzle; 5016 5017 return BITMASK_PERM_ENC | 5018 (AndMask << BITMASK_AND_SHIFT) | 5019 (OrMask << BITMASK_OR_SHIFT) | 5020 (XorMask << BITMASK_XOR_SHIFT); 5021 } 5022 5023 bool 5024 
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5025 const unsigned MinVal, 5026 const unsigned MaxVal, 5027 const StringRef ErrMsg) { 5028 for (unsigned i = 0; i < OpNum; ++i) { 5029 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5030 return false; 5031 } 5032 SMLoc ExprLoc = Parser.getTok().getLoc(); 5033 if (!parseExpr(Op[i])) { 5034 return false; 5035 } 5036 if (Op[i] < MinVal || Op[i] > MaxVal) { 5037 Error(ExprLoc, ErrMsg); 5038 return false; 5039 } 5040 } 5041 5042 return true; 5043 } 5044 5045 bool 5046 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5047 using namespace llvm::AMDGPU::Swizzle; 5048 5049 int64_t Lane[LANE_NUM]; 5050 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5051 "expected a 2-bit lane id")) { 5052 Imm = QUAD_PERM_ENC; 5053 for (unsigned I = 0; I < LANE_NUM; ++I) { 5054 Imm |= Lane[I] << (LANE_SHIFT * I); 5055 } 5056 return true; 5057 } 5058 return false; 5059 } 5060 5061 bool 5062 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5063 using namespace llvm::AMDGPU::Swizzle; 5064 5065 SMLoc S = Parser.getTok().getLoc(); 5066 int64_t GroupSize; 5067 int64_t LaneIdx; 5068 5069 if (!parseSwizzleOperands(1, &GroupSize, 5070 2, 32, 5071 "group size must be in the interval [2,32]")) { 5072 return false; 5073 } 5074 if (!isPowerOf2_64(GroupSize)) { 5075 Error(S, "group size must be a power of two"); 5076 return false; 5077 } 5078 if (parseSwizzleOperands(1, &LaneIdx, 5079 0, GroupSize - 1, 5080 "lane id must be in the interval [0,group size - 1]")) { 5081 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5082 return true; 5083 } 5084 return false; 5085 } 5086 5087 bool 5088 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5089 using namespace llvm::AMDGPU::Swizzle; 5090 5091 SMLoc S = Parser.getTok().getLoc(); 5092 int64_t GroupSize; 5093 5094 if (!parseSwizzleOperands(1, &GroupSize, 5095 2, 32, "group size must be in the interval [2,32]")) { 5096 return false; 5097 } 5098 if (!isPowerOf2_64(GroupSize)) { 5099 Error(S, "group size must be a power of two"); 5100 return false; 5101 } 5102 5103 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5104 return true; 5105 } 5106 5107 bool 5108 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5109 using namespace llvm::AMDGPU::Swizzle; 5110 5111 SMLoc S = Parser.getTok().getLoc(); 5112 int64_t GroupSize; 5113 5114 if (!parseSwizzleOperands(1, &GroupSize, 5115 1, 16, "group size must be in the interval [1,16]")) { 5116 return false; 5117 } 5118 if (!isPowerOf2_64(GroupSize)) { 5119 Error(S, "group size must be a power of two"); 5120 return false; 5121 } 5122 5123 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5124 return true; 5125 } 5126 5127 bool 5128 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5129 using namespace llvm::AMDGPU::Swizzle; 5130 5131 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5132 return false; 5133 } 5134 5135 StringRef Ctl; 5136 SMLoc StrLoc = Parser.getTok().getLoc(); 5137 if (!parseString(Ctl)) { 5138 return false; 5139 } 5140 if (Ctl.size() != BITMASK_WIDTH) { 5141 Error(StrLoc, "expected a 5-character mask"); 5142 return false; 5143 } 5144 5145 unsigned AndMask = 0; 5146 unsigned OrMask = 0; 5147 unsigned XorMask = 0; 5148 5149 for (size_t i = 0; i < Ctl.size(); ++i) { 5150 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5151 switch(Ctl[i]) { 5152 default: 5153 Error(StrLoc, "invalid mask"); 5154 return false; 5155 case '0': 5156 break; 5157 case '1': 5158 OrMask |= Mask; 5159 break; 5160 case 'p': 5161 
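      // 'p' sets only the AND-mask bit for this position (contrast 'i' below,
      // which also sets the XOR mask); in the BITMASK_PERM encoding this
      // passes the corresponding source lane id bit through unchanged.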
AndMask |= Mask; 5162 break; 5163 case 'i': 5164 AndMask |= Mask; 5165 XorMask |= Mask; 5166 break; 5167 } 5168 } 5169 5170 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5171 return true; 5172 } 5173 5174 bool 5175 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5176 5177 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5178 5179 if (!parseExpr(Imm)) { 5180 return false; 5181 } 5182 if (!isUInt<16>(Imm)) { 5183 Error(OffsetLoc, "expected a 16-bit offset"); 5184 return false; 5185 } 5186 return true; 5187 } 5188 5189 bool 5190 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5191 using namespace llvm::AMDGPU::Swizzle; 5192 5193 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5194 5195 SMLoc ModeLoc = Parser.getTok().getLoc(); 5196 bool Ok = false; 5197 5198 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5199 Ok = parseSwizzleQuadPerm(Imm); 5200 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5201 Ok = parseSwizzleBitmaskPerm(Imm); 5202 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5203 Ok = parseSwizzleBroadcast(Imm); 5204 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5205 Ok = parseSwizzleSwap(Imm); 5206 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5207 Ok = parseSwizzleReverse(Imm); 5208 } else { 5209 Error(ModeLoc, "expected a swizzle mode"); 5210 } 5211 5212 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5213 } 5214 5215 return false; 5216 } 5217 5218 OperandMatchResultTy 5219 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5220 SMLoc S = Parser.getTok().getLoc(); 5221 int64_t Imm = 0; 5222 5223 if (trySkipId("offset")) { 5224 5225 bool Ok = false; 5226 if (skipToken(AsmToken::Colon, "expected a colon")) { 5227 if (trySkipId("swizzle")) { 5228 Ok = parseSwizzleMacro(Imm); 5229 } else { 5230 Ok = parseSwizzleOffset(Imm); 5231 } 5232 } 5233 5234 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5235 5236 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5237 } else { 5238 // Swizzle "offset" operand is optional. 5239 // If it is omitted, try parsing other optional operands. 5240 return parseOptionalOpr(Operands); 5241 } 5242 } 5243 5244 bool 5245 AMDGPUOperand::isSwizzle() const { 5246 return isImmTy(ImmTySwizzle); 5247 } 5248 5249 //===----------------------------------------------------------------------===// 5250 // VGPR Index Mode 5251 //===----------------------------------------------------------------------===// 5252 5253 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5254 5255 using namespace llvm::AMDGPU::VGPRIndexMode; 5256 5257 if (trySkipToken(AsmToken::RParen)) { 5258 return OFF; 5259 } 5260 5261 int64_t Imm = 0; 5262 5263 while (true) { 5264 unsigned Mode = 0; 5265 SMLoc S = Parser.getTok().getLoc(); 5266 5267 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5268 if (trySkipId(IdSymbolic[ModeId])) { 5269 Mode = 1 << ModeId; 5270 break; 5271 } 5272 } 5273 5274 if (Mode == 0) { 5275 Error(S, (Imm == 0)? 
5276 "expected a VGPR index mode or a closing parenthesis" : 5277 "expected a VGPR index mode"); 5278 break; 5279 } 5280 5281 if (Imm & Mode) { 5282 Error(S, "duplicate VGPR index mode"); 5283 break; 5284 } 5285 Imm |= Mode; 5286 5287 if (trySkipToken(AsmToken::RParen)) 5288 break; 5289 if (!skipToken(AsmToken::Comma, 5290 "expected a comma or a closing parenthesis")) 5291 break; 5292 } 5293 5294 return Imm; 5295 } 5296 5297 OperandMatchResultTy 5298 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5299 5300 int64_t Imm = 0; 5301 SMLoc S = Parser.getTok().getLoc(); 5302 5303 if (getLexer().getKind() == AsmToken::Identifier && 5304 Parser.getTok().getString() == "gpr_idx" && 5305 getLexer().peekTok().is(AsmToken::LParen)) { 5306 5307 Parser.Lex(); 5308 Parser.Lex(); 5309 5310 // If parse failed, trigger an error but do not return error code 5311 // to avoid excessive error messages. 5312 Imm = parseGPRIdxMacro(); 5313 5314 } else { 5315 if (getParser().parseAbsoluteExpression(Imm)) 5316 return MatchOperand_NoMatch; 5317 if (Imm < 0 || !isUInt<4>(Imm)) { 5318 Error(S, "invalid immediate: only 4-bit values are legal"); 5319 } 5320 } 5321 5322 Operands.push_back( 5323 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5324 return MatchOperand_Success; 5325 } 5326 5327 bool AMDGPUOperand::isGPRIdxMode() const { 5328 return isImmTy(ImmTyGprIdxMode); 5329 } 5330 5331 //===----------------------------------------------------------------------===// 5332 // sopp branch targets 5333 //===----------------------------------------------------------------------===// 5334 5335 OperandMatchResultTy 5336 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5337 SMLoc S = Parser.getTok().getLoc(); 5338 5339 switch (getLexer().getKind()) { 5340 default: return MatchOperand_ParseFail; 5341 case AsmToken::Integer: { 5342 int64_t Imm; 5343 if (getParser().parseAbsoluteExpression(Imm)) 5344 return MatchOperand_ParseFail; 5345 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5346 return MatchOperand_Success; 5347 } 5348 5349 case AsmToken::Identifier: 5350 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5351 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5352 Parser.getTok().getString()), getContext()), S)); 5353 Parser.Lex(); 5354 return MatchOperand_Success; 5355 } 5356 } 5357 5358 //===----------------------------------------------------------------------===// 5359 // mubuf 5360 //===----------------------------------------------------------------------===// 5361 5362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5363 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5364 } 5365 5366 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5367 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5368 } 5369 5370 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5371 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5372 } 5373 5374 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5375 const OperandVector &Operands, 5376 bool IsAtomic, 5377 bool IsAtomicReturn, 5378 bool IsLds) { 5379 bool IsLdsOpcode = IsLds; 5380 bool HasLdsModifier = false; 5381 OptionalImmIndexMap OptionalIdx; 5382 assert(IsAtomicReturn ? 
IsAtomic : true); 5383 unsigned FirstOperandIdx = 1; 5384 5385 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5386 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5387 5388 // Add the register arguments 5389 if (Op.isReg()) { 5390 Op.addRegOperands(Inst, 1); 5391 // Insert a tied src for atomic return dst. 5392 // This cannot be postponed as subsequent calls to 5393 // addImmOperands rely on correct number of MC operands. 5394 if (IsAtomicReturn && i == FirstOperandIdx) 5395 Op.addRegOperands(Inst, 1); 5396 continue; 5397 } 5398 5399 // Handle the case where soffset is an immediate 5400 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5401 Op.addImmOperands(Inst, 1); 5402 continue; 5403 } 5404 5405 HasLdsModifier |= Op.isLDS(); 5406 5407 // Handle tokens like 'offen' which are sometimes hard-coded into the 5408 // asm string. There are no MCInst operands for these. 5409 if (Op.isToken()) { 5410 continue; 5411 } 5412 assert(Op.isImm()); 5413 5414 // Handle optional arguments 5415 OptionalIdx[Op.getImmTy()] = i; 5416 } 5417 5418 // This is a workaround for an llvm quirk which may result in an 5419 // incorrect instruction selection. Lds and non-lds versions of 5420 // MUBUF instructions are identical except that lds versions 5421 // have mandatory 'lds' modifier. However this modifier follows 5422 // optional modifiers and llvm asm matcher regards this 'lds' 5423 // modifier as an optional one. As a result, an lds version 5424 // of opcode may be selected even if it has no 'lds' modifier. 5425 if (IsLdsOpcode && !HasLdsModifier) { 5426 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5427 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5428 Inst.setOpcode(NoLdsOpcode); 5429 IsLdsOpcode = false; 5430 } 5431 } 5432 5433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5434 if (!IsAtomic) { // glc is hard-coded. 5435 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5436 } 5437 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5438 5439 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5440 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5441 } 5442 5443 if (isGFX10()) 5444 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5445 } 5446 5447 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5448 OptionalImmIndexMap OptionalIdx; 5449 5450 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5451 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5452 5453 // Add the register arguments 5454 if (Op.isReg()) { 5455 Op.addRegOperands(Inst, 1); 5456 continue; 5457 } 5458 5459 // Handle the case where soffset is an immediate 5460 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5461 Op.addImmOperands(Inst, 1); 5462 continue; 5463 } 5464 5465 // Handle tokens like 'offen' which are sometimes hard-coded into the 5466 // asm string. There are no MCInst operands for these. 
5467 if (Op.isToken()) { 5468 continue; 5469 } 5470 assert(Op.isImm()); 5471 5472 // Handle optional arguments 5473 OptionalIdx[Op.getImmTy()] = i; 5474 } 5475 5476 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5477 AMDGPUOperand::ImmTyOffset); 5478 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5479 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5480 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5481 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5482 5483 if (isGFX10()) 5484 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5485 } 5486 5487 //===----------------------------------------------------------------------===// 5488 // mimg 5489 //===----------------------------------------------------------------------===// 5490 5491 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5492 bool IsAtomic) { 5493 unsigned I = 1; 5494 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5495 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5496 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5497 } 5498 5499 if (IsAtomic) { 5500 // Add src, same as dst 5501 assert(Desc.getNumDefs() == 1); 5502 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5503 } 5504 5505 OptionalImmIndexMap OptionalIdx; 5506 5507 for (unsigned E = Operands.size(); I != E; ++I) { 5508 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5509 5510 // Add the register arguments 5511 if (Op.isReg()) { 5512 Op.addRegOperands(Inst, 1); 5513 } else if (Op.isImmModifier()) { 5514 OptionalIdx[Op.getImmTy()] = I; 5515 } else if (!Op.isToken()) { 5516 llvm_unreachable("unexpected operand type"); 5517 } 5518 } 5519 5520 bool IsGFX10 = isGFX10(); 5521 5522 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5523 if (IsGFX10) 5524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5525 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5526 if (IsGFX10) 5527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5530 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5532 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5533 if (!IsGFX10) 5534 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5535 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5536 } 5537 5538 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5539 cvtMIMG(Inst, Operands, true); 5540 } 5541 5542 //===----------------------------------------------------------------------===// 5543 // smrd 5544 //===----------------------------------------------------------------------===// 5545 5546 bool AMDGPUOperand::isSMRDOffset8() const { 5547 return isImm() && isUInt<8>(getImm()); 5548 } 5549 5550 bool AMDGPUOperand::isSMRDOffset20() const { 5551 return isImm() && isUInt<20>(getImm()); 5552 } 5553 5554 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5555 // 32-bit literals are only supported on CI and we only want to use them 5556 // when the offset is > 8-bits. 
5557 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5558 } 5559 5560 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5561 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5562 } 5563 5564 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5565 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5566 } 5567 5568 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5569 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5570 } 5571 5572 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5573 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5574 } 5575 5576 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5577 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5578 } 5579 5580 //===----------------------------------------------------------------------===// 5581 // vop3 5582 //===----------------------------------------------------------------------===// 5583 5584 static bool ConvertOmodMul(int64_t &Mul) { 5585 if (Mul != 1 && Mul != 2 && Mul != 4) 5586 return false; 5587 5588 Mul >>= 1; 5589 return true; 5590 } 5591 5592 static bool ConvertOmodDiv(int64_t &Div) { 5593 if (Div == 1) { 5594 Div = 0; 5595 return true; 5596 } 5597 5598 if (Div == 2) { 5599 Div = 3; 5600 return true; 5601 } 5602 5603 return false; 5604 } 5605 5606 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5607 if (BoundCtrl == 0) { 5608 BoundCtrl = 1; 5609 return true; 5610 } 5611 5612 if (BoundCtrl == -1) { 5613 BoundCtrl = 0; 5614 return true; 5615 } 5616 5617 return false; 5618 } 5619 5620 // Note: the order in this table matches the order of operands in AsmString. 
5621 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5622 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5623 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5624 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5625 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5626 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5627 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5628 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5629 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5630 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5631 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5632 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5633 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5634 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5635 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5636 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5637 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5638 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5639 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5640 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5641 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5642 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5643 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5644 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5645 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5646 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5647 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5648 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5649 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5650 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5651 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5652 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5653 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5654 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5655 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5656 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5657 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5658 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5659 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5660 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5661 }; 5662 5663 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5664 unsigned size = Operands.size(); 5665 assert(size > 0); 5666 5667 OperandMatchResultTy res = parseOptionalOpr(Operands); 5668 5669 // This is a hack to enable hardcoded mandatory operands which follow 5670 // optional operands. 5671 // 5672 // Current design assumes that all operands after the first optional operand 5673 // are also optional. However implementation of some instructions violates 5674 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5675 // 5676 // To alleviate this problem, we have to (implicitly) parse extra operands 5677 // to make sure autogenerated parser of custom operands never hit hardcoded 5678 // mandatory operands. 5679 5680 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5681 5682 // We have parsed the first optional operand. 5683 // Parse as many operands as necessary to skip all mandatory operands. 
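    // Illustrative example (assumed syntax): in "flat_atomic_swap v0, v[1:2], v3 glc"
    // the trailing "glc" is a hardcoded mandatory operand that follows optional ones,
    // so the lookahead below keeps consuming operands to make sure the custom-operand
    // parser never stops on such a hardcoded operand.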
5684 5685 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5686 if (res != MatchOperand_Success || 5687 getLexer().is(AsmToken::EndOfStatement)) break; 5688 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5689 res = parseOptionalOpr(Operands); 5690 } 5691 } 5692 5693 return res; 5694 } 5695 5696 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5697 OperandMatchResultTy res; 5698 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5699 // try to parse any optional operand here 5700 if (Op.IsBit) { 5701 res = parseNamedBit(Op.Name, Operands, Op.Type); 5702 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5703 res = parseOModOperand(Operands); 5704 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5705 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5706 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5707 res = parseSDWASel(Operands, Op.Name, Op.Type); 5708 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5709 res = parseSDWADstUnused(Operands); 5710 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5711 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5712 Op.Type == AMDGPUOperand::ImmTyNegLo || 5713 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5714 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5715 Op.ConvertResult); 5716 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5717 res = parseDim(Operands); 5718 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 5719 res = parseDfmtNfmt(Operands); 5720 } else { 5721 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5722 } 5723 if (res != MatchOperand_NoMatch) { 5724 return res; 5725 } 5726 } 5727 return MatchOperand_NoMatch; 5728 } 5729 5730 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5731 StringRef Name = Parser.getTok().getString(); 5732 if (Name == "mul") { 5733 return parseIntWithPrefix("mul", Operands, 5734 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5735 } 5736 5737 if (Name == "div") { 5738 return parseIntWithPrefix("div", Operands, 5739 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5740 } 5741 5742 return MatchOperand_NoMatch; 5743 } 5744 5745 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5746 cvtVOP3P(Inst, Operands); 5747 5748 int Opc = Inst.getOpcode(); 5749 5750 int SrcNum; 5751 const int Ops[] = { AMDGPU::OpName::src0, 5752 AMDGPU::OpName::src1, 5753 AMDGPU::OpName::src2 }; 5754 for (SrcNum = 0; 5755 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5756 ++SrcNum); 5757 assert(SrcNum > 0); 5758 5759 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5760 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5761 5762 if ((OpSel & (1 << SrcNum)) != 0) { 5763 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5764 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5765 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5766 } 5767 } 5768 5769 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5770 // 1. This operand is input modifiers 5771 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5772 // 2. This is not last operand 5773 && Desc.NumOperands > (OpNum + 1) 5774 // 3. Next operand is register class 5775 && Desc.OpInfo[OpNum + 1].RegClass != -1 5776 // 4. 
Next register is not tied to any other operand 5777 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5778 } 5779 5780 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5781 { 5782 OptionalImmIndexMap OptionalIdx; 5783 unsigned Opc = Inst.getOpcode(); 5784 5785 unsigned I = 1; 5786 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5787 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5788 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5789 } 5790 5791 for (unsigned E = Operands.size(); I != E; ++I) { 5792 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5793 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5794 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5795 } else if (Op.isInterpSlot() || 5796 Op.isInterpAttr() || 5797 Op.isAttrChan()) { 5798 Inst.addOperand(MCOperand::createImm(Op.getImm())); 5799 } else if (Op.isImmModifier()) { 5800 OptionalIdx[Op.getImmTy()] = I; 5801 } else { 5802 llvm_unreachable("unhandled operand type"); 5803 } 5804 } 5805 5806 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5807 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5808 } 5809 5810 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5811 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5812 } 5813 5814 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5816 } 5817 } 5818 5819 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5820 OptionalImmIndexMap &OptionalIdx) { 5821 unsigned Opc = Inst.getOpcode(); 5822 5823 unsigned I = 1; 5824 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5825 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5826 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5827 } 5828 5829 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5830 // This instruction has src modifiers 5831 for (unsigned E = Operands.size(); I != E; ++I) { 5832 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5833 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5834 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5835 } else if (Op.isImmModifier()) { 5836 OptionalIdx[Op.getImmTy()] = I; 5837 } else if (Op.isRegOrImm()) { 5838 Op.addRegOrImmOperands(Inst, 1); 5839 } else { 5840 llvm_unreachable("unhandled operand type"); 5841 } 5842 } 5843 } else { 5844 // No src modifiers 5845 for (unsigned E = Operands.size(); I != E; ++I) { 5846 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5847 if (Op.isMod()) { 5848 OptionalIdx[Op.getImmTy()] = I; 5849 } else { 5850 Op.addRegOrImmOperands(Inst, 1); 5851 } 5852 } 5853 } 5854 5855 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5856 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5857 } 5858 5859 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5860 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5861 } 5862 5863 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 5864 // it has src2 register operand that is tied to dst operand 5865 // we don't allow modifiers for this operand in assembler so src2_modifiers 5866 // should be 0. 
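  // Illustrative example (assumed syntax): for "v_mac_f32_e64 v1, v2, v3" no
  // src2/src2_modifiers operands are parsed, so the code below inserts
  // src2_modifiers = 0 and re-appends the dst register (v1) as the tied src2.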
5867 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 5868 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 5869 Opc == AMDGPU::V_MAC_F32_e64_vi || 5870 Opc == AMDGPU::V_MAC_F16_e64_vi || 5871 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 5872 Opc == AMDGPU::V_FMAC_F32_e64_vi || 5873 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 5874 auto it = Inst.begin(); 5875 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5876 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5877 ++it; 5878 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5879 } 5880 } 5881 5882 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5883 OptionalImmIndexMap OptionalIdx; 5884 cvtVOP3(Inst, Operands, OptionalIdx); 5885 } 5886 5887 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5888 const OperandVector &Operands) { 5889 OptionalImmIndexMap OptIdx; 5890 const int Opc = Inst.getOpcode(); 5891 const MCInstrDesc &Desc = MII.get(Opc); 5892 5893 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5894 5895 cvtVOP3(Inst, Operands, OptIdx); 5896 5897 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5898 assert(!IsPacked); 5899 Inst.addOperand(Inst.getOperand(0)); 5900 } 5901 5902 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5903 // instruction, and then figure out where to actually put the modifiers 5904 5905 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5906 5907 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5908 if (OpSelHiIdx != -1) { 5909 int DefaultVal = IsPacked ? -1 : 0; 5910 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5911 DefaultVal); 5912 } 5913 5914 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5915 if (NegLoIdx != -1) { 5916 assert(IsPacked); 5917 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5918 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5919 } 5920 5921 const int Ops[] = { AMDGPU::OpName::src0, 5922 AMDGPU::OpName::src1, 5923 AMDGPU::OpName::src2 }; 5924 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5925 AMDGPU::OpName::src1_modifiers, 5926 AMDGPU::OpName::src2_modifiers }; 5927 5928 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5929 5930 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5931 unsigned OpSelHi = 0; 5932 unsigned NegLo = 0; 5933 unsigned NegHi = 0; 5934 5935 if (OpSelHiIdx != -1) { 5936 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5937 } 5938 5939 if (NegLoIdx != -1) { 5940 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5941 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5942 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5943 } 5944 5945 for (int J = 0; J < 3; ++J) { 5946 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5947 if (OpIdx == -1) 5948 break; 5949 5950 uint32_t ModVal = 0; 5951 5952 if ((OpSel & (1 << J)) != 0) 5953 ModVal |= SISrcMods::OP_SEL_0; 5954 5955 if ((OpSelHi & (1 << J)) != 0) 5956 ModVal |= SISrcMods::OP_SEL_1; 5957 5958 if ((NegLo & (1 << J)) != 0) 5959 ModVal |= SISrcMods::NEG; 5960 5961 if ((NegHi & (1 << J)) != 0) 5962 ModVal |= SISrcMods::NEG_HI; 5963 5964 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5965 5966 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5967 } 5968 } 5969 5970 //===----------------------------------------------------------------------===// 5971 // dpp 5972 
//===----------------------------------------------------------------------===// 5973 5974 bool AMDGPUOperand::isDPPCtrl() const { 5975 using namespace AMDGPU::DPP; 5976 5977 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5978 if (result) { 5979 int64_t Imm = getImm(); 5980 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5981 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5982 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5983 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5984 (Imm == DppCtrl::WAVE_SHL1) || 5985 (Imm == DppCtrl::WAVE_ROL1) || 5986 (Imm == DppCtrl::WAVE_SHR1) || 5987 (Imm == DppCtrl::WAVE_ROR1) || 5988 (Imm == DppCtrl::ROW_MIRROR) || 5989 (Imm == DppCtrl::ROW_HALF_MIRROR) || 5990 (Imm == DppCtrl::BCAST15) || 5991 (Imm == DppCtrl::BCAST31); 5992 } 5993 return false; 5994 } 5995 5996 bool AMDGPUOperand::isS16Imm() const { 5997 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 5998 } 5999 6000 bool AMDGPUOperand::isU16Imm() const { 6001 return isImm() && isUInt<16>(getImm()); 6002 } 6003 6004 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6005 if (!isGFX10()) 6006 return MatchOperand_NoMatch; 6007 6008 SMLoc S = Parser.getTok().getLoc(); 6009 6010 if (getLexer().isNot(AsmToken::Identifier)) 6011 return MatchOperand_NoMatch; 6012 if (getLexer().getTok().getString() != "dim") 6013 return MatchOperand_NoMatch; 6014 6015 Parser.Lex(); 6016 if (getLexer().isNot(AsmToken::Colon)) 6017 return MatchOperand_ParseFail; 6018 6019 Parser.Lex(); 6020 6021 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6022 // integer. 6023 std::string Token; 6024 if (getLexer().is(AsmToken::Integer)) { 6025 SMLoc Loc = getLexer().getTok().getEndLoc(); 6026 Token = getLexer().getTok().getString(); 6027 Parser.Lex(); 6028 if (getLexer().getTok().getLoc() != Loc) 6029 return MatchOperand_ParseFail; 6030 } 6031 if (getLexer().isNot(AsmToken::Identifier)) 6032 return MatchOperand_ParseFail; 6033 Token += getLexer().getTok().getString(); 6034 6035 StringRef DimId = Token; 6036 if (DimId.startswith("SQ_RSRC_IMG_")) 6037 DimId = DimId.substr(12); 6038 6039 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6040 if (!DimInfo) 6041 return MatchOperand_ParseFail; 6042 6043 Parser.Lex(); 6044 6045 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6046 AMDGPUOperand::ImmTyDim)); 6047 return MatchOperand_Success; 6048 } 6049 6050 OperandMatchResultTy 6051 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6052 using namespace AMDGPU::DPP; 6053 6054 SMLoc S = Parser.getTok().getLoc(); 6055 StringRef Prefix; 6056 int64_t Int; 6057 6058 if (getLexer().getKind() == AsmToken::Identifier) { 6059 Prefix = Parser.getTok().getString(); 6060 } else { 6061 return MatchOperand_NoMatch; 6062 } 6063 6064 if (Prefix == "row_mirror") { 6065 Int = DppCtrl::ROW_MIRROR; 6066 Parser.Lex(); 6067 } else if (Prefix == "row_half_mirror") { 6068 Int = DppCtrl::ROW_HALF_MIRROR; 6069 Parser.Lex(); 6070 } else { 6071 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6072 if (Prefix != "quad_perm" 6073 && Prefix != "row_shl" 6074 && Prefix != "row_shr" 6075 && Prefix != "row_ror" 6076 && Prefix != "wave_shl" 6077 && Prefix != "wave_rol" 6078 && Prefix != "wave_shr" 6079 && Prefix != "wave_ror" 6080 && Prefix != "row_bcast") { 6081 return MatchOperand_NoMatch; 6082 } 6083 6084 Parser.Lex(); 6085 
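    // Forms accepted below include, e.g., "quad_perm:[0,1,2,3]", "row_shl:1",
    // "wave_ror:1" and "row_bcast:15" (values illustrative); the colon and the
    // value(s) are parsed next.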
if (getLexer().isNot(AsmToken::Colon)) 6086 return MatchOperand_ParseFail; 6087 6088 if (Prefix == "quad_perm") { 6089 // quad_perm:[%d,%d,%d,%d] 6090 Parser.Lex(); 6091 if (getLexer().isNot(AsmToken::LBrac)) 6092 return MatchOperand_ParseFail; 6093 Parser.Lex(); 6094 6095 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6096 return MatchOperand_ParseFail; 6097 6098 for (int i = 0; i < 3; ++i) { 6099 if (getLexer().isNot(AsmToken::Comma)) 6100 return MatchOperand_ParseFail; 6101 Parser.Lex(); 6102 6103 int64_t Temp; 6104 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6105 return MatchOperand_ParseFail; 6106 const int shift = i*2 + 2; 6107 Int += (Temp << shift); 6108 } 6109 6110 if (getLexer().isNot(AsmToken::RBrac)) 6111 return MatchOperand_ParseFail; 6112 Parser.Lex(); 6113 } else { 6114 // sel:%d 6115 Parser.Lex(); 6116 if (getParser().parseAbsoluteExpression(Int)) 6117 return MatchOperand_ParseFail; 6118 6119 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6120 Int |= DppCtrl::ROW_SHL0; 6121 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6122 Int |= DppCtrl::ROW_SHR0; 6123 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6124 Int |= DppCtrl::ROW_ROR0; 6125 } else if (Prefix == "wave_shl" && 1 == Int) { 6126 Int = DppCtrl::WAVE_SHL1; 6127 } else if (Prefix == "wave_rol" && 1 == Int) { 6128 Int = DppCtrl::WAVE_ROL1; 6129 } else if (Prefix == "wave_shr" && 1 == Int) { 6130 Int = DppCtrl::WAVE_SHR1; 6131 } else if (Prefix == "wave_ror" && 1 == Int) { 6132 Int = DppCtrl::WAVE_ROR1; 6133 } else if (Prefix == "row_bcast") { 6134 if (Int == 15) { 6135 Int = DppCtrl::BCAST15; 6136 } else if (Int == 31) { 6137 Int = DppCtrl::BCAST31; 6138 } else { 6139 return MatchOperand_ParseFail; 6140 } 6141 } else { 6142 return MatchOperand_ParseFail; 6143 } 6144 } 6145 } 6146 6147 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6148 return MatchOperand_Success; 6149 } 6150 6151 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6152 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6153 } 6154 6155 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6156 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6157 } 6158 6159 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6160 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6161 } 6162 6163 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6164 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6165 } 6166 6167 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { 6168 OptionalImmIndexMap OptionalIdx; 6169 6170 unsigned I = 1; 6171 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6172 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6173 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6174 } 6175 6176 for (unsigned E = Operands.size(); I != E; ++I) { 6177 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6178 MCOI::TIED_TO); 6179 if (TiedTo != -1) { 6180 assert((unsigned)TiedTo < Inst.getNumOperands()); 6181 // handle tied old or src2 for MAC instructions 6182 Inst.addOperand(Inst.getOperand(TiedTo)); 6183 } 6184 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6185 // Add the register arguments 6186 if (Op.isReg() && Op.getReg() == AMDGPU::VCC) { 6187 // VOP2b (v_add_u32, v_sub_u32 ...) 
dpp use "vcc" token. 6188 // Skip it. 6189 continue; 6190 } 6191 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6192 Op.addRegWithFPInputModsOperands(Inst, 2); 6193 } else if (Op.isDPPCtrl()) { 6194 Op.addImmOperands(Inst, 1); 6195 } else if (Op.isImm()) { 6196 // Handle optional arguments 6197 OptionalIdx[Op.getImmTy()] = I; 6198 } else { 6199 llvm_unreachable("Invalid operand type"); 6200 } 6201 } 6202 6203 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6204 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6206 } 6207 6208 //===----------------------------------------------------------------------===// 6209 // sdwa 6210 //===----------------------------------------------------------------------===// 6211 6212 OperandMatchResultTy 6213 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6214 AMDGPUOperand::ImmTy Type) { 6215 using namespace llvm::AMDGPU::SDWA; 6216 6217 SMLoc S = Parser.getTok().getLoc(); 6218 StringRef Value; 6219 OperandMatchResultTy res; 6220 6221 res = parseStringWithPrefix(Prefix, Value); 6222 if (res != MatchOperand_Success) { 6223 return res; 6224 } 6225 6226 int64_t Int; 6227 Int = StringSwitch<int64_t>(Value) 6228 .Case("BYTE_0", SdwaSel::BYTE_0) 6229 .Case("BYTE_1", SdwaSel::BYTE_1) 6230 .Case("BYTE_2", SdwaSel::BYTE_2) 6231 .Case("BYTE_3", SdwaSel::BYTE_3) 6232 .Case("WORD_0", SdwaSel::WORD_0) 6233 .Case("WORD_1", SdwaSel::WORD_1) 6234 .Case("DWORD", SdwaSel::DWORD) 6235 .Default(0xffffffff); 6236 Parser.Lex(); // eat last token 6237 6238 if (Int == 0xffffffff) { 6239 return MatchOperand_ParseFail; 6240 } 6241 6242 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6243 return MatchOperand_Success; 6244 } 6245 6246 OperandMatchResultTy 6247 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6248 using namespace llvm::AMDGPU::SDWA; 6249 6250 SMLoc S = Parser.getTok().getLoc(); 6251 StringRef Value; 6252 OperandMatchResultTy res; 6253 6254 res = parseStringWithPrefix("dst_unused", Value); 6255 if (res != MatchOperand_Success) { 6256 return res; 6257 } 6258 6259 int64_t Int; 6260 Int = StringSwitch<int64_t>(Value) 6261 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6262 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6263 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6264 .Default(0xffffffff); 6265 Parser.Lex(); // eat last token 6266 6267 if (Int == 0xffffffff) { 6268 return MatchOperand_ParseFail; 6269 } 6270 6271 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6272 return MatchOperand_Success; 6273 } 6274 6275 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6276 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6277 } 6278 6279 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6280 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6281 } 6282 6283 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6284 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6285 } 6286 6287 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6288 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6289 } 6290 6291 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6292 uint64_t BasicInstType, bool skipVcc) { 6293 using namespace llvm::AMDGPU::SDWA; 6294 6295 
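  // Rebuild the MCInst for an SDWA instruction: defs first, then the source
  // operands (optionally skipping an explicit "vcc" for VOP2b/VOPC forms), and
  // finally the optional sdwa modifiers with their default values.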
OptionalImmIndexMap OptionalIdx; 6296 bool skippedVcc = false; 6297 6298 unsigned I = 1; 6299 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6300 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6301 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6302 } 6303 6304 for (unsigned E = Operands.size(); I != E; ++I) { 6305 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6306 if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) { 6307 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6308 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6309 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6310 // Skip VCC only if we didn't skip it on previous iteration. 6311 if (BasicInstType == SIInstrFlags::VOP2 && 6312 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6313 skippedVcc = true; 6314 continue; 6315 } else if (BasicInstType == SIInstrFlags::VOPC && 6316 Inst.getNumOperands() == 0) { 6317 skippedVcc = true; 6318 continue; 6319 } 6320 } 6321 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6322 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6323 } else if (Op.isImm()) { 6324 // Handle optional arguments 6325 OptionalIdx[Op.getImmTy()] = I; 6326 } else { 6327 llvm_unreachable("Invalid operand type"); 6328 } 6329 skippedVcc = false; 6330 } 6331 6332 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6333 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6334 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6335 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6336 switch (BasicInstType) { 6337 case SIInstrFlags::VOP1: 6338 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6339 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6340 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6341 } 6342 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6343 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6344 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6345 break; 6346 6347 case SIInstrFlags::VOP2: 6348 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6349 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6350 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6351 } 6352 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6353 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6355 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6356 break; 6357 6358 case SIInstrFlags::VOPC: 6359 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6360 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6361 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6363 break; 6364 6365 default: 6366 llvm_unreachable("Invalid instruction type. 
        Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // the corresponding token was expected.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ?
Match_Success : Match_InvalidOperand; 6434 default: 6435 return Match_InvalidOperand; 6436 } 6437 } 6438 6439 //===----------------------------------------------------------------------===// 6440 // endpgm 6441 //===----------------------------------------------------------------------===// 6442 6443 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6444 SMLoc S = Parser.getTok().getLoc(); 6445 int64_t Imm = 0; 6446 6447 if (!parseExpr(Imm)) { 6448 // The operand is optional, if not present default to 0 6449 Imm = 0; 6450 } 6451 6452 if (!isUInt<16>(Imm)) { 6453 Error(S, "expected a 16-bit value"); 6454 return MatchOperand_ParseFail; 6455 } 6456 6457 Operands.push_back( 6458 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6459 return MatchOperand_Success; 6460 } 6461 6462 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6463
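// Illustrative usage (assumed syntax): "s_endpgm" is parsed with the default
// immediate 0, while "s_endpgm 1" supplies an explicit 16-bit value through
// parseEndpgmOp() above.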