1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "Utils/AMDGPUAsmUtils.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "Utils/AMDKernelCodeTUtils.h" 18 #include "llvm/ADT/APFloat.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallBitVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/ADT/Twine.h" 27 #include "llvm/BinaryFormat/ELF.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCContext.h" 30 #include "llvm/MC/MCExpr.h" 31 #include "llvm/MC/MCInst.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/MC/MCInstrInfo.h" 34 #include "llvm/MC/MCParser/MCAsmLexer.h" 35 #include "llvm/MC/MCParser/MCAsmParser.h" 36 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 38 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSubtargetInfo.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/AMDGPUMetadata.h" 44 #include "llvm/Support/AMDHSAKernelDescriptor.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MachineValueType.h" 49 #include "llvm/Support/MathExtras.h" 50 #include "llvm/Support/SMLoc.h" 51 #include "llvm/Support/TargetParser.h" 52 #include "llvm/Support/TargetRegistry.h" 53 #include "llvm/Support/raw_ostream.h" 54 #include <algorithm> 55 #include <cassert> 56 #include <cstdint> 57 #include <cstring> 58 #include <iterator> 59 #include <map> 60 #include <memory> 61 #include <string> 62 63 using namespace llvm; 64 using namespace llvm::AMDGPU; 65 using namespace llvm::amdhsa; 66 67 namespace { 68 69 class AMDGPUAsmParser; 70 71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; 72 73 //===----------------------------------------------------------------------===// 74 // Operand 75 //===----------------------------------------------------------------------===// 76 77 class AMDGPUOperand : public MCParsedAsmOperand { 78 enum KindTy { 79 Token, 80 Immediate, 81 Register, 82 Expression 83 } Kind; 84 85 SMLoc StartLoc, EndLoc; 86 const AMDGPUAsmParser *AsmParser; 87 88 public: 89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 91 92 using Ptr = std::unique_ptr<AMDGPUOperand>; 93 94 struct Modifiers { 95 bool Abs = false; 96 bool Neg = false; 97 bool Sext = false; 98 99 bool hasFPModifiers() const { return Abs || Neg; } 100 bool hasIntModifiers() const { return Sext; } 101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 102 103 int64_t getFPModifiersOperand() const { 104 int64_t Operand = 0; 105 Operand |= Abs ? SISrcMods::ABS : 0u; 106 Operand |= Neg ? 
SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
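    // Only a bare symbol reference (an MCSymbolRefExpr) qualifies here;
    // compound expressions are never reinterpreted as tokens.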
220 return isa<MCSymbolRefExpr>(Expr); 221 } 222 223 bool isImm() const override { 224 return Kind == Immediate; 225 } 226 227 bool isInlinableImm(MVT type) const; 228 bool isLiteralImm(MVT type) const; 229 230 bool isRegKind() const { 231 return Kind == Register; 232 } 233 234 bool isReg() const override { 235 return isRegKind() && !hasModifiers(); 236 } 237 238 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 239 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 240 } 241 242 bool isRegOrImmWithInt16InputMods() const { 243 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 244 } 245 246 bool isRegOrImmWithInt32InputMods() const { 247 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 248 } 249 250 bool isRegOrImmWithInt64InputMods() const { 251 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 252 } 253 254 bool isRegOrImmWithFP16InputMods() const { 255 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 256 } 257 258 bool isRegOrImmWithFP32InputMods() const { 259 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 260 } 261 262 bool isRegOrImmWithFP64InputMods() const { 263 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 264 } 265 266 bool isVReg() const { 267 return isRegClass(AMDGPU::VGPR_32RegClassID) || 268 isRegClass(AMDGPU::VReg_64RegClassID) || 269 isRegClass(AMDGPU::VReg_96RegClassID) || 270 isRegClass(AMDGPU::VReg_128RegClassID) || 271 isRegClass(AMDGPU::VReg_256RegClassID) || 272 isRegClass(AMDGPU::VReg_512RegClassID); 273 } 274 275 bool isVReg32() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID); 277 } 278 279 bool isVReg32OrOff() const { 280 return isOff() || isVReg32(); 281 } 282 283 bool isSDWAOperand(MVT type) const; 284 bool isSDWAFP16Operand() const; 285 bool isSDWAFP32Operand() const; 286 bool isSDWAInt16Operand() const; 287 bool isSDWAInt32Operand() const; 288 289 bool isImmTy(ImmTy ImmT) const { 290 return isImm() && Imm.Type == ImmT; 291 } 292 293 bool isImmModifier() const { 294 return isImm() && Imm.Type != ImmTyNone; 295 } 296 297 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 298 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 299 bool isDMask() const { return isImmTy(ImmTyDMask); } 300 bool isDim() const { return isImmTy(ImmTyDim); } 301 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 302 bool isDA() const { return isImmTy(ImmTyDA); } 303 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 304 bool isLWE() const { return isImmTy(ImmTyLWE); } 305 bool isOff() const { return isImmTy(ImmTyOff); } 306 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 307 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 308 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 309 bool isOffen() const { return isImmTy(ImmTyOffen); } 310 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 311 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 312 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 313 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 314 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 315 316 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 317 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 318 bool isGDS() const { return isImmTy(ImmTyGDS); } 319 bool isLDS() const { 
return isImmTy(ImmTyLDS); } 320 bool isDLC() const { return isImmTy(ImmTyDLC); } 321 bool isGLC() const { return isImmTy(ImmTyGLC); } 322 bool isSLC() const { return isImmTy(ImmTySLC); } 323 bool isTFE() const { return isImmTy(ImmTyTFE); } 324 bool isD16() const { return isImmTy(ImmTyD16); } 325 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 326 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 327 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 328 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 329 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 330 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 331 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 332 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 333 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 334 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 335 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 336 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 337 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 338 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 339 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 340 bool isHigh() const { return isImmTy(ImmTyHigh); } 341 342 bool isMod() const { 343 return isClampSI() || isOModSI(); 344 } 345 346 bool isRegOrImm() const { 347 return isReg() || isImm(); 348 } 349 350 bool isRegClass(unsigned RCID) const; 351 352 bool isInlineValue() const; 353 354 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 355 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 356 } 357 358 bool isSCSrcB16() const { 359 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 360 } 361 362 bool isSCSrcV2B16() const { 363 return isSCSrcB16(); 364 } 365 366 bool isSCSrcB32() const { 367 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 368 } 369 370 bool isSCSrcB64() const { 371 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 372 } 373 374 bool isSCSrcF16() const { 375 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 376 } 377 378 bool isSCSrcV2F16() const { 379 return isSCSrcF16(); 380 } 381 382 bool isSCSrcF32() const { 383 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 384 } 385 386 bool isSCSrcF64() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 388 } 389 390 bool isSSrcB32() const { 391 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 392 } 393 394 bool isSSrcB16() const { 395 return isSCSrcB16() || isLiteralImm(MVT::i16); 396 } 397 398 bool isSSrcV2B16() const { 399 llvm_unreachable("cannot happen"); 400 return isSSrcB16(); 401 } 402 403 bool isSSrcB64() const { 404 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 405 // See isVSrc64(). 
406 return isSCSrcB64() || isLiteralImm(MVT::i64); 407 } 408 409 bool isSSrcF32() const { 410 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 411 } 412 413 bool isSSrcF64() const { 414 return isSCSrcB64() || isLiteralImm(MVT::f64); 415 } 416 417 bool isSSrcF16() const { 418 return isSCSrcB16() || isLiteralImm(MVT::f16); 419 } 420 421 bool isSSrcV2F16() const { 422 llvm_unreachable("cannot happen"); 423 return isSSrcF16(); 424 } 425 426 bool isSSrcOrLdsB32() const { 427 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 428 isLiteralImm(MVT::i32) || isExpr(); 429 } 430 431 bool isVCSrcB32() const { 432 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 433 } 434 435 bool isVCSrcB64() const { 436 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 437 } 438 439 bool isVCSrcB16() const { 440 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 441 } 442 443 bool isVCSrcV2B16() const { 444 return isVCSrcB16(); 445 } 446 447 bool isVCSrcF32() const { 448 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 449 } 450 451 bool isVCSrcF64() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 453 } 454 455 bool isVCSrcF16() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 457 } 458 459 bool isVCSrcV2F16() const { 460 return isVCSrcF16(); 461 } 462 463 bool isVSrcB32() const { 464 return isVCSrcF32() || isLiteralImm(MVT::i32); 465 } 466 467 bool isVSrcB64() const { 468 return isVCSrcF64() || isLiteralImm(MVT::i64); 469 } 470 471 bool isVSrcB16() const { 472 return isVCSrcF16() || isLiteralImm(MVT::i16); 473 } 474 475 bool isVSrcV2B16() const { 476 return isVSrcB16() || isLiteralImm(MVT::v2i16); 477 } 478 479 bool isVSrcF32() const { 480 return isVCSrcF32() || isLiteralImm(MVT::f32); 481 } 482 483 bool isVSrcF64() const { 484 return isVCSrcF64() || isLiteralImm(MVT::f64); 485 } 486 487 bool isVSrcF16() const { 488 return isVCSrcF16() || isLiteralImm(MVT::f16); 489 } 490 491 bool isVSrcV2F16() const { 492 return isVSrcF16() || isLiteralImm(MVT::v2f16); 493 } 494 495 bool isKImmFP32() const { 496 return isLiteralImm(MVT::f32); 497 } 498 499 bool isKImmFP16() const { 500 return isLiteralImm(MVT::f16); 501 } 502 503 bool isMem() const override { 504 return false; 505 } 506 507 bool isExpr() const { 508 return Kind == Expression; 509 } 510 511 bool isSoppBrTarget() const { 512 return isExpr() || isImm(); 513 } 514 515 bool isSWaitCnt() const; 516 bool isHwreg() const; 517 bool isSendMsg() const; 518 bool isSwizzle() const; 519 bool isSMRDOffset8() const; 520 bool isSMRDOffset20() const; 521 bool isSMRDLiteralOffset() const; 522 bool isDPPCtrl() const; 523 bool isGPRIdxMode() const; 524 bool isS16Imm() const; 525 bool isU16Imm() const; 526 bool isEndpgm() const; 527 528 StringRef getExpressionAsToken() const { 529 assert(isExpr()); 530 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 531 return S->getSymbol().getName(); 532 } 533 534 StringRef getToken() const { 535 assert(isToken()); 536 537 if (Kind == Expression) 538 return getExpressionAsToken(); 539 540 return StringRef(Tok.Data, Tok.Length); 541 } 542 543 int64_t getImm() const { 544 assert(isImm()); 545 return Imm.Val; 546 } 547 548 ImmTy getImmTy() const { 549 assert(isImm()); 550 return Imm.Type; 551 } 552 553 unsigned getReg() const override { 554 assert(isRegKind()); 555 return Reg.RegNo; 556 } 557 558 SMLoc getStartLoc() const override { 559 return StartLoc; 560 } 561 562 SMLoc getEndLoc() const override { 563 
return EndLoc; 564 } 565 566 SMRange getLocRange() const { 567 return SMRange(StartLoc, EndLoc); 568 } 569 570 Modifiers getModifiers() const { 571 assert(isRegKind() || isImmTy(ImmTyNone)); 572 return isRegKind() ? Reg.Mods : Imm.Mods; 573 } 574 575 void setModifiers(Modifiers Mods) { 576 assert(isRegKind() || isImmTy(ImmTyNone)); 577 if (isRegKind()) 578 Reg.Mods = Mods; 579 else 580 Imm.Mods = Mods; 581 } 582 583 bool hasModifiers() const { 584 return getModifiers().hasModifiers(); 585 } 586 587 bool hasFPModifiers() const { 588 return getModifiers().hasFPModifiers(); 589 } 590 591 bool hasIntModifiers() const { 592 return getModifiers().hasIntModifiers(); 593 } 594 595 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 596 597 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 598 599 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 600 601 template <unsigned Bitwidth> 602 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 603 604 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 605 addKImmFPOperands<16>(Inst, N); 606 } 607 608 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 609 addKImmFPOperands<32>(Inst, N); 610 } 611 612 void addRegOperands(MCInst &Inst, unsigned N) const; 613 614 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 615 if (isRegKind()) 616 addRegOperands(Inst, N); 617 else if (isExpr()) 618 Inst.addOperand(MCOperand::createExpr(Expr)); 619 else 620 addImmOperands(Inst, N); 621 } 622 623 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 624 Modifiers Mods = getModifiers(); 625 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 626 if (isRegKind()) { 627 addRegOperands(Inst, N); 628 } else { 629 addImmOperands(Inst, N, false); 630 } 631 } 632 633 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 634 assert(!hasIntModifiers()); 635 addRegOrImmWithInputModsOperands(Inst, N); 636 } 637 638 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 639 assert(!hasFPModifiers()); 640 addRegOrImmWithInputModsOperands(Inst, N); 641 } 642 643 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 644 Modifiers Mods = getModifiers(); 645 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 646 assert(isRegKind()); 647 addRegOperands(Inst, N); 648 } 649 650 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 651 assert(!hasIntModifiers()); 652 addRegWithInputModsOperands(Inst, N); 653 } 654 655 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 656 assert(!hasFPModifiers()); 657 addRegWithInputModsOperands(Inst, N); 658 } 659 660 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 661 if (isImm()) 662 addImmOperands(Inst, N); 663 else { 664 assert(isExpr()); 665 Inst.addOperand(MCOperand::createExpr(Expr)); 666 } 667 } 668 669 static void printImmTy(raw_ostream& OS, ImmTy Type) { 670 switch (Type) { 671 case ImmTyNone: OS << "None"; break; 672 case ImmTyGDS: OS << "GDS"; break; 673 case ImmTyLDS: OS << "LDS"; break; 674 case ImmTyOffen: OS << "Offen"; break; 675 case ImmTyIdxen: OS << "Idxen"; break; 676 case ImmTyAddr64: OS << "Addr64"; break; 677 case ImmTyOffset: OS << "Offset"; break; 678 case ImmTyInstOffset: OS << "InstOffset"; break; 679 case ImmTyOffset0: OS << "Offset0"; break; 680 case ImmTyOffset1: OS << "Offset1"; break; 681 case ImmTyDLC: OS << "DLC"; break; 682 case ImmTyGLC: OS << "GLC"; break; 683 case 
ImmTySLC: OS << "SLC"; break; 684 case ImmTyTFE: OS << "TFE"; break; 685 case ImmTyD16: OS << "D16"; break; 686 case ImmTyFORMAT: OS << "FORMAT"; break; 687 case ImmTyClampSI: OS << "ClampSI"; break; 688 case ImmTyOModSI: OS << "OModSI"; break; 689 case ImmTyDppCtrl: OS << "DppCtrl"; break; 690 case ImmTyDppRowMask: OS << "DppRowMask"; break; 691 case ImmTyDppBankMask: OS << "DppBankMask"; break; 692 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 693 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 694 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 695 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 696 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 697 case ImmTyDMask: OS << "DMask"; break; 698 case ImmTyDim: OS << "Dim"; break; 699 case ImmTyUNorm: OS << "UNorm"; break; 700 case ImmTyDA: OS << "DA"; break; 701 case ImmTyR128A16: OS << "R128A16"; break; 702 case ImmTyLWE: OS << "LWE"; break; 703 case ImmTyOff: OS << "Off"; break; 704 case ImmTyExpTgt: OS << "ExpTgt"; break; 705 case ImmTyExpCompr: OS << "ExpCompr"; break; 706 case ImmTyExpVM: OS << "ExpVM"; break; 707 case ImmTyHwreg: OS << "Hwreg"; break; 708 case ImmTySendMsg: OS << "SendMsg"; break; 709 case ImmTyInterpSlot: OS << "InterpSlot"; break; 710 case ImmTyInterpAttr: OS << "InterpAttr"; break; 711 case ImmTyAttrChan: OS << "AttrChan"; break; 712 case ImmTyOpSel: OS << "OpSel"; break; 713 case ImmTyOpSelHi: OS << "OpSelHi"; break; 714 case ImmTyNegLo: OS << "NegLo"; break; 715 case ImmTyNegHi: OS << "NegHi"; break; 716 case ImmTySwizzle: OS << "Swizzle"; break; 717 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 718 case ImmTyHigh: OS << "High"; break; 719 case ImmTyEndpgm: 720 OS << "Endpgm"; 721 break; 722 } 723 } 724 725 void print(raw_ostream &OS) const override { 726 switch (Kind) { 727 case Register: 728 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 729 break; 730 case Immediate: 731 OS << '<' << getImm(); 732 if (getImmTy() != ImmTyNone) { 733 OS << " type: "; printImmTy(OS, getImmTy()); 734 } 735 OS << " mods: " << Imm.Mods << '>'; 736 break; 737 case Token: 738 OS << '\'' << getToken() << '\''; 739 break; 740 case Expression: 741 OS << "<expr " << *Expr << '>'; 742 break; 743 } 744 } 745 746 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 747 int64_t Val, SMLoc Loc, 748 ImmTy Type = ImmTyNone, 749 bool IsFPImm = false) { 750 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 751 Op->Imm.Val = Val; 752 Op->Imm.IsFPImm = IsFPImm; 753 Op->Imm.Type = Type; 754 Op->Imm.Mods = Modifiers(); 755 Op->StartLoc = Loc; 756 Op->EndLoc = Loc; 757 return Op; 758 } 759 760 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 761 StringRef Str, SMLoc Loc, 762 bool HasExplicitEncodingSize = true) { 763 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 764 Res->Tok.Data = Str.data(); 765 Res->Tok.Length = Str.size(); 766 Res->StartLoc = Loc; 767 Res->EndLoc = Loc; 768 return Res; 769 } 770 771 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 772 unsigned RegNo, SMLoc S, 773 SMLoc E) { 774 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 775 Op->Reg.RegNo = RegNo; 776 Op->Reg.Mods = Modifiers(); 777 Op->StartLoc = S; 778 Op->EndLoc = E; 779 return Op; 780 } 781 782 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 783 const class MCExpr *Expr, SMLoc S) { 784 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 785 Op->Expr = Expr; 786 Op->StartLoc = S; 787 
Op->EndLoc = S; 788 return Op; 789 } 790 }; 791 792 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 793 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 794 return OS; 795 } 796 797 //===----------------------------------------------------------------------===// 798 // AsmParser 799 //===----------------------------------------------------------------------===// 800 801 // Holds info related to the current kernel, e.g. count of SGPRs used. 802 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 803 // .amdgpu_hsa_kernel or at EOF. 804 class KernelScopeInfo { 805 int SgprIndexUnusedMin = -1; 806 int VgprIndexUnusedMin = -1; 807 MCContext *Ctx = nullptr; 808 809 void usesSgprAt(int i) { 810 if (i >= SgprIndexUnusedMin) { 811 SgprIndexUnusedMin = ++i; 812 if (Ctx) { 813 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 814 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 815 } 816 } 817 } 818 819 void usesVgprAt(int i) { 820 if (i >= VgprIndexUnusedMin) { 821 VgprIndexUnusedMin = ++i; 822 if (Ctx) { 823 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 824 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 825 } 826 } 827 } 828 829 public: 830 KernelScopeInfo() = default; 831 832 void initialize(MCContext &Context) { 833 Ctx = &Context; 834 usesSgprAt(SgprIndexUnusedMin = -1); 835 usesVgprAt(VgprIndexUnusedMin = -1); 836 } 837 838 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 839 switch (RegKind) { 840 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 841 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 842 default: break; 843 } 844 } 845 }; 846 847 class AMDGPUAsmParser : public MCTargetAsmParser { 848 MCAsmParser &Parser; 849 850 // Number of extra operands parsed after the first optional operand. 851 // This may be necessary to skip hardcoded mandatory operands. 852 static const unsigned MAX_OPR_LOOKAHEAD = 8; 853 854 unsigned ForcedEncodingSize = 0; 855 bool ForcedDPP = false; 856 bool ForcedSDWA = false; 857 KernelScopeInfo KernelScope; 858 859 /// @name Auto-generated Match Functions 860 /// { 861 862 #define GET_ASSEMBLER_HEADER 863 #include "AMDGPUGenAsmMatcher.inc" 864 865 /// } 866 867 private: 868 bool ParseAsAbsoluteExpression(uint32_t &Ret); 869 bool OutOfRangeError(SMRange Range); 870 /// Calculate VGPR/SGPR blocks required for given target, reserved 871 /// registers, and user-specified NextFreeXGPR values. 872 /// 873 /// \param Features [in] Target features, used for bug corrections. 874 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 875 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 876 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 877 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 878 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 879 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 880 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 881 /// \param VGPRBlocks [out] Result VGPR block count. 882 /// \param SGPRBlocks [out] Result SGPR block count. 
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
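      // These pre-defined symbols let assembly source query the target at
      // assemble time, e.g. in a conditional-assembly expression such as
      // ".if .amdgcn.gfx_generation_number >= 9" (illustrative use only).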
955 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 956 MCContext &Ctx = getContext(); 957 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 958 MCSymbol *Sym = 959 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 960 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 961 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 962 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 963 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 964 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 965 } else { 966 MCSymbol *Sym = 967 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 968 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 969 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 970 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 971 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 972 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 973 } 974 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 975 initializeGprCountSymbol(IS_VGPR); 976 initializeGprCountSymbol(IS_SGPR); 977 } else 978 KernelScope.initialize(getContext()); 979 } 980 } 981 982 bool hasXNACK() const { 983 return AMDGPU::hasXNACK(getSTI()); 984 } 985 986 bool hasMIMG_R128() const { 987 return AMDGPU::hasMIMG_R128(getSTI()); 988 } 989 990 bool hasPackedD16() const { 991 return AMDGPU::hasPackedD16(getSTI()); 992 } 993 994 bool isSI() const { 995 return AMDGPU::isSI(getSTI()); 996 } 997 998 bool isCI() const { 999 return AMDGPU::isCI(getSTI()); 1000 } 1001 1002 bool isVI() const { 1003 return AMDGPU::isVI(getSTI()); 1004 } 1005 1006 bool isGFX9() const { 1007 return AMDGPU::isGFX9(getSTI()); 1008 } 1009 1010 bool isGFX10() const { 1011 return AMDGPU::isGFX10(getSTI()); 1012 } 1013 1014 bool hasInv2PiInlineImm() const { 1015 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1016 } 1017 1018 bool hasFlatOffsets() const { 1019 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1020 } 1021 1022 bool hasSGPR102_SGPR103() const { 1023 return !isVI() && !isGFX9(); 1024 } 1025 1026 bool hasSGPR104_SGPR105() const { 1027 return isGFX10(); 1028 } 1029 1030 bool hasIntClamp() const { 1031 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1032 } 1033 1034 AMDGPUTargetStreamer &getTargetStreamer() { 1035 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1036 return static_cast<AMDGPUTargetStreamer &>(TS); 1037 } 1038 1039 const MCRegisterInfo *getMRI() const { 1040 // We need this const_cast because for some reason getContext() is not const 1041 // in MCAsmParser. 
1042 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1043 } 1044 1045 const MCInstrInfo *getMII() const { 1046 return &MII; 1047 } 1048 1049 const FeatureBitset &getFeatureBits() const { 1050 return getSTI().getFeatureBits(); 1051 } 1052 1053 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1054 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1055 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1056 1057 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1058 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1059 bool isForcedDPP() const { return ForcedDPP; } 1060 bool isForcedSDWA() const { return ForcedSDWA; } 1061 ArrayRef<unsigned> getMatchedVariants() const; 1062 1063 std::unique_ptr<AMDGPUOperand> parseRegister(); 1064 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1065 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1066 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1067 unsigned Kind) override; 1068 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1069 OperandVector &Operands, MCStreamer &Out, 1070 uint64_t &ErrorInfo, 1071 bool MatchingInlineAsm) override; 1072 bool ParseDirective(AsmToken DirectiveID) override; 1073 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1074 OperandMode Mode = OperandMode_Default); 1075 StringRef parseMnemonicSuffix(StringRef Name); 1076 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1077 SMLoc NameLoc, OperandVector &Operands) override; 1078 //bool ProcessInstruction(MCInst &Inst); 1079 1080 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1081 1082 OperandMatchResultTy 1083 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1084 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1085 bool (*ConvertResult)(int64_t &) = nullptr); 1086 1087 OperandMatchResultTy parseOperandArrayWithPrefix( 1088 const char *Prefix, 1089 OperandVector &Operands, 1090 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1091 bool (*ConvertResult)(int64_t&) = nullptr); 1092 1093 OperandMatchResultTy 1094 parseNamedBit(const char *Name, OperandVector &Operands, 1095 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1096 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1097 StringRef &Value); 1098 1099 bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false); 1100 bool parseSP3NegModifier(); 1101 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1102 OperandMatchResultTy parseReg(OperandVector &Operands); 1103 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1104 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1105 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1106 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1107 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1108 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1109 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1110 1111 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1112 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1113 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, 
Operands, true); } 1114 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1115 1116 bool parseCnt(int64_t &IntVal); 1117 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1118 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1119 1120 private: 1121 struct OperandInfoTy { 1122 int64_t Id; 1123 bool IsSymbolic = false; 1124 1125 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1126 }; 1127 1128 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1129 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1130 1131 void errorExpTgt(); 1132 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1133 1134 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1135 bool validateSOPLiteral(const MCInst &Inst) const; 1136 bool validateConstantBusLimitations(const MCInst &Inst); 1137 bool validateEarlyClobberLimitations(const MCInst &Inst); 1138 bool validateIntClampSupported(const MCInst &Inst); 1139 bool validateMIMGAtomicDMask(const MCInst &Inst); 1140 bool validateMIMGGatherDMask(const MCInst &Inst); 1141 bool validateMIMGDataSize(const MCInst &Inst); 1142 bool validateMIMGAddrSize(const MCInst &Inst); 1143 bool validateMIMGD16(const MCInst &Inst); 1144 bool validateMIMGDim(const MCInst &Inst); 1145 bool validateLdsDirect(const MCInst &Inst); 1146 bool validateVOP3Literal(const MCInst &Inst) const; 1147 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1148 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1149 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1150 1151 bool isId(const StringRef Id) const; 1152 bool isId(const AsmToken &Token, const StringRef Id) const; 1153 bool isToken(const AsmToken::TokenKind Kind) const; 1154 bool trySkipId(const StringRef Id); 1155 bool trySkipToken(const AsmToken::TokenKind Kind); 1156 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1157 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1158 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1159 AsmToken::TokenKind getTokenKind() const; 1160 bool parseExpr(int64_t &Imm); 1161 StringRef getTokenStr() const; 1162 AsmToken peekToken(); 1163 AsmToken getToken() const; 1164 SMLoc getLoc() const; 1165 void lex(); 1166 1167 public: 1168 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1169 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1170 1171 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1172 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1173 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1174 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1175 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1176 1177 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1178 const unsigned MinVal, 1179 const unsigned MaxVal, 1180 const StringRef ErrMsg); 1181 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1182 bool parseSwizzleOffset(int64_t &Imm); 1183 bool parseSwizzleMacro(int64_t &Imm); 1184 bool parseSwizzleQuadPerm(int64_t &Imm); 1185 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1186 bool parseSwizzleBroadcast(int64_t &Imm); 1187 bool parseSwizzleSwap(int64_t &Imm); 1188 bool parseSwizzleReverse(int64_t &Imm); 1189 1190 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1191 int64_t parseGPRIdxMacro(); 1192 1193 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { 
cvtMubufImpl(Inst, Operands, false, false); } 1194 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1195 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1196 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1197 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1198 1199 AMDGPUOperand::Ptr defaultDLC() const; 1200 AMDGPUOperand::Ptr defaultGLC() const; 1201 AMDGPUOperand::Ptr defaultSLC() const; 1202 1203 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1204 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1205 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1206 AMDGPUOperand::Ptr defaultOffsetU12() const; 1207 AMDGPUOperand::Ptr defaultOffsetS13() const; 1208 1209 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1210 1211 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1212 OptionalImmIndexMap &OptionalIdx); 1213 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1214 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1215 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1216 1217 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1218 1219 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1220 bool IsAtomic = false); 1221 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1222 1223 OperandMatchResultTy parseDim(OperandVector &Operands); 1224 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1225 AMDGPUOperand::Ptr defaultRowMask() const; 1226 AMDGPUOperand::Ptr defaultBankMask() const; 1227 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1228 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1229 1230 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1231 AMDGPUOperand::ImmTy Type); 1232 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1233 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1234 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1235 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1236 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1237 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1238 uint64_t BasicInstType, bool skipVcc = false); 1239 1240 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1241 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1242 }; 1243 1244 struct OptionalOperand { 1245 const char *Name; 1246 AMDGPUOperand::ImmTy Type; 1247 bool IsBit; 1248 bool (*ConvertResult)(int64_t&); 1249 }; 1250 1251 } // end anonymous namespace 1252 1253 // May be called with integer type with equivalent bitwidth. 
1254 static const fltSemantics *getFltSemantics(unsigned Size) { 1255 switch (Size) { 1256 case 4: 1257 return &APFloat::IEEEsingle(); 1258 case 8: 1259 return &APFloat::IEEEdouble(); 1260 case 2: 1261 return &APFloat::IEEEhalf(); 1262 default: 1263 llvm_unreachable("unsupported fp type"); 1264 } 1265 } 1266 1267 static const fltSemantics *getFltSemantics(MVT VT) { 1268 return getFltSemantics(VT.getSizeInBits() / 8); 1269 } 1270 1271 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1272 switch (OperandType) { 1273 case AMDGPU::OPERAND_REG_IMM_INT32: 1274 case AMDGPU::OPERAND_REG_IMM_FP32: 1275 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1276 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1277 return &APFloat::IEEEsingle(); 1278 case AMDGPU::OPERAND_REG_IMM_INT64: 1279 case AMDGPU::OPERAND_REG_IMM_FP64: 1280 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1281 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1282 return &APFloat::IEEEdouble(); 1283 case AMDGPU::OPERAND_REG_IMM_INT16: 1284 case AMDGPU::OPERAND_REG_IMM_FP16: 1285 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1286 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1287 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1288 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1289 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1290 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1291 return &APFloat::IEEEhalf(); 1292 default: 1293 llvm_unreachable("unsupported fp type"); 1294 } 1295 } 1296 1297 //===----------------------------------------------------------------------===// 1298 // Operand 1299 //===----------------------------------------------------------------------===// 1300 1301 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1302 bool Lost; 1303 1304 // Convert literal to single precision 1305 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1306 APFloat::rmNearestTiesToEven, 1307 &Lost); 1308 // We allow precision lost but not overflow or underflow 1309 if (Status != APFloat::opOK && 1310 Lost && 1311 ((Status & APFloat::opOverflow) != 0 || 1312 (Status & APFloat::opUnderflow) != 0)) { 1313 return false; 1314 } 1315 1316 return true; 1317 } 1318 1319 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1320 return isUIntN(Size, Val) || isIntN(Size, Val); 1321 } 1322 1323 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1324 1325 // This is a hack to enable named inline values like 1326 // shared_base with both 32-bit and 64-bit operands. 1327 // Note that these values are defined as 1328 // 32-bit operands only. 1329 if (isInlineValue()) { 1330 return true; 1331 } 1332 1333 if (!isImmTy(ImmTyNone)) { 1334 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1335 return false; 1336 } 1337 // TODO: We should avoid using host float here. It would be better to 1338 // check the float bit values which is what a few other places do. 1339 // We've had bot failures before due to weird NaN support on mips hosts. 
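  // For an FP token, Imm.Val holds the raw bit pattern of an IEEE double
  // (it is reinterpreted via APFloat(IEEEdouble(), APInt(64, Imm.Val)) below);
  // for an integer token it holds the parsed integer value.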
  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal are set to zeroes when it is encoded,
    // but we still accept these literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1428 1429 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1430 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1431 } 1432 1433 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1434 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1435 } 1436 1437 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1438 if (AsmParser->isVI()) 1439 return isVReg32(); 1440 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1441 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1442 else 1443 return false; 1444 } 1445 1446 bool AMDGPUOperand::isSDWAFP16Operand() const { 1447 return isSDWAOperand(MVT::f16); 1448 } 1449 1450 bool AMDGPUOperand::isSDWAFP32Operand() const { 1451 return isSDWAOperand(MVT::f32); 1452 } 1453 1454 bool AMDGPUOperand::isSDWAInt16Operand() const { 1455 return isSDWAOperand(MVT::i16); 1456 } 1457 1458 bool AMDGPUOperand::isSDWAInt32Operand() const { 1459 return isSDWAOperand(MVT::i32); 1460 } 1461 1462 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1463 { 1464 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1465 assert(Size == 2 || Size == 4 || Size == 8); 1466 1467 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1468 1469 if (Imm.Mods.Abs) { 1470 Val &= ~FpSignMask; 1471 } 1472 if (Imm.Mods.Neg) { 1473 Val ^= FpSignMask; 1474 } 1475 1476 return Val; 1477 } 1478 1479 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1480 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1481 Inst.getNumOperands())) { 1482 addLiteralImmOperand(Inst, Imm.Val, 1483 ApplyModifiers & 1484 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1485 } else { 1486 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1487 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1488 } 1489 } 1490 1491 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1492 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1493 auto OpNum = Inst.getNumOperands(); 1494 // Check that this operand accepts literals 1495 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1496 1497 if (ApplyModifiers) { 1498 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1499 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1500 Val = applyInputFPModifiers(Val, Size); 1501 } 1502 1503 APInt Literal(64, Val); 1504 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1505 1506 if (Imm.IsFPImm) { // We got fp literal token 1507 switch (OpTy) { 1508 case AMDGPU::OPERAND_REG_IMM_INT64: 1509 case AMDGPU::OPERAND_REG_IMM_FP64: 1510 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1511 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1512 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1513 AsmParser->hasInv2PiInlineImm())) { 1514 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1515 return; 1516 } 1517 1518 // Non-inlineable 1519 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1520 // For fp operands we check if low 32 bits are zeros 1521 if (Literal.getLoBits(32) != 0) { 1522 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1523 "Can't encode literal as exact 64-bit floating-point operand. " 1524 "Low 32-bits will be set to zero"); 1525 } 1526 1527 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1528 return; 1529 } 1530 1531 // We don't allow fp literals in 64-bit integer instructions. 
It is 1532 // unclear how we should encode them. This case should be checked earlier 1533 // in predicate methods (isLiteralImm()) 1534 llvm_unreachable("fp literal in 64-bit integer instruction."); 1535 1536 case AMDGPU::OPERAND_REG_IMM_INT32: 1537 case AMDGPU::OPERAND_REG_IMM_FP32: 1538 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1539 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1540 case AMDGPU::OPERAND_REG_IMM_INT16: 1541 case AMDGPU::OPERAND_REG_IMM_FP16: 1542 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1543 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1544 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1545 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1546 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1547 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1548 bool lost; 1549 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1550 // Convert literal to single precision 1551 FPLiteral.convert(*getOpFltSemantics(OpTy), 1552 APFloat::rmNearestTiesToEven, &lost); 1553 // We allow precision lost but not overflow or underflow. This should be 1554 // checked earlier in isLiteralImm() 1555 1556 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1557 Inst.addOperand(MCOperand::createImm(ImmVal)); 1558 return; 1559 } 1560 default: 1561 llvm_unreachable("invalid operand size"); 1562 } 1563 1564 return; 1565 } 1566 1567 // We got int literal token. 1568 // Only sign extend inline immediates. 1569 switch (OpTy) { 1570 case AMDGPU::OPERAND_REG_IMM_INT32: 1571 case AMDGPU::OPERAND_REG_IMM_FP32: 1572 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1573 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1574 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1575 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1576 if (isSafeTruncation(Val, 32) && 1577 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1578 AsmParser->hasInv2PiInlineImm())) { 1579 Inst.addOperand(MCOperand::createImm(Val)); 1580 return; 1581 } 1582 1583 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1584 return; 1585 1586 case AMDGPU::OPERAND_REG_IMM_INT64: 1587 case AMDGPU::OPERAND_REG_IMM_FP64: 1588 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1589 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1590 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1591 Inst.addOperand(MCOperand::createImm(Val)); 1592 return; 1593 } 1594 1595 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1596 return; 1597 1598 case AMDGPU::OPERAND_REG_IMM_INT16: 1599 case AMDGPU::OPERAND_REG_IMM_FP16: 1600 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1601 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1602 if (isSafeTruncation(Val, 16) && 1603 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1604 AsmParser->hasInv2PiInlineImm())) { 1605 Inst.addOperand(MCOperand::createImm(Val)); 1606 return; 1607 } 1608 1609 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1610 return; 1611 1612 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1613 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1614 assert(isSafeTruncation(Val, 16)); 1615 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1616 AsmParser->hasInv2PiInlineImm())); 1617 1618 Inst.addOperand(MCOperand::createImm(Val)); 1619 return; 1620 } 1621 default: 1622 llvm_unreachable("invalid operand size"); 1623 } 1624 } 1625 1626 template <unsigned Bitwidth> 1627 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1628 APInt Literal(64, Imm.Val); 1629 1630 if (!Imm.IsFPImm) { 1631 // We got int literal token. 
1632 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1633 return; 1634 } 1635 1636 bool Lost; 1637 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1638 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1639 APFloat::rmNearestTiesToEven, &Lost); 1640 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1641 } 1642 1643 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1644 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1645 } 1646 1647 static bool isInlineValue(unsigned Reg) { 1648 switch (Reg) { 1649 case AMDGPU::SRC_SHARED_BASE: 1650 case AMDGPU::SRC_SHARED_LIMIT: 1651 case AMDGPU::SRC_PRIVATE_BASE: 1652 case AMDGPU::SRC_PRIVATE_LIMIT: 1653 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1654 return true; 1655 default: 1656 return false; 1657 } 1658 } 1659 1660 bool AMDGPUOperand::isInlineValue() const { 1661 return isRegKind() && ::isInlineValue(getReg()); 1662 } 1663 1664 //===----------------------------------------------------------------------===// 1665 // AsmParser 1666 //===----------------------------------------------------------------------===// 1667 1668 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1669 if (Is == IS_VGPR) { 1670 switch (RegWidth) { 1671 default: return -1; 1672 case 1: return AMDGPU::VGPR_32RegClassID; 1673 case 2: return AMDGPU::VReg_64RegClassID; 1674 case 3: return AMDGPU::VReg_96RegClassID; 1675 case 4: return AMDGPU::VReg_128RegClassID; 1676 case 8: return AMDGPU::VReg_256RegClassID; 1677 case 16: return AMDGPU::VReg_512RegClassID; 1678 } 1679 } else if (Is == IS_TTMP) { 1680 switch (RegWidth) { 1681 default: return -1; 1682 case 1: return AMDGPU::TTMP_32RegClassID; 1683 case 2: return AMDGPU::TTMP_64RegClassID; 1684 case 4: return AMDGPU::TTMP_128RegClassID; 1685 case 8: return AMDGPU::TTMP_256RegClassID; 1686 case 16: return AMDGPU::TTMP_512RegClassID; 1687 } 1688 } else if (Is == IS_SGPR) { 1689 switch (RegWidth) { 1690 default: return -1; 1691 case 1: return AMDGPU::SGPR_32RegClassID; 1692 case 2: return AMDGPU::SGPR_64RegClassID; 1693 case 4: return AMDGPU::SGPR_128RegClassID; 1694 case 8: return AMDGPU::SGPR_256RegClassID; 1695 case 16: return AMDGPU::SGPR_512RegClassID; 1696 } 1697 } 1698 return -1; 1699 } 1700 1701 static unsigned getSpecialRegForName(StringRef RegName) { 1702 return StringSwitch<unsigned>(RegName) 1703 .Case("exec", AMDGPU::EXEC) 1704 .Case("vcc", AMDGPU::VCC) 1705 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1706 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1707 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1708 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1709 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1710 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1711 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1712 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1713 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1714 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1715 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1716 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1717 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1718 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1719 .Case("m0", AMDGPU::M0) 1720 .Case("scc", AMDGPU::SCC) 1721 .Case("tba", AMDGPU::TBA) 1722 .Case("tma", AMDGPU::TMA) 1723 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1724 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1725 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1726 
.Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1727 .Case("vcc_lo", AMDGPU::VCC_LO) 1728 .Case("vcc_hi", AMDGPU::VCC_HI) 1729 .Case("exec_lo", AMDGPU::EXEC_LO) 1730 .Case("exec_hi", AMDGPU::EXEC_HI) 1731 .Case("tma_lo", AMDGPU::TMA_LO) 1732 .Case("tma_hi", AMDGPU::TMA_HI) 1733 .Case("tba_lo", AMDGPU::TBA_LO) 1734 .Case("tba_hi", AMDGPU::TBA_HI) 1735 .Case("null", AMDGPU::SGPR_NULL) 1736 .Default(0); 1737 } 1738 1739 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1740 SMLoc &EndLoc) { 1741 auto R = parseRegister(); 1742 if (!R) return true; 1743 assert(R->isReg()); 1744 RegNo = R->getReg(); 1745 StartLoc = R->getStartLoc(); 1746 EndLoc = R->getEndLoc(); 1747 return false; 1748 } 1749 1750 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1751 RegisterKind RegKind, unsigned Reg1, 1752 unsigned RegNum) { 1753 switch (RegKind) { 1754 case IS_SPECIAL: 1755 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1756 Reg = AMDGPU::EXEC; 1757 RegWidth = 2; 1758 return true; 1759 } 1760 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1761 Reg = AMDGPU::FLAT_SCR; 1762 RegWidth = 2; 1763 return true; 1764 } 1765 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1766 Reg = AMDGPU::XNACK_MASK; 1767 RegWidth = 2; 1768 return true; 1769 } 1770 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1771 Reg = AMDGPU::VCC; 1772 RegWidth = 2; 1773 return true; 1774 } 1775 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1776 Reg = AMDGPU::TBA; 1777 RegWidth = 2; 1778 return true; 1779 } 1780 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1781 Reg = AMDGPU::TMA; 1782 RegWidth = 2; 1783 return true; 1784 } 1785 return false; 1786 case IS_VGPR: 1787 case IS_SGPR: 1788 case IS_TTMP: 1789 if (Reg1 != Reg + RegWidth) { 1790 return false; 1791 } 1792 RegWidth++; 1793 return true; 1794 default: 1795 llvm_unreachable("unexpected register kind"); 1796 } 1797 } 1798 1799 static const StringRef Registers[] = { 1800 { "v" }, 1801 { "s" }, 1802 { "ttmp" }, 1803 }; 1804 1805 bool 1806 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1807 const AsmToken &NextToken) const { 1808 1809 // A list of consecutive registers: [s0,s1,s2,s3] 1810 if (Token.is(AsmToken::LBrac)) 1811 return true; 1812 1813 if (!Token.is(AsmToken::Identifier)) 1814 return false; 1815 1816 // A single register like s0 or a range of registers like s[0:1] 1817 1818 StringRef RegName = Token.getString(); 1819 1820 for (StringRef Reg : Registers) { 1821 if (RegName.startswith(Reg)) { 1822 if (Reg.size() < RegName.size()) { 1823 unsigned RegNum; 1824 // A single register with an index: rXX 1825 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1826 return true; 1827 } else { 1828 // A range of registers: r[XX:YY]. 
1829 if (NextToken.is(AsmToken::LBrac)) 1830 return true; 1831 } 1832 } 1833 } 1834 1835 return getSpecialRegForName(RegName); 1836 } 1837 1838 bool 1839 AMDGPUAsmParser::isRegister() 1840 { 1841 return isRegister(getToken(), peekToken()); 1842 } 1843 1844 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1845 unsigned &RegNum, unsigned &RegWidth, 1846 unsigned *DwordRegIndex) { 1847 if (DwordRegIndex) { *DwordRegIndex = 0; } 1848 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1849 if (getLexer().is(AsmToken::Identifier)) { 1850 StringRef RegName = Parser.getTok().getString(); 1851 if ((Reg = getSpecialRegForName(RegName))) { 1852 Parser.Lex(); 1853 RegKind = IS_SPECIAL; 1854 } else { 1855 unsigned RegNumIndex = 0; 1856 if (RegName[0] == 'v') { 1857 RegNumIndex = 1; 1858 RegKind = IS_VGPR; 1859 } else if (RegName[0] == 's') { 1860 RegNumIndex = 1; 1861 RegKind = IS_SGPR; 1862 } else if (RegName.startswith("ttmp")) { 1863 RegNumIndex = strlen("ttmp"); 1864 RegKind = IS_TTMP; 1865 } else { 1866 return false; 1867 } 1868 if (RegName.size() > RegNumIndex) { 1869 // Single 32-bit register: vXX. 1870 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1871 return false; 1872 Parser.Lex(); 1873 RegWidth = 1; 1874 } else { 1875 // Range of registers: v[XX:YY]. ":YY" is optional. 1876 Parser.Lex(); 1877 int64_t RegLo, RegHi; 1878 if (getLexer().isNot(AsmToken::LBrac)) 1879 return false; 1880 Parser.Lex(); 1881 1882 if (getParser().parseAbsoluteExpression(RegLo)) 1883 return false; 1884 1885 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1886 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1887 return false; 1888 Parser.Lex(); 1889 1890 if (isRBrace) { 1891 RegHi = RegLo; 1892 } else { 1893 if (getParser().parseAbsoluteExpression(RegHi)) 1894 return false; 1895 1896 if (getLexer().isNot(AsmToken::RBrac)) 1897 return false; 1898 Parser.Lex(); 1899 } 1900 RegNum = (unsigned) RegLo; 1901 RegWidth = (RegHi - RegLo) + 1; 1902 } 1903 } 1904 } else if (getLexer().is(AsmToken::LBrac)) { 1905 // List of consecutive registers: [s0,s1,s2,s3] 1906 Parser.Lex(); 1907 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1908 return false; 1909 if (RegWidth != 1) 1910 return false; 1911 RegisterKind RegKind1; 1912 unsigned Reg1, RegNum1, RegWidth1; 1913 do { 1914 if (getLexer().is(AsmToken::Comma)) { 1915 Parser.Lex(); 1916 } else if (getLexer().is(AsmToken::RBrac)) { 1917 Parser.Lex(); 1918 break; 1919 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1920 if (RegWidth1 != 1) { 1921 return false; 1922 } 1923 if (RegKind1 != RegKind) { 1924 return false; 1925 } 1926 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1927 return false; 1928 } 1929 } else { 1930 return false; 1931 } 1932 } while (true); 1933 } else { 1934 return false; 1935 } 1936 switch (RegKind) { 1937 case IS_SPECIAL: 1938 RegNum = 0; 1939 RegWidth = 1; 1940 break; 1941 case IS_VGPR: 1942 case IS_SGPR: 1943 case IS_TTMP: 1944 { 1945 unsigned Size = 1; 1946 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1947 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
1948 Size = std::min(RegWidth, 4u); 1949 } 1950 if (RegNum % Size != 0) 1951 return false; 1952 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1953 RegNum = RegNum / Size; 1954 int RCID = getRegClass(RegKind, RegWidth); 1955 if (RCID == -1) 1956 return false; 1957 const MCRegisterClass RC = TRI->getRegClass(RCID); 1958 if (RegNum >= RC.getNumRegs()) 1959 return false; 1960 Reg = RC.getRegister(RegNum); 1961 break; 1962 } 1963 1964 default: 1965 llvm_unreachable("unexpected register kind"); 1966 } 1967 1968 if (!subtargetHasRegister(*TRI, Reg)) 1969 return false; 1970 return true; 1971 } 1972 1973 Optional<StringRef> 1974 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1975 switch (RegKind) { 1976 case IS_VGPR: 1977 return StringRef(".amdgcn.next_free_vgpr"); 1978 case IS_SGPR: 1979 return StringRef(".amdgcn.next_free_sgpr"); 1980 default: 1981 return None; 1982 } 1983 } 1984 1985 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1986 auto SymbolName = getGprCountSymbolName(RegKind); 1987 assert(SymbolName && "initializing invalid register kind"); 1988 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1989 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1990 } 1991 1992 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1993 unsigned DwordRegIndex, 1994 unsigned RegWidth) { 1995 // Symbols are only defined for GCN targets 1996 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1997 return true; 1998 1999 auto SymbolName = getGprCountSymbolName(RegKind); 2000 if (!SymbolName) 2001 return true; 2002 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2003 2004 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2005 int64_t OldCount; 2006 2007 if (!Sym->isVariable()) 2008 return !Error(getParser().getTok().getLoc(), 2009 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2010 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2011 return !Error( 2012 getParser().getTok().getLoc(), 2013 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2014 2015 if (OldCount <= NewMax) 2016 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2017 2018 return true; 2019 } 2020 2021 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2022 const auto &Tok = Parser.getTok(); 2023 SMLoc StartLoc = Tok.getLoc(); 2024 SMLoc EndLoc = Tok.getEndLoc(); 2025 RegisterKind RegKind; 2026 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2027 2028 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2029 //FIXME: improve error messages (bug 41303). 2030 Error(StartLoc, "not a valid operand."); 2031 return nullptr; 2032 } 2033 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2034 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2035 return nullptr; 2036 } else 2037 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2038 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2039 } 2040 2041 bool 2042 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) { 2043 if (HasSP3AbsModifier) { 2044 // This is a workaround for handling expressions 2045 // as arguments of SP3 'abs' modifier, for example: 2046 // |1.0| 2047 // |-1| 2048 // |1+x| 2049 // This syntax is not compatible with syntax of standard 2050 // MC expressions (due to the trailing '|'). 
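        // E.g. a 2-dword range such as s[2:3] must start at an even SGPR and
        // s[4:7] at a multiple of 4, while VGPR ranges need no alignment
        // (Size stays 1 for VGPRs).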
2051 2052 SMLoc EndLoc; 2053 const MCExpr *Expr; 2054 SMLoc StartLoc = getLoc(); 2055 2056 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 2057 return true; 2058 } 2059 2060 if (!Expr->evaluateAsAbsolute(Val)) 2061 return Error(StartLoc, "expected absolute expression"); 2062 2063 return false; 2064 } 2065 2066 return getParser().parseAbsoluteExpression(Val); 2067 } 2068 2069 OperandMatchResultTy 2070 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2071 // TODO: add syntactic sugar for 1/(2*PI) 2072 2073 const auto& Tok = getToken(); 2074 const auto& NextTok = peekToken(); 2075 bool IsReal = Tok.is(AsmToken::Real); 2076 SMLoc S = Tok.getLoc(); 2077 bool Negate = false; 2078 2079 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2080 lex(); 2081 IsReal = true; 2082 Negate = true; 2083 } 2084 2085 if (IsReal) { 2086 // Floating-point expressions are not supported. 2087 // Can only allow floating-point literals with an 2088 // optional sign. 2089 2090 StringRef Num = getTokenStr(); 2091 lex(); 2092 2093 APFloat RealVal(APFloat::IEEEdouble()); 2094 auto roundMode = APFloat::rmNearestTiesToEven; 2095 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2096 return MatchOperand_ParseFail; 2097 } 2098 if (Negate) 2099 RealVal.changeSign(); 2100 2101 Operands.push_back( 2102 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2103 AMDGPUOperand::ImmTyNone, true)); 2104 2105 return MatchOperand_Success; 2106 2107 // FIXME: Should enable arbitrary expressions here 2108 } else if (Tok.is(AsmToken::Integer) || 2109 (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){ 2110 2111 int64_t IntVal; 2112 if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier)) 2113 return MatchOperand_ParseFail; 2114 2115 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2116 return MatchOperand_Success; 2117 } 2118 2119 return MatchOperand_NoMatch; 2120 } 2121 2122 OperandMatchResultTy 2123 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2124 if (!isRegister()) 2125 return MatchOperand_NoMatch; 2126 2127 if (auto R = parseRegister()) { 2128 assert(R->isReg()); 2129 Operands.push_back(std::move(R)); 2130 return MatchOperand_Success; 2131 } 2132 return MatchOperand_ParseFail; 2133 } 2134 2135 OperandMatchResultTy 2136 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2137 auto res = parseReg(Operands); 2138 return (res == MatchOperand_NoMatch)? 2139 parseImm(Operands, HasSP3AbsMod) : 2140 res; 2141 } 2142 2143 // Check if the current token is an SP3 'neg' modifier. 2144 // Currently this modifier is allowed in the following context: 2145 // 2146 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2147 // 2. Before an 'abs' modifier: -abs(...) 2148 // 3. Before an SP3 'abs' modifier: -|...| 2149 // 2150 // In all other cases "-" is handled as a part 2151 // of an expression that follows the sign. 2152 // 2153 // Note: When "-" is followed by an integer literal, 2154 // this is interpreted as integer negation rather 2155 // than a floating-point NEG modifier applied to N. 
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Sext?
MatchOperand_ParseFail : Res; 2260 } 2261 2262 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2263 return MatchOperand_ParseFail; 2264 2265 AMDGPUOperand::Modifiers Mods; 2266 Mods.Sext = Sext; 2267 2268 if (Mods.hasIntModifiers()) { 2269 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2270 Op.setModifiers(Mods); 2271 } 2272 2273 return MatchOperand_Success; 2274 } 2275 2276 OperandMatchResultTy 2277 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2278 return parseRegOrImmWithFPInputMods(Operands, false); 2279 } 2280 2281 OperandMatchResultTy 2282 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2283 return parseRegOrImmWithIntInputMods(Operands, false); 2284 } 2285 2286 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2287 auto Loc = getLoc(); 2288 if (trySkipId("off")) { 2289 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2290 AMDGPUOperand::ImmTyOff, false)); 2291 return MatchOperand_Success; 2292 } 2293 2294 if (!isRegister()) 2295 return MatchOperand_NoMatch; 2296 2297 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2298 if (Reg) { 2299 Operands.push_back(std::move(Reg)); 2300 return MatchOperand_Success; 2301 } 2302 2303 return MatchOperand_ParseFail; 2304 2305 } 2306 2307 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2308 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2309 2310 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2311 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2312 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2313 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2314 return Match_InvalidOperand; 2315 2316 if ((TSFlags & SIInstrFlags::VOP3) && 2317 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2318 getForcedEncodingSize() != 64) 2319 return Match_PreferE32; 2320 2321 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2322 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2323 // v_mac_f32/16 allow only dst_sel == DWORD; 2324 auto OpNum = 2325 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2326 const auto &Op = Inst.getOperand(OpNum); 2327 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2328 return Match_InvalidOperand; 2329 } 2330 } 2331 2332 if (TSFlags & SIInstrFlags::FLAT) { 2333 // FIXME: Produces error without correct column reported. 2334 auto Opcode = Inst.getOpcode(); 2335 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 2336 2337 const auto &Op = Inst.getOperand(OpNum); 2338 if (!hasFlatOffsets() && Op.getImm() != 0) 2339 return Match_InvalidOperand; 2340 2341 // GFX10: Address offset is 12-bit signed byte offset. Must be positive for 2342 // FLAT segment. For FLAT segment MSB is ignored and forced to zero. 
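    // I.e. global/scratch opcodes (IsNonFlatSeg) accept any signed 12-bit
    // offset, while plain flat opcodes accept only an unsigned 11-bit offset.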
    if (isGFX10()) {
      if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
        if (!isInt<12>(Op.getImm()))
          return Match_InvalidOperand;
      } else {
        if (!isUInt<11>(Op.getImm()))
          return Match_InvalidOperand;
      }
    }
  }

  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
    case AMDGPU::SGPR_NULL:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 does not support f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
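// Check whether the immediate in operand OpIdx of Inst can be encoded as an
// inline constant, based on the operand's expected size (8, 4 or 2 bytes).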
2412 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2413 unsigned OpIdx) const { 2414 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2415 2416 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2417 return false; 2418 } 2419 2420 const MCOperand &MO = Inst.getOperand(OpIdx); 2421 2422 int64_t Val = MO.getImm(); 2423 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2424 2425 switch (OpSize) { // expected operand size 2426 case 8: 2427 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2428 case 4: 2429 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2430 case 2: { 2431 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2432 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2433 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2434 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2435 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2436 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2437 } else { 2438 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2439 } 2440 } 2441 default: 2442 llvm_unreachable("invalid operand size"); 2443 } 2444 } 2445 2446 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2447 const MCOperand &MO = Inst.getOperand(OpIdx); 2448 if (MO.isImm()) { 2449 return !isInlineConstant(Inst, OpIdx); 2450 } 2451 return !MO.isReg() || 2452 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2453 } 2454 2455 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2456 const unsigned Opcode = Inst.getOpcode(); 2457 const MCInstrDesc &Desc = MII.get(Opcode); 2458 unsigned ConstantBusUseCount = 0; 2459 unsigned NumLiterals = 0; 2460 unsigned LiteralSize; 2461 2462 if (Desc.TSFlags & 2463 (SIInstrFlags::VOPC | 2464 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2465 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2466 SIInstrFlags::SDWA)) { 2467 // Check special imm operands (used by madmk, etc) 2468 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2469 ++ConstantBusUseCount; 2470 } 2471 2472 SmallDenseSet<unsigned> SGPRsUsed; 2473 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2474 if (SGPRUsed != AMDGPU::NoRegister) { 2475 SGPRsUsed.insert(SGPRUsed); 2476 ++ConstantBusUseCount; 2477 } 2478 2479 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2480 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2481 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2482 2483 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2484 2485 for (int OpIdx : OpIndices) { 2486 if (OpIdx == -1) break; 2487 2488 const MCOperand &MO = Inst.getOperand(OpIdx); 2489 if (usesConstantBus(Inst, OpIdx)) { 2490 if (MO.isReg()) { 2491 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2492 // Pairs of registers with a partial intersections like these 2493 // s0, s[0:1] 2494 // flat_scratch_lo, flat_scratch 2495 // flat_scratch_lo, flat_scratch_hi 2496 // are theoretically valid but they are disabled anyway. 2497 // Note that this code mimics SIInstrInfo::verifyInstruction 2498 if (!SGPRsUsed.count(Reg)) { 2499 SGPRsUsed.insert(Reg); 2500 ++ConstantBusUseCount; 2501 } 2502 SGPRUsed = Reg; 2503 } else { // Expression or a literal 2504 2505 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2506 continue; // special operand like VINTERP attr_chan 2507 2508 // An instruction may use only one literal. 
2509 // This has been validated on the previous step. 2510 // See validateVOP3Literal. 2511 // This literal may be used as more than one operand. 2512 // If all these operands are of the same size, 2513 // this literal counts as one scalar value. 2514 // Otherwise it counts as 2 scalar values. 2515 // See "GFX10 Shader Programming", section 3.6.2.3. 2516 2517 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2518 if (Size < 4) Size = 4; 2519 2520 if (NumLiterals == 0) { 2521 NumLiterals = 1; 2522 LiteralSize = Size; 2523 } else if (LiteralSize != Size) { 2524 NumLiterals = 2; 2525 } 2526 } 2527 } 2528 } 2529 } 2530 ConstantBusUseCount += NumLiterals; 2531 2532 if (isGFX10()) 2533 return ConstantBusUseCount <= 2; 2534 2535 return ConstantBusUseCount <= 1; 2536 } 2537 2538 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2539 const unsigned Opcode = Inst.getOpcode(); 2540 const MCInstrDesc &Desc = MII.get(Opcode); 2541 2542 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2543 if (DstIdx == -1 || 2544 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2545 return true; 2546 } 2547 2548 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2549 2550 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2551 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2552 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2553 2554 assert(DstIdx != -1); 2555 const MCOperand &Dst = Inst.getOperand(DstIdx); 2556 assert(Dst.isReg()); 2557 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2558 2559 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2560 2561 for (int SrcIdx : SrcIndices) { 2562 if (SrcIdx == -1) break; 2563 const MCOperand &Src = Inst.getOperand(SrcIdx); 2564 if (Src.isReg()) { 2565 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2566 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2567 return false; 2568 } 2569 } 2570 } 2571 2572 return true; 2573 } 2574 2575 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2576 2577 const unsigned Opc = Inst.getOpcode(); 2578 const MCInstrDesc &Desc = MII.get(Opc); 2579 2580 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2581 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2582 assert(ClampIdx != -1); 2583 return Inst.getOperand(ClampIdx).getImm() == 0; 2584 } 2585 2586 return true; 2587 } 2588 2589 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2590 2591 const unsigned Opc = Inst.getOpcode(); 2592 const MCInstrDesc &Desc = MII.get(Opc); 2593 2594 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2595 return true; 2596 2597 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2598 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2599 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2600 2601 assert(VDataIdx != -1); 2602 assert(DMaskIdx != -1); 2603 assert(TFEIdx != -1); 2604 2605 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2606 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2607 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2608 if (DMask == 0) 2609 DMask = 1; 2610 2611 unsigned DataSize = 2612 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2613 if (hasPackedD16()) { 2614 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2615 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2616 DataSize = (DataSize + 1) / 2; 2617 } 2618 2619 return (VDataSize / 4) == DataSize + TFESize; 2620 } 2621 2622 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2623 const unsigned Opc = Inst.getOpcode(); 2624 const MCInstrDesc &Desc = MII.get(Opc); 2625 2626 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2627 return true; 2628 2629 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2630 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2631 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2632 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2633 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2634 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2635 2636 assert(VAddr0Idx != -1); 2637 assert(SrsrcIdx != -1); 2638 assert(DimIdx != -1); 2639 assert(SrsrcIdx > VAddr0Idx); 2640 2641 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2642 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2643 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2644 unsigned VAddrSize = 2645 IsNSA ? SrsrcIdx - VAddr0Idx 2646 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2647 2648 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2649 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2650 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2651 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2652 if (!IsNSA) { 2653 if (AddrSize > 8) 2654 AddrSize = 16; 2655 else if (AddrSize > 4) 2656 AddrSize = 8; 2657 } 2658 2659 return VAddrSize == AddrSize; 2660 } 2661 2662 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2663 2664 const unsigned Opc = Inst.getOpcode(); 2665 const MCInstrDesc &Desc = MII.get(Opc); 2666 2667 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2668 return true; 2669 if (!Desc.mayLoad() || !Desc.mayStore()) 2670 return true; // Not atomic 2671 2672 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2673 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2674 2675 // This is an incomplete check because image_atomic_cmpswap 2676 // may only use 0x3 and 0xf while other atomic operations 2677 // may use 0x1 and 0x3. However these limitations are 2678 // verified when we check that dmask matches dst size. 2679 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2680 } 2681 2682 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2683 2684 const unsigned Opc = Inst.getOpcode(); 2685 const MCInstrDesc &Desc = MII.get(Opc); 2686 2687 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2688 return true; 2689 2690 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2691 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2692 2693 // GATHER4 instructions use dmask in a different fashion compared to 2694 // other MIMG instructions. The only useful DMASK values are 2695 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2696 // (red,red,red,red) etc.) The ISA document doesn't mention 2697 // this. 
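  // I.e. exactly one bit of the gather4 dmask may be set.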
2698 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2699 } 2700 2701 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2702 2703 const unsigned Opc = Inst.getOpcode(); 2704 const MCInstrDesc &Desc = MII.get(Opc); 2705 2706 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2707 return true; 2708 2709 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2710 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2711 if (isCI() || isSI()) 2712 return false; 2713 } 2714 2715 return true; 2716 } 2717 2718 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2719 const unsigned Opc = Inst.getOpcode(); 2720 const MCInstrDesc &Desc = MII.get(Opc); 2721 2722 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2723 return true; 2724 2725 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2726 if (DimIdx < 0) 2727 return true; 2728 2729 long Imm = Inst.getOperand(DimIdx).getImm(); 2730 if (Imm < 0 || Imm >= 8) 2731 return false; 2732 2733 return true; 2734 } 2735 2736 static bool IsRevOpcode(const unsigned Opcode) 2737 { 2738 switch (Opcode) { 2739 case AMDGPU::V_SUBREV_F32_e32: 2740 case AMDGPU::V_SUBREV_F32_e64: 2741 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2742 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2743 case AMDGPU::V_SUBREV_F32_e32_vi: 2744 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2745 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 2746 case AMDGPU::V_SUBREV_F32_e64_vi: 2747 2748 case AMDGPU::V_SUBREV_I32_e32: 2749 case AMDGPU::V_SUBREV_I32_e64: 2750 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 2751 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 2752 2753 case AMDGPU::V_SUBBREV_U32_e32: 2754 case AMDGPU::V_SUBBREV_U32_e64: 2755 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 2756 case AMDGPU::V_SUBBREV_U32_e32_vi: 2757 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 2758 case AMDGPU::V_SUBBREV_U32_e64_vi: 2759 2760 case AMDGPU::V_SUBREV_U32_e32: 2761 case AMDGPU::V_SUBREV_U32_e64: 2762 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2763 case AMDGPU::V_SUBREV_U32_e32_vi: 2764 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2765 case AMDGPU::V_SUBREV_U32_e64_vi: 2766 2767 case AMDGPU::V_SUBREV_F16_e32: 2768 case AMDGPU::V_SUBREV_F16_e64: 2769 case AMDGPU::V_SUBREV_F16_e32_gfx10: 2770 case AMDGPU::V_SUBREV_F16_e32_vi: 2771 case AMDGPU::V_SUBREV_F16_e64_gfx10: 2772 case AMDGPU::V_SUBREV_F16_e64_vi: 2773 2774 case AMDGPU::V_SUBREV_U16_e32: 2775 case AMDGPU::V_SUBREV_U16_e64: 2776 case AMDGPU::V_SUBREV_U16_e32_vi: 2777 case AMDGPU::V_SUBREV_U16_e64_vi: 2778 2779 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2780 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 2781 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2782 2783 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2784 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2785 2786 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 2787 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 2788 2789 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 2790 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 2791 2792 case AMDGPU::V_LSHRREV_B32_e32: 2793 case AMDGPU::V_LSHRREV_B32_e64: 2794 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 2795 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 2796 case AMDGPU::V_LSHRREV_B32_e32_vi: 2797 case AMDGPU::V_LSHRREV_B32_e64_vi: 2798 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 2799 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 2800 2801 case AMDGPU::V_ASHRREV_I32_e32: 2802 case AMDGPU::V_ASHRREV_I32_e64: 2803 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 2804 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 2805 case AMDGPU::V_ASHRREV_I32_e32_vi: 2806 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 2807 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 
2808 case AMDGPU::V_ASHRREV_I32_e64_vi: 2809 2810 case AMDGPU::V_LSHLREV_B32_e32: 2811 case AMDGPU::V_LSHLREV_B32_e64: 2812 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 2813 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 2814 case AMDGPU::V_LSHLREV_B32_e32_vi: 2815 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 2816 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 2817 case AMDGPU::V_LSHLREV_B32_e64_vi: 2818 2819 case AMDGPU::V_LSHLREV_B16_e32: 2820 case AMDGPU::V_LSHLREV_B16_e64: 2821 case AMDGPU::V_LSHLREV_B16_e32_vi: 2822 case AMDGPU::V_LSHLREV_B16_e64_vi: 2823 case AMDGPU::V_LSHLREV_B16_gfx10: 2824 2825 case AMDGPU::V_LSHRREV_B16_e32: 2826 case AMDGPU::V_LSHRREV_B16_e64: 2827 case AMDGPU::V_LSHRREV_B16_e32_vi: 2828 case AMDGPU::V_LSHRREV_B16_e64_vi: 2829 case AMDGPU::V_LSHRREV_B16_gfx10: 2830 2831 case AMDGPU::V_ASHRREV_I16_e32: 2832 case AMDGPU::V_ASHRREV_I16_e64: 2833 case AMDGPU::V_ASHRREV_I16_e32_vi: 2834 case AMDGPU::V_ASHRREV_I16_e64_vi: 2835 case AMDGPU::V_ASHRREV_I16_gfx10: 2836 2837 case AMDGPU::V_LSHLREV_B64: 2838 case AMDGPU::V_LSHLREV_B64_gfx10: 2839 case AMDGPU::V_LSHLREV_B64_vi: 2840 2841 case AMDGPU::V_LSHRREV_B64: 2842 case AMDGPU::V_LSHRREV_B64_gfx10: 2843 case AMDGPU::V_LSHRREV_B64_vi: 2844 2845 case AMDGPU::V_ASHRREV_I64: 2846 case AMDGPU::V_ASHRREV_I64_gfx10: 2847 case AMDGPU::V_ASHRREV_I64_vi: 2848 2849 case AMDGPU::V_PK_LSHLREV_B16: 2850 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 2851 case AMDGPU::V_PK_LSHLREV_B16_vi: 2852 2853 case AMDGPU::V_PK_LSHRREV_B16: 2854 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 2855 case AMDGPU::V_PK_LSHRREV_B16_vi: 2856 case AMDGPU::V_PK_ASHRREV_I16: 2857 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 2858 case AMDGPU::V_PK_ASHRREV_I16_vi: 2859 return true; 2860 default: 2861 return false; 2862 } 2863 } 2864 2865 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2866 2867 using namespace SIInstrFlags; 2868 const unsigned Opcode = Inst.getOpcode(); 2869 const MCInstrDesc &Desc = MII.get(Opcode); 2870 2871 // lds_direct register is defined so that it can be used 2872 // with 9-bit operands only. Ignore encodings which do not accept these. 2873 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2874 return true; 2875 2876 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2877 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2878 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2879 2880 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2881 2882 // lds_direct cannot be specified as either src1 or src2. 2883 for (int SrcIdx : SrcIndices) { 2884 if (SrcIdx == -1) break; 2885 const MCOperand &Src = Inst.getOperand(SrcIdx); 2886 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2887 return false; 2888 } 2889 } 2890 2891 if (Src0Idx == -1) 2892 return true; 2893 2894 const MCOperand &Src = Inst.getOperand(Src0Idx); 2895 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2896 return true; 2897 2898 // lds_direct is specified as src0. Check additional limitations. 
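  // lds_direct as src0 is additionally rejected for SDWA encodings and for
  // the *rev opcodes handled by IsRevOpcode() above.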
2899 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2900 } 2901 2902 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2903 unsigned Opcode = Inst.getOpcode(); 2904 const MCInstrDesc &Desc = MII.get(Opcode); 2905 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2906 return true; 2907 2908 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2909 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2910 2911 const int OpIndices[] = { Src0Idx, Src1Idx }; 2912 2913 unsigned NumLiterals = 0; 2914 uint32_t LiteralValue; 2915 2916 for (int OpIdx : OpIndices) { 2917 if (OpIdx == -1) break; 2918 2919 const MCOperand &MO = Inst.getOperand(OpIdx); 2920 if (MO.isImm() && 2921 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2922 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2923 !isInlineConstant(Inst, OpIdx)) { 2924 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2925 if (NumLiterals == 0 || LiteralValue != Value) { 2926 LiteralValue = Value; 2927 ++NumLiterals; 2928 } 2929 } 2930 } 2931 2932 return NumLiterals <= 1; 2933 } 2934 2935 // VOP3 literal is only allowed in GFX10+ and only one can be used 2936 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 2937 unsigned Opcode = Inst.getOpcode(); 2938 const MCInstrDesc &Desc = MII.get(Opcode); 2939 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 2940 return true; 2941 2942 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2943 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2944 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2945 2946 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2947 2948 unsigned NumLiterals = 0; 2949 uint32_t LiteralValue; 2950 2951 for (int OpIdx : OpIndices) { 2952 if (OpIdx == -1) break; 2953 2954 const MCOperand &MO = Inst.getOperand(OpIdx); 2955 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 2956 continue; 2957 2958 if (!isInlineConstant(Inst, OpIdx)) { 2959 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2960 if (NumLiterals == 0 || LiteralValue != Value) { 2961 LiteralValue = Value; 2962 ++NumLiterals; 2963 } 2964 } 2965 } 2966 2967 return !NumLiterals || 2968 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 2969 } 2970 2971 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2972 const SMLoc &IDLoc) { 2973 if (!validateLdsDirect(Inst)) { 2974 Error(IDLoc, 2975 "invalid use of lds_direct"); 2976 return false; 2977 } 2978 if (!validateSOPLiteral(Inst)) { 2979 Error(IDLoc, 2980 "only one literal operand is allowed"); 2981 return false; 2982 } 2983 if (!validateVOP3Literal(Inst)) { 2984 Error(IDLoc, 2985 "invalid literal operand"); 2986 return false; 2987 } 2988 if (!validateConstantBusLimitations(Inst)) { 2989 Error(IDLoc, 2990 "invalid operand (violates constant bus restrictions)"); 2991 return false; 2992 } 2993 if (!validateEarlyClobberLimitations(Inst)) { 2994 Error(IDLoc, 2995 "destination must be different than all sources"); 2996 return false; 2997 } 2998 if (!validateIntClampSupported(Inst)) { 2999 Error(IDLoc, 3000 "integer clamping is not supported on this GPU"); 3001 return false; 3002 } 3003 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
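  // MIMG, by contrast, has d16 as a separate operand, so it is validated here.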
3004 if (!validateMIMGD16(Inst)) { 3005 Error(IDLoc, 3006 "d16 modifier is not supported on this GPU"); 3007 return false; 3008 } 3009 if (!validateMIMGDim(Inst)) { 3010 Error(IDLoc, "dim modifier is required on this GPU"); 3011 return false; 3012 } 3013 if (!validateMIMGDataSize(Inst)) { 3014 Error(IDLoc, 3015 "image data size does not match dmask and tfe"); 3016 return false; 3017 } 3018 if (!validateMIMGAddrSize(Inst)) { 3019 Error(IDLoc, 3020 "image address size does not match dim and a16"); 3021 return false; 3022 } 3023 if (!validateMIMGAtomicDMask(Inst)) { 3024 Error(IDLoc, 3025 "invalid atomic image dmask"); 3026 return false; 3027 } 3028 if (!validateMIMGGatherDMask(Inst)) { 3029 Error(IDLoc, 3030 "invalid image_gather dmask: only one bit must be set"); 3031 return false; 3032 } 3033 3034 return true; 3035 } 3036 3037 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3038 const FeatureBitset &FBS, 3039 unsigned VariantID = 0); 3040 3041 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3042 OperandVector &Operands, 3043 MCStreamer &Out, 3044 uint64_t &ErrorInfo, 3045 bool MatchingInlineAsm) { 3046 MCInst Inst; 3047 unsigned Result = Match_Success; 3048 for (auto Variant : getMatchedVariants()) { 3049 uint64_t EI; 3050 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3051 Variant); 3052 // We order match statuses from least to most specific. We use most specific 3053 // status as resulting 3054 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3055 if ((R == Match_Success) || 3056 (R == Match_PreferE32) || 3057 (R == Match_MissingFeature && Result != Match_PreferE32) || 3058 (R == Match_InvalidOperand && Result != Match_MissingFeature 3059 && Result != Match_PreferE32) || 3060 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3061 && Result != Match_MissingFeature 3062 && Result != Match_PreferE32)) { 3063 Result = R; 3064 ErrorInfo = EI; 3065 } 3066 if (R == Match_Success) 3067 break; 3068 } 3069 3070 switch (Result) { 3071 default: break; 3072 case Match_Success: 3073 if (!validateInstruction(Inst, IDLoc)) { 3074 return true; 3075 } 3076 Inst.setLoc(IDLoc); 3077 Out.EmitInstruction(Inst, getSTI()); 3078 return false; 3079 3080 case Match_MissingFeature: 3081 return Error(IDLoc, "instruction not supported on this GPU"); 3082 3083 case Match_MnemonicFail: { 3084 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3085 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3086 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3087 return Error(IDLoc, "invalid instruction" + Suggestion, 3088 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3089 } 3090 3091 case Match_InvalidOperand: { 3092 SMLoc ErrorLoc = IDLoc; 3093 if (ErrorInfo != ~0ULL) { 3094 if (ErrorInfo >= Operands.size()) { 3095 return Error(IDLoc, "too few operands for instruction"); 3096 } 3097 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3098 if (ErrorLoc == SMLoc()) 3099 ErrorLoc = IDLoc; 3100 } 3101 return Error(ErrorLoc, "invalid operand for instruction"); 3102 } 3103 3104 case Match_PreferE32: 3105 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3106 "should be encoded as e32"); 3107 } 3108 llvm_unreachable("Implement any new match types added!"); 3109 } 3110 3111 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3112 int64_t Tmp = -1; 3113 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3114 return true; 
3115 } 3116 if (getParser().parseAbsoluteExpression(Tmp)) { 3117 return true; 3118 } 3119 Ret = static_cast<uint32_t>(Tmp); 3120 return false; 3121 } 3122 3123 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3124 uint32_t &Minor) { 3125 if (ParseAsAbsoluteExpression(Major)) 3126 return TokError("invalid major version"); 3127 3128 if (getLexer().isNot(AsmToken::Comma)) 3129 return TokError("minor version number required, comma expected"); 3130 Lex(); 3131 3132 if (ParseAsAbsoluteExpression(Minor)) 3133 return TokError("invalid minor version"); 3134 3135 return false; 3136 } 3137 3138 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3139 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3140 return TokError("directive only supported for amdgcn architecture"); 3141 3142 std::string Target; 3143 3144 SMLoc TargetStart = getTok().getLoc(); 3145 if (getParser().parseEscapedString(Target)) 3146 return true; 3147 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3148 3149 std::string ExpectedTarget; 3150 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3151 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3152 3153 if (Target != ExpectedTargetOS.str()) 3154 return getParser().Error(TargetRange.Start, "target must match options", 3155 TargetRange); 3156 3157 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3158 return false; 3159 } 3160 3161 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3162 return getParser().Error(Range.Start, "value out of range", Range); 3163 } 3164 3165 bool AMDGPUAsmParser::calculateGPRBlocks( 3166 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3167 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 3168 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 3169 unsigned &SGPRBlocks) { 3170 // TODO(scott.linder): These calculations are duplicated from 3171 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
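  // On GFX10+ NumSGPRs is forced to 0 here; older targets add the extra SGPRs
  // implied by VCC / flat scratch / XNACK use and apply the SGPR-init-bug
  // fixup before the counts are converted to allocation blocks.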
3172 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3173 3174 unsigned NumVGPRs = NextFreeVGPR; 3175 unsigned NumSGPRs = NextFreeSGPR; 3176 3177 if (Version.Major >= 10) 3178 NumSGPRs = 0; 3179 else { 3180 unsigned MaxAddressableNumSGPRs = 3181 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3182 3183 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3184 NumSGPRs > MaxAddressableNumSGPRs) 3185 return OutOfRangeError(SGPRRange); 3186 3187 NumSGPRs += 3188 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3189 3190 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3191 NumSGPRs > MaxAddressableNumSGPRs) 3192 return OutOfRangeError(SGPRRange); 3193 3194 if (Features.test(FeatureSGPRInitBug)) 3195 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3196 } 3197 3198 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 3199 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3200 3201 return false; 3202 } 3203 3204 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3205 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3206 return TokError("directive only supported for amdgcn architecture"); 3207 3208 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3209 return TokError("directive only supported for amdhsa OS"); 3210 3211 StringRef KernelName; 3212 if (getParser().parseIdentifier(KernelName)) 3213 return true; 3214 3215 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3216 3217 StringSet<> Seen; 3218 3219 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3220 3221 SMRange VGPRRange; 3222 uint64_t NextFreeVGPR = 0; 3223 SMRange SGPRRange; 3224 uint64_t NextFreeSGPR = 0; 3225 unsigned UserSGPRCount = 0; 3226 bool ReserveVCC = true; 3227 bool ReserveFlatScr = true; 3228 bool ReserveXNACK = hasXNACK(); 3229 3230 while (true) { 3231 while (getLexer().is(AsmToken::EndOfStatement)) 3232 Lex(); 3233 3234 if (getLexer().isNot(AsmToken::Identifier)) 3235 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3236 3237 StringRef ID = getTok().getIdentifier(); 3238 SMRange IDRange = getTok().getLocRange(); 3239 Lex(); 3240 3241 if (ID == ".end_amdhsa_kernel") 3242 break; 3243 3244 if (Seen.find(ID) != Seen.end()) 3245 return TokError(".amdhsa_ directives cannot be repeated"); 3246 Seen.insert(ID); 3247 3248 SMLoc ValStart = getTok().getLoc(); 3249 int64_t IVal; 3250 if (getParser().parseAbsoluteExpression(IVal)) 3251 return true; 3252 SMLoc ValEnd = getTok().getLoc(); 3253 SMRange ValRange = SMRange(ValStart, ValEnd); 3254 3255 if (IVal < 0) 3256 return OutOfRangeError(ValRange); 3257 3258 uint64_t Val = IVal; 3259 3260 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3261 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3262 return OutOfRangeError(RANGE); \ 3263 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3264 3265 if (ID == ".amdhsa_group_segment_fixed_size") { 3266 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3267 return OutOfRangeError(ValRange); 3268 KD.group_segment_fixed_size = Val; 3269 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3270 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3271 return OutOfRangeError(ValRange); 3272 KD.private_segment_fixed_size = Val; 3273 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3274 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3275 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3276 Val, ValRange); 3277 UserSGPRCount += 4; 3278 } else if (ID == 
".amdhsa_user_sgpr_dispatch_ptr") { 3279 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3280 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3281 ValRange); 3282 UserSGPRCount += 2; 3283 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3284 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3285 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3286 ValRange); 3287 UserSGPRCount += 2; 3288 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3289 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3290 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3291 Val, ValRange); 3292 UserSGPRCount += 2; 3293 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3294 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3295 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3296 ValRange); 3297 UserSGPRCount += 2; 3298 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3299 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3300 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3301 ValRange); 3302 UserSGPRCount += 2; 3303 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3304 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3305 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3306 Val, ValRange); 3307 UserSGPRCount += 1; 3308 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3309 PARSE_BITS_ENTRY( 3310 KD.compute_pgm_rsrc2, 3311 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3312 ValRange); 3313 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3314 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3315 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3316 ValRange); 3317 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3318 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3319 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3320 ValRange); 3321 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3322 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3323 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3324 ValRange); 3325 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3326 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3327 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3328 ValRange); 3329 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3330 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3331 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3332 ValRange); 3333 } else if (ID == ".amdhsa_next_free_vgpr") { 3334 VGPRRange = ValRange; 3335 NextFreeVGPR = Val; 3336 } else if (ID == ".amdhsa_next_free_sgpr") { 3337 SGPRRange = ValRange; 3338 NextFreeSGPR = Val; 3339 } else if (ID == ".amdhsa_reserve_vcc") { 3340 if (!isUInt<1>(Val)) 3341 return OutOfRangeError(ValRange); 3342 ReserveVCC = Val; 3343 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3344 if (IVersion.Major < 7) 3345 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3346 IDRange); 3347 if (!isUInt<1>(Val)) 3348 return OutOfRangeError(ValRange); 3349 ReserveFlatScr = Val; 3350 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3351 if (IVersion.Major < 8) 3352 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3353 IDRange); 3354 if (!isUInt<1>(Val)) 3355 return OutOfRangeError(ValRange); 3356 ReserveXNACK = Val; 3357 } else if (ID == ".amdhsa_float_round_mode_32") { 3358 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3359 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3360 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3361 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3362 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3363 } else if (ID == 
".amdhsa_float_denorm_mode_32") { 3364 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3365 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3366 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3367 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3368 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3369 ValRange); 3370 } else if (ID == ".amdhsa_dx10_clamp") { 3371 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3372 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3373 } else if (ID == ".amdhsa_ieee_mode") { 3374 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3375 Val, ValRange); 3376 } else if (ID == ".amdhsa_fp16_overflow") { 3377 if (IVersion.Major < 9) 3378 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3379 IDRange); 3380 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3381 ValRange); 3382 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3383 if (IVersion.Major < 10) 3384 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3385 IDRange); 3386 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3387 ValRange); 3388 } else if (ID == ".amdhsa_memory_ordered") { 3389 if (IVersion.Major < 10) 3390 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3391 IDRange); 3392 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3393 ValRange); 3394 } else if (ID == ".amdhsa_forward_progress") { 3395 if (IVersion.Major < 10) 3396 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3397 IDRange); 3398 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3399 ValRange); 3400 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3401 PARSE_BITS_ENTRY( 3402 KD.compute_pgm_rsrc2, 3403 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3404 ValRange); 3405 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3406 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3407 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3408 Val, ValRange); 3409 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3410 PARSE_BITS_ENTRY( 3411 KD.compute_pgm_rsrc2, 3412 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3413 ValRange); 3414 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3415 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3416 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3417 Val, ValRange); 3418 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3419 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3420 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3421 Val, ValRange); 3422 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3423 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3424 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3425 Val, ValRange); 3426 } else if (ID == ".amdhsa_exception_int_div_zero") { 3427 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3428 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3429 Val, ValRange); 3430 } else { 3431 return getParser().Error(IDRange.Start, 3432 "unknown .amdhsa_kernel directive", IDRange); 3433 } 3434 3435 #undef PARSE_BITS_ENTRY 3436 } 3437 3438 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3439 return TokError(".amdhsa_next_free_vgpr directive is required"); 3440 3441 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3442 return TokError(".amdhsa_next_free_sgpr directive is required"); 3443 3444 unsigned VGPRBlocks; 3445 unsigned SGPRBlocks; 3446 if (calculateGPRBlocks(getFeatureBits(), 
ReserveVCC, ReserveFlatScr, 3447 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3448 SGPRRange, VGPRBlocks, SGPRBlocks)) 3449 return true; 3450 3451 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3452 VGPRBlocks)) 3453 return OutOfRangeError(VGPRRange); 3454 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3455 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3456 3457 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3458 SGPRBlocks)) 3459 return OutOfRangeError(SGPRRange); 3460 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3461 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3462 SGPRBlocks); 3463 3464 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3465 return TokError("too many user SGPRs enabled"); 3466 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3467 UserSGPRCount); 3468 3469 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3470 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3471 ReserveFlatScr, ReserveXNACK); 3472 return false; 3473 } 3474 3475 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3476 uint32_t Major; 3477 uint32_t Minor; 3478 3479 if (ParseDirectiveMajorMinor(Major, Minor)) 3480 return true; 3481 3482 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3483 return false; 3484 } 3485 3486 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3487 uint32_t Major; 3488 uint32_t Minor; 3489 uint32_t Stepping; 3490 StringRef VendorName; 3491 StringRef ArchName; 3492 3493 // If this directive has no arguments, then use the ISA version for the 3494 // targeted GPU. 3495 if (getLexer().is(AsmToken::EndOfStatement)) { 3496 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3497 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3498 ISA.Stepping, 3499 "AMD", "AMDGPU"); 3500 return false; 3501 } 3502 3503 if (ParseDirectiveMajorMinor(Major, Minor)) 3504 return true; 3505 3506 if (getLexer().isNot(AsmToken::Comma)) 3507 return TokError("stepping version number required, comma expected"); 3508 Lex(); 3509 3510 if (ParseAsAbsoluteExpression(Stepping)) 3511 return TokError("invalid stepping version"); 3512 3513 if (getLexer().isNot(AsmToken::Comma)) 3514 return TokError("vendor name required, comma expected"); 3515 Lex(); 3516 3517 if (getLexer().isNot(AsmToken::String)) 3518 return TokError("invalid vendor name"); 3519 3520 VendorName = getLexer().getTok().getStringContents(); 3521 Lex(); 3522 3523 if (getLexer().isNot(AsmToken::Comma)) 3524 return TokError("arch name required, comma expected"); 3525 Lex(); 3526 3527 if (getLexer().isNot(AsmToken::String)) 3528 return TokError("invalid arch name"); 3529 3530 ArchName = getLexer().getTok().getStringContents(); 3531 Lex(); 3532 3533 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3534 VendorName, ArchName); 3535 return false; 3536 } 3537 3538 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3539 amd_kernel_code_t &Header) { 3540 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3541 // assembly for backwards compatibility. 
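  // Entries inside an .amd_kernel_code_t ... .end_amd_kernel_code_t block are
  // expected in "<field> = <value>" form, e.g. (illustrative values only):
  //   kernel_code_entry_byte_offset = 256
  //   wavefront_size = 6
  // Everything except the deprecated field below is handled by
  // parseAmdKernelCodeField.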
3542 if (ID == "max_scratch_backing_memory_byte_size") { 3543 Parser.eatToEndOfStatement(); 3544 return false; 3545 } 3546 3547 SmallString<40> ErrStr; 3548 raw_svector_ostream Err(ErrStr); 3549 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3550 return TokError(Err.str()); 3551 } 3552 Lex(); 3553 3554 if (ID == "enable_wgp_mode") { 3555 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3556 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3557 } 3558 3559 if (ID == "enable_mem_ordered") { 3560 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3561 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3562 } 3563 3564 if (ID == "enable_fwd_progress") { 3565 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3566 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3567 } 3568 3569 return false; 3570 } 3571 3572 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3573 amd_kernel_code_t Header; 3574 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3575 3576 while (true) { 3577 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3578 // will set the current token to EndOfStatement. 3579 while(getLexer().is(AsmToken::EndOfStatement)) 3580 Lex(); 3581 3582 if (getLexer().isNot(AsmToken::Identifier)) 3583 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3584 3585 StringRef ID = getLexer().getTok().getIdentifier(); 3586 Lex(); 3587 3588 if (ID == ".end_amd_kernel_code_t") 3589 break; 3590 3591 if (ParseAMDKernelCodeTValue(ID, Header)) 3592 return true; 3593 } 3594 3595 getTargetStreamer().EmitAMDKernelCodeT(Header); 3596 3597 return false; 3598 } 3599 3600 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3601 if (getLexer().isNot(AsmToken::Identifier)) 3602 return TokError("expected symbol name"); 3603 3604 StringRef KernelName = Parser.getTok().getString(); 3605 3606 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3607 ELF::STT_AMDGPU_HSA_KERNEL); 3608 Lex(); 3609 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3610 KernelScope.initialize(getContext()); 3611 return false; 3612 } 3613 3614 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3615 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3616 return Error(getParser().getTok().getLoc(), 3617 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3618 "architectures"); 3619 } 3620 3621 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3622 3623 std::string ISAVersionStringFromSTI; 3624 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3625 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3626 3627 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3628 return Error(getParser().getTok().getLoc(), 3629 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3630 "arguments specified through the command line"); 3631 } 3632 3633 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3634 Lex(); 3635 3636 return false; 3637 } 3638 3639 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3640 const char *AssemblerDirectiveBegin; 3641 const char *AssemblerDirectiveEnd; 3642 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3643 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3644 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3645 HSAMD::V3::AssemblerDirectiveEnd) 3646 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3647 HSAMD::AssemblerDirectiveEnd); 3648 3649 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3650 return Error(getParser().getTok().getLoc(), 3651 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3652 "not available on non-amdhsa OSes")).str()); 3653 } 3654 3655 std::string HSAMetadataString; 3656 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3657 HSAMetadataString)) 3658 return true; 3659 3660 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3661 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3662 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3663 } else { 3664 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3665 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3666 } 3667 3668 return false; 3669 } 3670 3671 /// Common code to parse out a block of text (typically YAML) between start and 3672 /// end directives. 3673 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3674 const char *AssemblerDirectiveEnd, 3675 std::string &CollectString) { 3676 3677 raw_string_ostream CollectStream(CollectString); 3678 3679 getLexer().setSkipSpace(false); 3680 3681 bool FoundEnd = false; 3682 while (!getLexer().is(AsmToken::Eof)) { 3683 while (getLexer().is(AsmToken::Space)) { 3684 CollectStream << getLexer().getTok().getString(); 3685 Lex(); 3686 } 3687 3688 if (getLexer().is(AsmToken::Identifier)) { 3689 StringRef ID = getLexer().getTok().getIdentifier(); 3690 if (ID == AssemblerDirectiveEnd) { 3691 Lex(); 3692 FoundEnd = true; 3693 break; 3694 } 3695 } 3696 3697 CollectStream << Parser.parseStringToEndOfStatement() 3698 << getContext().getAsmInfo()->getSeparatorString(); 3699 3700 Parser.eatToEndOfStatement(); 3701 } 3702 3703 getLexer().setSkipSpace(true); 3704 3705 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3706 return TokError(Twine("expected directive ") + 3707 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3708 } 3709 3710 CollectStream.flush(); 3711 return false; 3712 } 3713 3714 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3715 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3716 std::string String; 3717 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3718 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3719 return true; 3720 3721 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3722 if (!PALMetadata->setFromString(String)) 3723 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3724 return false; 3725 } 3726 3727 /// Parse the assembler directive for old linear-format PAL metadata. 
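/// The legacy form is the directive named by PALMD::AssemblerDirective followed
/// by a comma-separated list of register/value pairs, e.g. (illustrative
/// values): "0x2c0a, 0x0, 0x2c0b, 0x42". Each pair is forwarded to
/// PALMetadata->setRegister(Key, Value).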
3728 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3729 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3730 return Error(getParser().getTok().getLoc(), 3731 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3732 "not available on non-amdpal OSes")).str()); 3733 } 3734 3735 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3736 PALMetadata->setLegacy(); 3737 for (;;) { 3738 uint32_t Key, Value; 3739 if (ParseAsAbsoluteExpression(Key)) { 3740 return TokError(Twine("invalid value in ") + 3741 Twine(PALMD::AssemblerDirective)); 3742 } 3743 if (getLexer().isNot(AsmToken::Comma)) { 3744 return TokError(Twine("expected an even number of values in ") + 3745 Twine(PALMD::AssemblerDirective)); 3746 } 3747 Lex(); 3748 if (ParseAsAbsoluteExpression(Value)) { 3749 return TokError(Twine("invalid value in ") + 3750 Twine(PALMD::AssemblerDirective)); 3751 } 3752 PALMetadata->setRegister(Key, Value); 3753 if (getLexer().isNot(AsmToken::Comma)) 3754 break; 3755 Lex(); 3756 } 3757 return false; 3758 } 3759 3760 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3761 StringRef IDVal = DirectiveID.getString(); 3762 3763 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3764 if (IDVal == ".amdgcn_target") 3765 return ParseDirectiveAMDGCNTarget(); 3766 3767 if (IDVal == ".amdhsa_kernel") 3768 return ParseDirectiveAMDHSAKernel(); 3769 3770 // TODO: Restructure/combine with PAL metadata directive. 3771 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3772 return ParseDirectiveHSAMetadata(); 3773 } else { 3774 if (IDVal == ".hsa_code_object_version") 3775 return ParseDirectiveHSACodeObjectVersion(); 3776 3777 if (IDVal == ".hsa_code_object_isa") 3778 return ParseDirectiveHSACodeObjectISA(); 3779 3780 if (IDVal == ".amd_kernel_code_t") 3781 return ParseDirectiveAMDKernelCodeT(); 3782 3783 if (IDVal == ".amdgpu_hsa_kernel") 3784 return ParseDirectiveAMDGPUHsaKernel(); 3785 3786 if (IDVal == ".amd_amdgpu_isa") 3787 return ParseDirectiveISAVersion(); 3788 3789 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3790 return ParseDirectiveHSAMetadata(); 3791 } 3792 3793 if (IDVal == PALMD::AssemblerDirectiveBegin) 3794 return ParseDirectivePALMetadataBegin(); 3795 3796 if (IDVal == PALMD::AssemblerDirective) 3797 return ParseDirectivePALMetadata(); 3798 3799 return true; 3800 } 3801 3802 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3803 unsigned RegNo) const { 3804 3805 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3806 R.isValid(); ++R) { 3807 if (*R == RegNo) 3808 return isGFX9() || isGFX10(); 3809 } 3810 3811 // GFX10 has 2 more SGPRs 104 and 105. 3812 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 3813 R.isValid(); ++R) { 3814 if (*R == RegNo) 3815 return hasSGPR104_SGPR105(); 3816 } 3817 3818 switch (RegNo) { 3819 case AMDGPU::TBA: 3820 case AMDGPU::TBA_LO: 3821 case AMDGPU::TBA_HI: 3822 case AMDGPU::TMA: 3823 case AMDGPU::TMA_LO: 3824 case AMDGPU::TMA_HI: 3825 return !isGFX9() && !isGFX10(); 3826 case AMDGPU::XNACK_MASK: 3827 case AMDGPU::XNACK_MASK_LO: 3828 case AMDGPU::XNACK_MASK_HI: 3829 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 3830 case AMDGPU::SGPR_NULL: 3831 return isGFX10(); 3832 default: 3833 break; 3834 } 3835 3836 if (isInlineValue(RegNo)) 3837 return !isCI() && !isSI() && !isVI(); 3838 3839 if (isCI()) 3840 return true; 3841 3842 if (isSI() || isGFX10()) { 3843 // No flat_scr on SI. 
3844 // On GFX10 flat scratch is not a valid register operand and can only be 3845 // accessed with s_setreg/s_getreg. 3846 switch (RegNo) { 3847 case AMDGPU::FLAT_SCR: 3848 case AMDGPU::FLAT_SCR_LO: 3849 case AMDGPU::FLAT_SCR_HI: 3850 return false; 3851 default: 3852 return true; 3853 } 3854 } 3855 3856 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3857 // SI/CI have. 3858 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3859 R.isValid(); ++R) { 3860 if (*R == RegNo) 3861 return hasSGPR102_SGPR103(); 3862 } 3863 3864 return true; 3865 } 3866 3867 OperandMatchResultTy 3868 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 3869 OperandMode Mode) { 3870 // Try to parse with a custom parser 3871 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3872 3873 // If we successfully parsed the operand or if there as an error parsing, 3874 // we are done. 3875 // 3876 // If we are parsing after we reach EndOfStatement then this means we 3877 // are appending default values to the Operands list. This is only done 3878 // by custom parser, so we shouldn't continue on to the generic parsing. 3879 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3880 getLexer().is(AsmToken::EndOfStatement)) 3881 return ResTy; 3882 3883 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 3884 unsigned Prefix = Operands.size(); 3885 SMLoc LBraceLoc = getTok().getLoc(); 3886 Parser.Lex(); // eat the '[' 3887 3888 for (;;) { 3889 ResTy = parseReg(Operands); 3890 if (ResTy != MatchOperand_Success) 3891 return ResTy; 3892 3893 if (getLexer().is(AsmToken::RBrac)) 3894 break; 3895 3896 if (getLexer().isNot(AsmToken::Comma)) 3897 return MatchOperand_ParseFail; 3898 Parser.Lex(); 3899 } 3900 3901 if (Operands.size() - Prefix > 1) { 3902 Operands.insert(Operands.begin() + Prefix, 3903 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 3904 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 3905 getTok().getLoc())); 3906 } 3907 3908 Parser.Lex(); // eat the ']' 3909 return MatchOperand_Success; 3910 } 3911 3912 ResTy = parseRegOrImm(Operands); 3913 3914 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) 3915 return ResTy; 3916 3917 const auto &Tok = Parser.getTok(); 3918 SMLoc S = Tok.getLoc(); 3919 3920 const MCExpr *Expr = nullptr; 3921 if (!Parser.parseExpression(Expr)) { 3922 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3923 return MatchOperand_Success; 3924 } 3925 3926 // Possibly this is an instruction flag like 'gds'. 3927 if (Tok.getKind() == AsmToken::Identifier) { 3928 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3929 Parser.Lex(); 3930 return MatchOperand_Success; 3931 } 3932 3933 return MatchOperand_NoMatch; 3934 } 3935 3936 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3937 // Clear any forced encodings from the previous instruction. 
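  // A suffix on the mnemonic selects a forced encoding: "_e64" forces the
  // 64-bit encoding, "_e32" the 32-bit encoding, "_dpp" DPP and "_sdwa" SDWA
  // (e.g. the illustrative "v_add_f32_e64"); the suffix is stripped before
  // matching.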
3938 setForcedEncodingSize(0); 3939 setForcedDPP(false); 3940 setForcedSDWA(false); 3941 3942 if (Name.endswith("_e64")) { 3943 setForcedEncodingSize(64); 3944 return Name.substr(0, Name.size() - 4); 3945 } else if (Name.endswith("_e32")) { 3946 setForcedEncodingSize(32); 3947 return Name.substr(0, Name.size() - 4); 3948 } else if (Name.endswith("_dpp")) { 3949 setForcedDPP(true); 3950 return Name.substr(0, Name.size() - 4); 3951 } else if (Name.endswith("_sdwa")) { 3952 setForcedSDWA(true); 3953 return Name.substr(0, Name.size() - 5); 3954 } 3955 return Name; 3956 } 3957 3958 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3959 StringRef Name, 3960 SMLoc NameLoc, OperandVector &Operands) { 3961 // Add the instruction mnemonic 3962 Name = parseMnemonicSuffix(Name); 3963 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3964 3965 bool IsMIMG = Name.startswith("image_"); 3966 3967 while (!getLexer().is(AsmToken::EndOfStatement)) { 3968 OperandMode Mode = OperandMode_Default; 3969 if (IsMIMG && isGFX10() && Operands.size() == 2) 3970 Mode = OperandMode_NSA; 3971 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 3972 3973 // Eat the comma or space if there is one. 3974 if (getLexer().is(AsmToken::Comma)) 3975 Parser.Lex(); 3976 3977 switch (Res) { 3978 case MatchOperand_Success: break; 3979 case MatchOperand_ParseFail: 3980 Error(getLexer().getLoc(), "failed parsing operand."); 3981 while (!getLexer().is(AsmToken::EndOfStatement)) { 3982 Parser.Lex(); 3983 } 3984 return true; 3985 case MatchOperand_NoMatch: 3986 Error(getLexer().getLoc(), "not a valid operand."); 3987 while (!getLexer().is(AsmToken::EndOfStatement)) { 3988 Parser.Lex(); 3989 } 3990 return true; 3991 } 3992 } 3993 3994 return false; 3995 } 3996 3997 //===----------------------------------------------------------------------===// 3998 // Utility functions 3999 //===----------------------------------------------------------------------===// 4000 4001 OperandMatchResultTy 4002 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 4003 switch(getLexer().getKind()) { 4004 default: return MatchOperand_NoMatch; 4005 case AsmToken::Identifier: { 4006 StringRef Name = Parser.getTok().getString(); 4007 if (!Name.equals(Prefix)) { 4008 return MatchOperand_NoMatch; 4009 } 4010 4011 Parser.Lex(); 4012 if (getLexer().isNot(AsmToken::Colon)) 4013 return MatchOperand_ParseFail; 4014 4015 Parser.Lex(); 4016 4017 bool IsMinus = false; 4018 if (getLexer().getKind() == AsmToken::Minus) { 4019 Parser.Lex(); 4020 IsMinus = true; 4021 } 4022 4023 if (getLexer().isNot(AsmToken::Integer)) 4024 return MatchOperand_ParseFail; 4025 4026 if (getParser().parseAbsoluteExpression(Int)) 4027 return MatchOperand_ParseFail; 4028 4029 if (IsMinus) 4030 Int = -Int; 4031 break; 4032 } 4033 } 4034 return MatchOperand_Success; 4035 } 4036 4037 OperandMatchResultTy 4038 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4039 AMDGPUOperand::ImmTy ImmTy, 4040 bool (*ConvertResult)(int64_t&)) { 4041 SMLoc S = Parser.getTok().getLoc(); 4042 int64_t Value = 0; 4043 4044 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4045 if (Res != MatchOperand_Success) 4046 return Res; 4047 4048 if (ConvertResult && !ConvertResult(Value)) { 4049 return MatchOperand_ParseFail; 4050 } 4051 4052 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4053 return MatchOperand_Success; 4054 } 4055 4056 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 
4057 const char *Prefix, 4058 OperandVector &Operands, 4059 AMDGPUOperand::ImmTy ImmTy, 4060 bool (*ConvertResult)(int64_t&)) { 4061 StringRef Name = Parser.getTok().getString(); 4062 if (!Name.equals(Prefix)) 4063 return MatchOperand_NoMatch; 4064 4065 Parser.Lex(); 4066 if (getLexer().isNot(AsmToken::Colon)) 4067 return MatchOperand_ParseFail; 4068 4069 Parser.Lex(); 4070 if (getLexer().isNot(AsmToken::LBrac)) 4071 return MatchOperand_ParseFail; 4072 Parser.Lex(); 4073 4074 unsigned Val = 0; 4075 SMLoc S = Parser.getTok().getLoc(); 4076 4077 // FIXME: How to verify the number of elements matches the number of src 4078 // operands? 4079 for (int I = 0; I < 4; ++I) { 4080 if (I != 0) { 4081 if (getLexer().is(AsmToken::RBrac)) 4082 break; 4083 4084 if (getLexer().isNot(AsmToken::Comma)) 4085 return MatchOperand_ParseFail; 4086 Parser.Lex(); 4087 } 4088 4089 if (getLexer().isNot(AsmToken::Integer)) 4090 return MatchOperand_ParseFail; 4091 4092 int64_t Op; 4093 if (getParser().parseAbsoluteExpression(Op)) 4094 return MatchOperand_ParseFail; 4095 4096 if (Op != 0 && Op != 1) 4097 return MatchOperand_ParseFail; 4098 Val |= (Op << I); 4099 } 4100 4101 Parser.Lex(); 4102 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4103 return MatchOperand_Success; 4104 } 4105 4106 OperandMatchResultTy 4107 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4108 AMDGPUOperand::ImmTy ImmTy) { 4109 int64_t Bit = 0; 4110 SMLoc S = Parser.getTok().getLoc(); 4111 4112 // We are at the end of the statement, and this is a default argument, so 4113 // use a default value. 4114 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4115 switch(getLexer().getKind()) { 4116 case AsmToken::Identifier: { 4117 StringRef Tok = Parser.getTok().getString(); 4118 if (Tok == Name) { 4119 if (Tok == "r128" && isGFX9()) 4120 Error(S, "r128 modifier is not supported on this GPU"); 4121 if (Tok == "a16" && !isGFX9()) 4122 Error(S, "a16 modifier is not supported on this GPU"); 4123 Bit = 1; 4124 Parser.Lex(); 4125 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4126 Bit = 0; 4127 Parser.Lex(); 4128 } else { 4129 return MatchOperand_NoMatch; 4130 } 4131 break; 4132 } 4133 default: 4134 return MatchOperand_NoMatch; 4135 } 4136 } 4137 4138 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4139 return MatchOperand_ParseFail; 4140 4141 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4142 return MatchOperand_Success; 4143 } 4144 4145 static void addOptionalImmOperand( 4146 MCInst& Inst, const OperandVector& Operands, 4147 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4148 AMDGPUOperand::ImmTy ImmT, 4149 int64_t Default = 0) { 4150 auto i = OptionalIdx.find(ImmT); 4151 if (i != OptionalIdx.end()) { 4152 unsigned Idx = i->second; 4153 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4154 } else { 4155 Inst.addOperand(MCOperand::createImm(Default)); 4156 } 4157 } 4158 4159 OperandMatchResultTy 4160 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4161 if (getLexer().isNot(AsmToken::Identifier)) { 4162 return MatchOperand_NoMatch; 4163 } 4164 StringRef Tok = Parser.getTok().getString(); 4165 if (Tok != Prefix) { 4166 return MatchOperand_NoMatch; 4167 } 4168 4169 Parser.Lex(); 4170 if (getLexer().isNot(AsmToken::Colon)) { 4171 return MatchOperand_ParseFail; 4172 } 4173 4174 Parser.Lex(); 4175 if (getLexer().isNot(AsmToken::Identifier)) { 4176 return MatchOperand_ParseFail; 4177 } 4178 4179 Value = Parser.getTok().getString(); 4180 
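  // (For an illustrative "dst_sel:DWORD" the prefix is "dst_sel" and Value is
  // set to "DWORD".)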
return MatchOperand_Success; 4181 } 4182 4183 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4184 // values to live in a joint format operand in the MCInst encoding. 4185 OperandMatchResultTy 4186 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4187 SMLoc S = Parser.getTok().getLoc(); 4188 int64_t Dfmt = 0, Nfmt = 0; 4189 // dfmt and nfmt can appear in either order, and each is optional. 4190 bool GotDfmt = false, GotNfmt = false; 4191 while (!GotDfmt || !GotNfmt) { 4192 if (!GotDfmt) { 4193 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4194 if (Res != MatchOperand_NoMatch) { 4195 if (Res != MatchOperand_Success) 4196 return Res; 4197 if (Dfmt >= 16) { 4198 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4199 return MatchOperand_ParseFail; 4200 } 4201 GotDfmt = true; 4202 Parser.Lex(); 4203 continue; 4204 } 4205 } 4206 if (!GotNfmt) { 4207 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4208 if (Res != MatchOperand_NoMatch) { 4209 if (Res != MatchOperand_Success) 4210 return Res; 4211 if (Nfmt >= 8) { 4212 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4213 return MatchOperand_ParseFail; 4214 } 4215 GotNfmt = true; 4216 Parser.Lex(); 4217 continue; 4218 } 4219 } 4220 break; 4221 } 4222 if (!GotDfmt && !GotNfmt) 4223 return MatchOperand_NoMatch; 4224 auto Format = Dfmt | Nfmt << 4; 4225 Operands.push_back( 4226 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4227 return MatchOperand_Success; 4228 } 4229 4230 //===----------------------------------------------------------------------===// 4231 // ds 4232 //===----------------------------------------------------------------------===// 4233 4234 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4235 const OperandVector &Operands) { 4236 OptionalImmIndexMap OptionalIdx; 4237 4238 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4239 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4240 4241 // Add the register arguments 4242 if (Op.isReg()) { 4243 Op.addRegOperands(Inst, 1); 4244 continue; 4245 } 4246 4247 // Handle optional arguments 4248 OptionalIdx[Op.getImmTy()] = i; 4249 } 4250 4251 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4252 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4253 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4254 4255 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4256 } 4257 4258 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4259 bool IsGdsHardcoded) { 4260 OptionalImmIndexMap OptionalIdx; 4261 4262 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4263 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4264 4265 // Add the register arguments 4266 if (Op.isReg()) { 4267 Op.addRegOperands(Inst, 1); 4268 continue; 4269 } 4270 4271 if (Op.isToken() && Op.getToken() == "gds") { 4272 IsGdsHardcoded = true; 4273 continue; 4274 } 4275 4276 // Handle optional arguments 4277 OptionalIdx[Op.getImmTy()] = i; 4278 } 4279 4280 AMDGPUOperand::ImmTy OffsetType = 4281 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4282 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4283 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4284 AMDGPUOperand::ImmTyOffset; 4285 4286 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4287 4288 if (!IsGdsHardcoded) { 4289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4290 } 4291 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4292 } 4293 4294 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4295 OptionalImmIndexMap OptionalIdx; 4296 4297 unsigned OperandIdx[4]; 4298 unsigned EnMask = 0; 4299 int SrcIdx = 0; 4300 4301 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4302 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4303 4304 // Add the register arguments 4305 if (Op.isReg()) { 4306 assert(SrcIdx < 4); 4307 OperandIdx[SrcIdx] = Inst.size(); 4308 Op.addRegOperands(Inst, 1); 4309 ++SrcIdx; 4310 continue; 4311 } 4312 4313 if (Op.isOff()) { 4314 assert(SrcIdx < 4); 4315 OperandIdx[SrcIdx] = Inst.size(); 4316 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4317 ++SrcIdx; 4318 continue; 4319 } 4320 4321 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4322 Op.addImmOperands(Inst, 1); 4323 continue; 4324 } 4325 4326 if (Op.isToken() && Op.getToken() == "done") 4327 continue; 4328 4329 // Handle optional arguments 4330 OptionalIdx[Op.getImmTy()] = i; 4331 } 4332 4333 assert(SrcIdx == 4); 4334 4335 bool Compr = false; 4336 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4337 Compr = true; 4338 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4339 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4340 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4341 } 4342 4343 for (auto i = 0; i < SrcIdx; ++i) { 4344 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4345 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4346 } 4347 } 4348 4349 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4350 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4351 4352 Inst.addOperand(MCOperand::createImm(EnMask)); 4353 } 4354 4355 //===----------------------------------------------------------------------===// 4356 // s_waitcnt 4357 //===----------------------------------------------------------------------===// 4358 4359 static bool 4360 encodeCnt( 4361 const AMDGPU::IsaVersion ISA, 4362 int64_t &IntVal, 4363 int64_t CntVal, 4364 bool Saturate, 4365 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4366 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4367 { 4368 bool Failed = false; 4369 4370 IntVal = encode(ISA, IntVal, CntVal); 4371 if (CntVal != decode(ISA, IntVal)) { 4372 if (Saturate) { 4373 IntVal = encode(ISA, IntVal, -1); 4374 } else { 4375 Failed = true; 4376 } 4377 } 4378 return Failed; 4379 } 4380 4381 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4382 StringRef CntName = Parser.getTok().getString(); 4383 int64_t CntVal; 4384 4385 Parser.Lex(); 4386 if (getLexer().isNot(AsmToken::LParen)) 4387 return true; 4388 4389 Parser.Lex(); 4390 if (getLexer().isNot(AsmToken::Integer)) 4391 return true; 4392 4393 SMLoc ValLoc = Parser.getTok().getLoc(); 4394 if (getParser().parseAbsoluteExpression(CntVal)) 4395 return true; 4396 4397 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4398 4399 bool Failed = true; 4400 bool Sat = CntName.endswith("_sat"); 4401 4402 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4403 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4404 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4405 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4406 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4407 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4408 } 4409 4410 if (Failed) { 4411 Error(ValLoc, "too large value for " + CntName); 4412 return true; 4413 } 4414 4415 if (getLexer().isNot(AsmToken::RParen)) { 4416 return true; 4417 } 4418 4419 Parser.Lex(); 4420 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 4421 const AsmToken NextToken = getLexer().peekTok(); 4422 if (NextToken.is(AsmToken::Identifier)) { 4423 Parser.Lex(); 4424 } 4425 } 4426 4427 return false; 4428 } 4429 4430 OperandMatchResultTy 4431 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4432 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4433 int64_t Waitcnt = getWaitcntBitMask(ISA); 4434 SMLoc S = Parser.getTok().getLoc(); 4435 4436 switch(getLexer().getKind()) { 4437 default: return MatchOperand_ParseFail; 4438 case AsmToken::Integer: 4439 // The operand can be an integer value. 
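    // (e.g. an illustrative "s_waitcnt 0"); the symbolic form, such as
    // "s_waitcnt vmcnt(0) lgkmcnt(0)", is handled by the Identifier case
    // below via parseCnt.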
4440 if (getParser().parseAbsoluteExpression(Waitcnt)) 4441 return MatchOperand_ParseFail; 4442 break; 4443 4444 case AsmToken::Identifier: 4445 do { 4446 if (parseCnt(Waitcnt)) 4447 return MatchOperand_ParseFail; 4448 } while(getLexer().isNot(AsmToken::EndOfStatement)); 4449 break; 4450 } 4451 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4452 return MatchOperand_Success; 4453 } 4454 4455 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 4456 int64_t &Width) { 4457 using namespace llvm::AMDGPU::Hwreg; 4458 4459 if (Parser.getTok().getString() != "hwreg") 4460 return true; 4461 Parser.Lex(); 4462 4463 if (getLexer().isNot(AsmToken::LParen)) 4464 return true; 4465 Parser.Lex(); 4466 4467 if (getLexer().is(AsmToken::Identifier)) { 4468 HwReg.IsSymbolic = true; 4469 HwReg.Id = ID_UNKNOWN_; 4470 const StringRef tok = Parser.getTok().getString(); 4471 int Last = ID_SYMBOLIC_LAST_; 4472 if (isSI() || isCI() || isVI()) 4473 Last = ID_SYMBOLIC_FIRST_GFX9_; 4474 else if (isGFX9()) 4475 Last = ID_SYMBOLIC_FIRST_GFX10_; 4476 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 4477 if (tok == IdSymbolic[i]) { 4478 HwReg.Id = i; 4479 break; 4480 } 4481 } 4482 Parser.Lex(); 4483 } else { 4484 HwReg.IsSymbolic = false; 4485 if (getLexer().isNot(AsmToken::Integer)) 4486 return true; 4487 if (getParser().parseAbsoluteExpression(HwReg.Id)) 4488 return true; 4489 } 4490 4491 if (getLexer().is(AsmToken::RParen)) { 4492 Parser.Lex(); 4493 return false; 4494 } 4495 4496 // optional params 4497 if (getLexer().isNot(AsmToken::Comma)) 4498 return true; 4499 Parser.Lex(); 4500 4501 if (getLexer().isNot(AsmToken::Integer)) 4502 return true; 4503 if (getParser().parseAbsoluteExpression(Offset)) 4504 return true; 4505 4506 if (getLexer().isNot(AsmToken::Comma)) 4507 return true; 4508 Parser.Lex(); 4509 4510 if (getLexer().isNot(AsmToken::Integer)) 4511 return true; 4512 if (getParser().parseAbsoluteExpression(Width)) 4513 return true; 4514 4515 if (getLexer().isNot(AsmToken::RParen)) 4516 return true; 4517 Parser.Lex(); 4518 4519 return false; 4520 } 4521 4522 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4523 using namespace llvm::AMDGPU::Hwreg; 4524 4525 int64_t Imm16Val = 0; 4526 SMLoc S = Parser.getTok().getLoc(); 4527 4528 switch(getLexer().getKind()) { 4529 default: return MatchOperand_NoMatch; 4530 case AsmToken::Integer: 4531 // The operand can be an integer value. 4532 if (getParser().parseAbsoluteExpression(Imm16Val)) 4533 return MatchOperand_NoMatch; 4534 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4535 Error(S, "invalid immediate: only 16-bit values are legal"); 4536 // Do not return error code, but create an imm operand anyway and proceed 4537 // to the next operand, if any. That avoids unneccessary error messages. 
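      // The symbolic form, e.g. an illustrative
      // "s_getreg_b32 s2, hwreg(HW_REG_TRAPSTS, 0, 32)", is handled by the
      // Identifier case below via parseHwregConstruct.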
4538 } 4539 break; 4540 4541 case AsmToken::Identifier: { 4542 OperandInfoTy HwReg(ID_UNKNOWN_); 4543 int64_t Offset = OFFSET_DEFAULT_; 4544 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4545 if (parseHwregConstruct(HwReg, Offset, Width)) 4546 return MatchOperand_ParseFail; 4547 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4548 if (HwReg.IsSymbolic) 4549 Error(S, "invalid symbolic name of hardware register"); 4550 else 4551 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4552 } 4553 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4554 Error(S, "invalid bit offset: only 5-bit values are legal"); 4555 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4556 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4557 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4558 } 4559 break; 4560 } 4561 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4562 return MatchOperand_Success; 4563 } 4564 4565 bool AMDGPUOperand::isSWaitCnt() const { 4566 return isImm(); 4567 } 4568 4569 bool AMDGPUOperand::isHwreg() const { 4570 return isImmTy(ImmTyHwreg); 4571 } 4572 4573 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4574 using namespace llvm::AMDGPU::SendMsg; 4575 4576 if (Parser.getTok().getString() != "sendmsg") 4577 return true; 4578 Parser.Lex(); 4579 4580 if (getLexer().isNot(AsmToken::LParen)) 4581 return true; 4582 Parser.Lex(); 4583 4584 if (getLexer().is(AsmToken::Identifier)) { 4585 Msg.IsSymbolic = true; 4586 Msg.Id = ID_UNKNOWN_; 4587 const std::string tok = Parser.getTok().getString(); 4588 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4589 switch(i) { 4590 default: continue; // Omit gaps. 4591 case ID_GS_ALLOC_REQ: 4592 if (isSI() || isCI() || isVI()) 4593 continue; 4594 break; 4595 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: 4596 case ID_SYSMSG: break; 4597 } 4598 if (tok == IdSymbolic[i]) { 4599 Msg.Id = i; 4600 break; 4601 } 4602 } 4603 Parser.Lex(); 4604 } else { 4605 Msg.IsSymbolic = false; 4606 if (getLexer().isNot(AsmToken::Integer)) 4607 return true; 4608 if (getParser().parseAbsoluteExpression(Msg.Id)) 4609 return true; 4610 if (getLexer().is(AsmToken::Integer)) 4611 if (getParser().parseAbsoluteExpression(Msg.Id)) 4612 Msg.Id = ID_UNKNOWN_; 4613 } 4614 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4615 return false; 4616 4617 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4618 if (getLexer().isNot(AsmToken::RParen)) 4619 return true; 4620 Parser.Lex(); 4621 return false; 4622 } 4623 4624 if (getLexer().isNot(AsmToken::Comma)) 4625 return true; 4626 Parser.Lex(); 4627 4628 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4629 Operation.Id = ID_UNKNOWN_; 4630 if (getLexer().is(AsmToken::Identifier)) { 4631 Operation.IsSymbolic = true; 4632 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4633 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4634 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4635 const StringRef Tok = Parser.getTok().getString(); 4636 for (int i = F; i < L; ++i) { 4637 if (Tok == S[i]) { 4638 Operation.Id = i; 4639 break; 4640 } 4641 } 4642 Parser.Lex(); 4643 } else { 4644 Operation.IsSymbolic = false; 4645 if (getLexer().isNot(AsmToken::Integer)) 4646 return true; 4647 if (getParser().parseAbsoluteExpression(Operation.Id)) 4648 return true; 4649 } 4650 4651 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4652 // Stream id is optional. 4653 if (getLexer().is(AsmToken::RParen)) { 4654 Parser.Lex(); 4655 return false; 4656 } 4657 4658 if (getLexer().isNot(AsmToken::Comma)) 4659 return true; 4660 Parser.Lex(); 4661 4662 if (getLexer().isNot(AsmToken::Integer)) 4663 return true; 4664 if (getParser().parseAbsoluteExpression(StreamId)) 4665 return true; 4666 } 4667 4668 if (getLexer().isNot(AsmToken::RParen)) 4669 return true; 4670 Parser.Lex(); 4671 return false; 4672 } 4673 4674 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4675 if (getLexer().getKind() != AsmToken::Identifier) 4676 return MatchOperand_NoMatch; 4677 4678 StringRef Str = Parser.getTok().getString(); 4679 int Slot = StringSwitch<int>(Str) 4680 .Case("p10", 0) 4681 .Case("p20", 1) 4682 .Case("p0", 2) 4683 .Default(-1); 4684 4685 SMLoc S = Parser.getTok().getLoc(); 4686 if (Slot == -1) 4687 return MatchOperand_ParseFail; 4688 4689 Parser.Lex(); 4690 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4691 AMDGPUOperand::ImmTyInterpSlot)); 4692 return MatchOperand_Success; 4693 } 4694 4695 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4696 if (getLexer().getKind() != AsmToken::Identifier) 4697 return MatchOperand_NoMatch; 4698 4699 StringRef Str = Parser.getTok().getString(); 4700 if (!Str.startswith("attr")) 4701 return MatchOperand_NoMatch; 4702 4703 StringRef Chan = Str.take_back(2); 4704 int AttrChan = StringSwitch<int>(Chan) 4705 .Case(".x", 0) 4706 .Case(".y", 1) 4707 .Case(".z", 2) 4708 .Case(".w", 3) 4709 .Default(-1); 4710 if (AttrChan == -1) 4711 return MatchOperand_ParseFail; 4712 4713 Str = Str.drop_back(2).drop_front(4); 4714 4715 uint8_t Attr; 4716 if (Str.getAsInteger(10, Attr)) 4717 return MatchOperand_ParseFail; 4718 4719 SMLoc S = Parser.getTok().getLoc(); 4720 Parser.Lex(); 4721 if (Attr > 63) { 4722 Error(S, "out of bounds attr"); 4723 return MatchOperand_Success; 4724 } 4725 4726 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4727 4728 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4729 AMDGPUOperand::ImmTyInterpAttr)); 4730 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4731 AMDGPUOperand::ImmTyAttrChan)); 4732 return MatchOperand_Success; 4733 } 4734 4735 void AMDGPUAsmParser::errorExpTgt() { 4736 Error(Parser.getTok().getLoc(), "invalid exp target"); 4737 } 4738 4739 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4740 uint8_t &Val) { 4741 if (Str == "null") { 4742 Val = 9; 4743 return MatchOperand_Success; 4744 } 4745 4746 if (Str.startswith("mrt")) { 4747 Str = Str.drop_front(3); 4748 if (Str == "z") { // == mrtz 4749 Val = 8; 4750 return MatchOperand_Success; 4751 } 4752 4753 if (Str.getAsInteger(10, Val)) 4754 return MatchOperand_ParseFail; 4755 4756 if (Val > 7) 4757 errorExpTgt(); 4758 4759 return MatchOperand_Success; 4760 } 4761 4762 if (Str.startswith("pos")) { 4763 Str = Str.drop_front(3); 4764 if (Str.getAsInteger(10, Val)) 4765 return MatchOperand_ParseFail; 4766 4767 if (Val > 4 
|| (Val == 4 && !isGFX10())) 4768 errorExpTgt(); 4769 4770 Val += 12; 4771 return MatchOperand_Success; 4772 } 4773 4774 if (isGFX10() && Str == "prim") { 4775 Val = 20; 4776 return MatchOperand_Success; 4777 } 4778 4779 if (Str.startswith("param")) { 4780 Str = Str.drop_front(5); 4781 if (Str.getAsInteger(10, Val)) 4782 return MatchOperand_ParseFail; 4783 4784 if (Val >= 32) 4785 errorExpTgt(); 4786 4787 Val += 32; 4788 return MatchOperand_Success; 4789 } 4790 4791 if (Str.startswith("invalid_target_")) { 4792 Str = Str.drop_front(15); 4793 if (Str.getAsInteger(10, Val)) 4794 return MatchOperand_ParseFail; 4795 4796 errorExpTgt(); 4797 return MatchOperand_Success; 4798 } 4799 4800 return MatchOperand_NoMatch; 4801 } 4802 4803 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4804 uint8_t Val; 4805 StringRef Str = Parser.getTok().getString(); 4806 4807 auto Res = parseExpTgtImpl(Str, Val); 4808 if (Res != MatchOperand_Success) 4809 return Res; 4810 4811 SMLoc S = Parser.getTok().getLoc(); 4812 Parser.Lex(); 4813 4814 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4815 AMDGPUOperand::ImmTyExpTgt)); 4816 return MatchOperand_Success; 4817 } 4818 4819 OperandMatchResultTy 4820 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4821 using namespace llvm::AMDGPU::SendMsg; 4822 4823 int64_t Imm16Val = 0; 4824 SMLoc S = Parser.getTok().getLoc(); 4825 4826 switch(getLexer().getKind()) { 4827 default: 4828 return MatchOperand_NoMatch; 4829 case AsmToken::Integer: 4830 // The operand can be an integer value. 4831 if (getParser().parseAbsoluteExpression(Imm16Val)) 4832 return MatchOperand_NoMatch; 4833 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4834 Error(S, "invalid immediate: only 16-bit values are legal"); 4835 // Do not return error code, but create an imm operand anyway and proceed 4836 // to the next operand, if any. That avoids unneccessary error messages. 4837 } 4838 break; 4839 case AsmToken::Identifier: { 4840 OperandInfoTy Msg(ID_UNKNOWN_); 4841 OperandInfoTy Operation(OP_UNKNOWN_); 4842 int64_t StreamId = STREAM_ID_DEFAULT_; 4843 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4844 return MatchOperand_ParseFail; 4845 do { 4846 // Validate and encode message ID. 4847 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4848 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI()) 4849 || Msg.Id == ID_SYSMSG)) { 4850 if (Msg.IsSymbolic) 4851 Error(S, "invalid/unsupported symbolic name of message"); 4852 else 4853 Error(S, "invalid/unsupported code of message"); 4854 break; 4855 } 4856 Imm16Val = (Msg.Id << ID_SHIFT_); 4857 // Validate and encode operation ID. 4858 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4859 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4860 if (Operation.IsSymbolic) 4861 Error(S, "invalid symbolic name of GS_OP"); 4862 else 4863 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4864 break; 4865 } 4866 if (Operation.Id == OP_GS_NOP 4867 && Msg.Id != ID_GS_DONE) { 4868 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4869 break; 4870 } 4871 Imm16Val |= (Operation.Id << OP_SHIFT_); 4872 } 4873 if (Msg.Id == ID_SYSMSG) { 4874 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4875 if (Operation.IsSymbolic) 4876 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4877 else 4878 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4879 break; 4880 } 4881 Imm16Val |= (Operation.Id << OP_SHIFT_); 4882 } 4883 // Validate and encode stream ID. 
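      // The stream id is the optional third argument of sendmsg(), e.g. the
      // illustrative "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 1)"; only 2-bit
      // values are accepted.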
4884 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4885 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4886 Error(S, "invalid stream id: only 2-bit values are legal"); 4887 break; 4888 } 4889 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4890 } 4891 } while (false); 4892 } 4893 break; 4894 } 4895 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4896 return MatchOperand_Success; 4897 } 4898 4899 bool AMDGPUOperand::isSendMsg() const { 4900 return isImmTy(ImmTySendMsg); 4901 } 4902 4903 //===----------------------------------------------------------------------===// 4904 // parser helpers 4905 //===----------------------------------------------------------------------===// 4906 4907 bool 4908 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 4909 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 4910 } 4911 4912 bool 4913 AMDGPUAsmParser::isId(const StringRef Id) const { 4914 return isId(getToken(), Id); 4915 } 4916 4917 bool 4918 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 4919 return getTokenKind() == Kind; 4920 } 4921 4922 bool 4923 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4924 if (isId(Id)) { 4925 lex(); 4926 return true; 4927 } 4928 return false; 4929 } 4930 4931 bool 4932 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4933 if (isToken(Kind)) { 4934 lex(); 4935 return true; 4936 } 4937 return false; 4938 } 4939 4940 bool 4941 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4942 const StringRef ErrMsg) { 4943 if (!trySkipToken(Kind)) { 4944 Error(getLoc(), ErrMsg); 4945 return false; 4946 } 4947 return true; 4948 } 4949 4950 bool 4951 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4952 return !getParser().parseAbsoluteExpression(Imm); 4953 } 4954 4955 bool 4956 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4957 if (isToken(AsmToken::String)) { 4958 Val = getToken().getStringContents(); 4959 lex(); 4960 return true; 4961 } else { 4962 Error(getLoc(), ErrMsg); 4963 return false; 4964 } 4965 } 4966 4967 AsmToken 4968 AMDGPUAsmParser::getToken() const { 4969 return Parser.getTok(); 4970 } 4971 4972 AsmToken 4973 AMDGPUAsmParser::peekToken() { 4974 return getLexer().peekTok(); 4975 } 4976 4977 void 4978 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 4979 auto TokCount = getLexer().peekTokens(Tokens); 4980 4981 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 4982 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 4983 } 4984 4985 AsmToken::TokenKind 4986 AMDGPUAsmParser::getTokenKind() const { 4987 return getLexer().getKind(); 4988 } 4989 4990 SMLoc 4991 AMDGPUAsmParser::getLoc() const { 4992 return getToken().getLoc(); 4993 } 4994 4995 StringRef 4996 AMDGPUAsmParser::getTokenStr() const { 4997 return getToken().getString(); 4998 } 4999 5000 void 5001 AMDGPUAsmParser::lex() { 5002 Parser.Lex(); 5003 } 5004 5005 //===----------------------------------------------------------------------===// 5006 // swizzle 5007 //===----------------------------------------------------------------------===// 5008 5009 LLVM_READNONE 5010 static unsigned 5011 encodeBitmaskPerm(const unsigned AndMask, 5012 const unsigned OrMask, 5013 const unsigned XorMask) { 5014 using namespace llvm::AMDGPU::Swizzle; 5015 5016 return BITMASK_PERM_ENC | 5017 (AndMask << BITMASK_AND_SHIFT) | 5018 (OrMask << BITMASK_OR_SHIFT) | 5019 (XorMask << BITMASK_XOR_SHIFT); 5020 } 5021 5022 bool 5023 
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5024 const unsigned MinVal, 5025 const unsigned MaxVal, 5026 const StringRef ErrMsg) { 5027 for (unsigned i = 0; i < OpNum; ++i) { 5028 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5029 return false; 5030 } 5031 SMLoc ExprLoc = Parser.getTok().getLoc(); 5032 if (!parseExpr(Op[i])) { 5033 return false; 5034 } 5035 if (Op[i] < MinVal || Op[i] > MaxVal) { 5036 Error(ExprLoc, ErrMsg); 5037 return false; 5038 } 5039 } 5040 5041 return true; 5042 } 5043 5044 bool 5045 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5046 using namespace llvm::AMDGPU::Swizzle; 5047 5048 int64_t Lane[LANE_NUM]; 5049 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5050 "expected a 2-bit lane id")) { 5051 Imm = QUAD_PERM_ENC; 5052 for (unsigned I = 0; I < LANE_NUM; ++I) { 5053 Imm |= Lane[I] << (LANE_SHIFT * I); 5054 } 5055 return true; 5056 } 5057 return false; 5058 } 5059 5060 bool 5061 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5062 using namespace llvm::AMDGPU::Swizzle; 5063 5064 SMLoc S = Parser.getTok().getLoc(); 5065 int64_t GroupSize; 5066 int64_t LaneIdx; 5067 5068 if (!parseSwizzleOperands(1, &GroupSize, 5069 2, 32, 5070 "group size must be in the interval [2,32]")) { 5071 return false; 5072 } 5073 if (!isPowerOf2_64(GroupSize)) { 5074 Error(S, "group size must be a power of two"); 5075 return false; 5076 } 5077 if (parseSwizzleOperands(1, &LaneIdx, 5078 0, GroupSize - 1, 5079 "lane id must be in the interval [0,group size - 1]")) { 5080 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5081 return true; 5082 } 5083 return false; 5084 } 5085 5086 bool 5087 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5088 using namespace llvm::AMDGPU::Swizzle; 5089 5090 SMLoc S = Parser.getTok().getLoc(); 5091 int64_t GroupSize; 5092 5093 if (!parseSwizzleOperands(1, &GroupSize, 5094 2, 32, "group size must be in the interval [2,32]")) { 5095 return false; 5096 } 5097 if (!isPowerOf2_64(GroupSize)) { 5098 Error(S, "group size must be a power of two"); 5099 return false; 5100 } 5101 5102 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5103 return true; 5104 } 5105 5106 bool 5107 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5108 using namespace llvm::AMDGPU::Swizzle; 5109 5110 SMLoc S = Parser.getTok().getLoc(); 5111 int64_t GroupSize; 5112 5113 if (!parseSwizzleOperands(1, &GroupSize, 5114 1, 16, "group size must be in the interval [1,16]")) { 5115 return false; 5116 } 5117 if (!isPowerOf2_64(GroupSize)) { 5118 Error(S, "group size must be a power of two"); 5119 return false; 5120 } 5121 5122 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5123 return true; 5124 } 5125 5126 bool 5127 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5128 using namespace llvm::AMDGPU::Swizzle; 5129 5130 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5131 return false; 5132 } 5133 5134 StringRef Ctl; 5135 SMLoc StrLoc = Parser.getTok().getLoc(); 5136 if (!parseString(Ctl)) { 5137 return false; 5138 } 5139 if (Ctl.size() != BITMASK_WIDTH) { 5140 Error(StrLoc, "expected a 5-character mask"); 5141 return false; 5142 } 5143 5144 unsigned AndMask = 0; 5145 unsigned OrMask = 0; 5146 unsigned XorMask = 0; 5147 5148 for (size_t i = 0; i < Ctl.size(); ++i) { 5149 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5150 switch(Ctl[i]) { 5151 default: 5152 Error(StrLoc, "invalid mask"); 5153 return false; 5154 case '0': 5155 break; 5156 case '1': 5157 OrMask |= Mask; 5158 break; 5159 case 'p': 5160 
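      // 'p' passes the corresponding lane-id bit through unchanged ('i' below
      // passes it inverted, '0' forces it to zero and '1' forces it to one).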
AndMask |= Mask; 5161 break; 5162 case 'i': 5163 AndMask |= Mask; 5164 XorMask |= Mask; 5165 break; 5166 } 5167 } 5168 5169 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5170 return true; 5171 } 5172 5173 bool 5174 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5175 5176 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5177 5178 if (!parseExpr(Imm)) { 5179 return false; 5180 } 5181 if (!isUInt<16>(Imm)) { 5182 Error(OffsetLoc, "expected a 16-bit offset"); 5183 return false; 5184 } 5185 return true; 5186 } 5187 5188 bool 5189 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5190 using namespace llvm::AMDGPU::Swizzle; 5191 5192 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5193 5194 SMLoc ModeLoc = Parser.getTok().getLoc(); 5195 bool Ok = false; 5196 5197 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5198 Ok = parseSwizzleQuadPerm(Imm); 5199 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5200 Ok = parseSwizzleBitmaskPerm(Imm); 5201 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5202 Ok = parseSwizzleBroadcast(Imm); 5203 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5204 Ok = parseSwizzleSwap(Imm); 5205 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5206 Ok = parseSwizzleReverse(Imm); 5207 } else { 5208 Error(ModeLoc, "expected a swizzle mode"); 5209 } 5210 5211 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5212 } 5213 5214 return false; 5215 } 5216 5217 OperandMatchResultTy 5218 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5219 SMLoc S = Parser.getTok().getLoc(); 5220 int64_t Imm = 0; 5221 5222 if (trySkipId("offset")) { 5223 5224 bool Ok = false; 5225 if (skipToken(AsmToken::Colon, "expected a colon")) { 5226 if (trySkipId("swizzle")) { 5227 Ok = parseSwizzleMacro(Imm); 5228 } else { 5229 Ok = parseSwizzleOffset(Imm); 5230 } 5231 } 5232 5233 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5234 5235 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5236 } else { 5237 // Swizzle "offset" operand is optional. 5238 // If it is omitted, try parsing other optional operands. 5239 return parseOptionalOpr(Operands); 5240 } 5241 } 5242 5243 bool 5244 AMDGPUOperand::isSwizzle() const { 5245 return isImmTy(ImmTySwizzle); 5246 } 5247 5248 //===----------------------------------------------------------------------===// 5249 // VGPR Index Mode 5250 //===----------------------------------------------------------------------===// 5251 5252 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5253 5254 using namespace llvm::AMDGPU::VGPRIndexMode; 5255 5256 if (trySkipToken(AsmToken::RParen)) { 5257 return OFF; 5258 } 5259 5260 int64_t Imm = 0; 5261 5262 while (true) { 5263 unsigned Mode = 0; 5264 SMLoc S = Parser.getTok().getLoc(); 5265 5266 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5267 if (trySkipId(IdSymbolic[ModeId])) { 5268 Mode = 1 << ModeId; 5269 break; 5270 } 5271 } 5272 5273 if (Mode == 0) { 5274 Error(S, (Imm == 0)? 
5275 "expected a VGPR index mode or a closing parenthesis" : 5276 "expected a VGPR index mode"); 5277 break; 5278 } 5279 5280 if (Imm & Mode) { 5281 Error(S, "duplicate VGPR index mode"); 5282 break; 5283 } 5284 Imm |= Mode; 5285 5286 if (trySkipToken(AsmToken::RParen)) 5287 break; 5288 if (!skipToken(AsmToken::Comma, 5289 "expected a comma or a closing parenthesis")) 5290 break; 5291 } 5292 5293 return Imm; 5294 } 5295 5296 OperandMatchResultTy 5297 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5298 5299 int64_t Imm = 0; 5300 SMLoc S = Parser.getTok().getLoc(); 5301 5302 if (getLexer().getKind() == AsmToken::Identifier && 5303 Parser.getTok().getString() == "gpr_idx" && 5304 getLexer().peekTok().is(AsmToken::LParen)) { 5305 5306 Parser.Lex(); 5307 Parser.Lex(); 5308 5309 // If parse failed, trigger an error but do not return error code 5310 // to avoid excessive error messages. 5311 Imm = parseGPRIdxMacro(); 5312 5313 } else { 5314 if (getParser().parseAbsoluteExpression(Imm)) 5315 return MatchOperand_NoMatch; 5316 if (Imm < 0 || !isUInt<4>(Imm)) { 5317 Error(S, "invalid immediate: only 4-bit values are legal"); 5318 } 5319 } 5320 5321 Operands.push_back( 5322 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5323 return MatchOperand_Success; 5324 } 5325 5326 bool AMDGPUOperand::isGPRIdxMode() const { 5327 return isImmTy(ImmTyGprIdxMode); 5328 } 5329 5330 //===----------------------------------------------------------------------===// 5331 // sopp branch targets 5332 //===----------------------------------------------------------------------===// 5333 5334 OperandMatchResultTy 5335 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5336 SMLoc S = Parser.getTok().getLoc(); 5337 5338 switch (getLexer().getKind()) { 5339 default: return MatchOperand_ParseFail; 5340 case AsmToken::Integer: { 5341 int64_t Imm; 5342 if (getParser().parseAbsoluteExpression(Imm)) 5343 return MatchOperand_ParseFail; 5344 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5345 return MatchOperand_Success; 5346 } 5347 5348 case AsmToken::Identifier: 5349 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5350 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5351 Parser.getTok().getString()), getContext()), S)); 5352 Parser.Lex(); 5353 return MatchOperand_Success; 5354 } 5355 } 5356 5357 //===----------------------------------------------------------------------===// 5358 // mubuf 5359 //===----------------------------------------------------------------------===// 5360 5361 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5362 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5363 } 5364 5365 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5366 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5367 } 5368 5369 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5370 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5371 } 5372 5373 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5374 const OperandVector &Operands, 5375 bool IsAtomic, 5376 bool IsAtomicReturn, 5377 bool IsLds) { 5378 bool IsLdsOpcode = IsLds; 5379 bool HasLdsModifier = false; 5380 OptionalImmIndexMap OptionalIdx; 5381 assert(IsAtomicReturn ? 
IsAtomic : true); 5382 unsigned FirstOperandIdx = 1; 5383 5384 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5385 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5386 5387 // Add the register arguments 5388 if (Op.isReg()) { 5389 Op.addRegOperands(Inst, 1); 5390 // Insert a tied src for atomic return dst. 5391 // This cannot be postponed as subsequent calls to 5392 // addImmOperands rely on correct number of MC operands. 5393 if (IsAtomicReturn && i == FirstOperandIdx) 5394 Op.addRegOperands(Inst, 1); 5395 continue; 5396 } 5397 5398 // Handle the case where soffset is an immediate 5399 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5400 Op.addImmOperands(Inst, 1); 5401 continue; 5402 } 5403 5404 HasLdsModifier |= Op.isLDS(); 5405 5406 // Handle tokens like 'offen' which are sometimes hard-coded into the 5407 // asm string. There are no MCInst operands for these. 5408 if (Op.isToken()) { 5409 continue; 5410 } 5411 assert(Op.isImm()); 5412 5413 // Handle optional arguments 5414 OptionalIdx[Op.getImmTy()] = i; 5415 } 5416 5417 // This is a workaround for an llvm quirk which may result in an 5418 // incorrect instruction selection. Lds and non-lds versions of 5419 // MUBUF instructions are identical except that lds versions 5420 // have mandatory 'lds' modifier. However this modifier follows 5421 // optional modifiers and llvm asm matcher regards this 'lds' 5422 // modifier as an optional one. As a result, an lds version 5423 // of opcode may be selected even if it has no 'lds' modifier. 5424 if (IsLdsOpcode && !HasLdsModifier) { 5425 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5426 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5427 Inst.setOpcode(NoLdsOpcode); 5428 IsLdsOpcode = false; 5429 } 5430 } 5431 5432 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5433 if (!IsAtomic) { // glc is hard-coded. 5434 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5435 } 5436 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5437 5438 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5439 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5440 } 5441 5442 if (isGFX10()) 5443 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5444 } 5445 5446 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5447 OptionalImmIndexMap OptionalIdx; 5448 5449 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5451 5452 // Add the register arguments 5453 if (Op.isReg()) { 5454 Op.addRegOperands(Inst, 1); 5455 continue; 5456 } 5457 5458 // Handle the case where soffset is an immediate 5459 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5460 Op.addImmOperands(Inst, 1); 5461 continue; 5462 } 5463 5464 // Handle tokens like 'offen' which are sometimes hard-coded into the 5465 // asm string. There are no MCInst operands for these. 
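// For example, in a buffer instruction written as "buffer_load_dword v0, v1, s[4:7], s0 offen",
// the trailing 'offen' is such a token (illustrative syntax; MTBUF forms also carry format modifiers).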
5466 if (Op.isToken()) { 5467 continue; 5468 } 5469 assert(Op.isImm()); 5470 5471 // Handle optional arguments 5472 OptionalIdx[Op.getImmTy()] = i; 5473 } 5474 5475 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5476 AMDGPUOperand::ImmTyOffset); 5477 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5478 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5479 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5480 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5481 5482 if (isGFX10()) 5483 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5484 } 5485 5486 //===----------------------------------------------------------------------===// 5487 // mimg 5488 //===----------------------------------------------------------------------===// 5489 5490 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5491 bool IsAtomic) { 5492 unsigned I = 1; 5493 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5494 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5495 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5496 } 5497 5498 if (IsAtomic) { 5499 // Add src, same as dst 5500 assert(Desc.getNumDefs() == 1); 5501 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5502 } 5503 5504 OptionalImmIndexMap OptionalIdx; 5505 5506 for (unsigned E = Operands.size(); I != E; ++I) { 5507 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5508 5509 // Add the register arguments 5510 if (Op.isReg()) { 5511 Op.addRegOperands(Inst, 1); 5512 } else if (Op.isImmModifier()) { 5513 OptionalIdx[Op.getImmTy()] = I; 5514 } else if (!Op.isToken()) { 5515 llvm_unreachable("unexpected operand type"); 5516 } 5517 } 5518 5519 bool IsGFX10 = isGFX10(); 5520 5521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5522 if (IsGFX10) 5523 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5525 if (IsGFX10) 5526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5530 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5532 if (!IsGFX10) 5533 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5534 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5535 } 5536 5537 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5538 cvtMIMG(Inst, Operands, true); 5539 } 5540 5541 //===----------------------------------------------------------------------===// 5542 // smrd 5543 //===----------------------------------------------------------------------===// 5544 5545 bool AMDGPUOperand::isSMRDOffset8() const { 5546 return isImm() && isUInt<8>(getImm()); 5547 } 5548 5549 bool AMDGPUOperand::isSMRDOffset20() const { 5550 return isImm() && isUInt<20>(getImm()); 5551 } 5552 5553 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5554 // 32-bit literals are only supported on CI and we only want to use them 5555 // when the offset is > 8-bits. 
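// For instance, an offset like 0x1ffc0 does not fit in 8 bits and would use the
// 32-bit literal form on CI (illustrative value).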
5556 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5557 } 5558 5559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5560 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5561 } 5562 5563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5564 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5565 } 5566 5567 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5568 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5569 } 5570 5571 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5572 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5573 } 5574 5575 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5576 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5577 } 5578 5579 //===----------------------------------------------------------------------===// 5580 // vop3 5581 //===----------------------------------------------------------------------===// 5582 5583 static bool ConvertOmodMul(int64_t &Mul) { 5584 if (Mul != 1 && Mul != 2 && Mul != 4) 5585 return false; 5586 5587 Mul >>= 1; 5588 return true; 5589 } 5590 5591 static bool ConvertOmodDiv(int64_t &Div) { 5592 if (Div == 1) { 5593 Div = 0; 5594 return true; 5595 } 5596 5597 if (Div == 2) { 5598 Div = 3; 5599 return true; 5600 } 5601 5602 return false; 5603 } 5604 5605 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5606 if (BoundCtrl == 0) { 5607 BoundCtrl = 1; 5608 return true; 5609 } 5610 5611 if (BoundCtrl == -1) { 5612 BoundCtrl = 0; 5613 return true; 5614 } 5615 5616 return false; 5617 } 5618 5619 // Note: the order in this table matches the order of operands in AsmString. 
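// Each entry below is {Name, Type, IsBit, ConvertResult}: IsBit operands are parsed as bare named
// bits (e.g. 'glc'), the others as 'name:value' pairs (e.g. 'offset:16'), and a non-null ConvertResult
// post-processes the parsed value (e.g. ConvertBoundCtrl for 'bound_ctrl'). Examples are illustrative.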
5620 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5621 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5622 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5623 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5624 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5625 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5626 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5627 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5628 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5629 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5630 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5631 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5632 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5633 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5634 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5635 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5636 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5637 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5638 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5639 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5640 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5641 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5642 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5643 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5644 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5645 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5646 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5647 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5648 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5649 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5650 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5651 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5652 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5653 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5654 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5655 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5656 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5657 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5658 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5659 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5660 }; 5661 5662 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5663 unsigned size = Operands.size(); 5664 assert(size > 0); 5665 5666 OperandMatchResultTy res = parseOptionalOpr(Operands); 5667 5668 // This is a hack to enable hardcoded mandatory operands which follow 5669 // optional operands. 5670 // 5671 // Current design assumes that all operands after the first optional operand 5672 // are also optional. However implementation of some instructions violates 5673 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5674 // 5675 // To alleviate this problem, we have to (implicitly) parse extra operands 5676 // to make sure autogenerated parser of custom operands never hit hardcoded 5677 // mandatory operands. 5678 5679 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5680 5681 // We have parsed the first optional operand. 5682 // Parse as many operands as necessary to skip all mandatory operands. 
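// For example, flat atomics with a return value, such as "flat_atomic_swap v0, v[1:2], v2 glc"
// (illustrative syntax), carry a hardcoded 'glc' that follows optional modifiers in the asm string;
// the lookahead below makes sure it is consumed here instead of reaching the autogenerated parser.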
5683 5684 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5685 if (res != MatchOperand_Success || 5686 getLexer().is(AsmToken::EndOfStatement)) break; 5687 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5688 res = parseOptionalOpr(Operands); 5689 } 5690 } 5691 5692 return res; 5693 } 5694 5695 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5696 OperandMatchResultTy res; 5697 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5698 // try to parse any optional operand here 5699 if (Op.IsBit) { 5700 res = parseNamedBit(Op.Name, Operands, Op.Type); 5701 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5702 res = parseOModOperand(Operands); 5703 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5704 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5705 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5706 res = parseSDWASel(Operands, Op.Name, Op.Type); 5707 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5708 res = parseSDWADstUnused(Operands); 5709 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5710 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5711 Op.Type == AMDGPUOperand::ImmTyNegLo || 5712 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5713 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5714 Op.ConvertResult); 5715 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5716 res = parseDim(Operands); 5717 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 5718 res = parseDfmtNfmt(Operands); 5719 } else { 5720 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5721 } 5722 if (res != MatchOperand_NoMatch) { 5723 return res; 5724 } 5725 } 5726 return MatchOperand_NoMatch; 5727 } 5728 5729 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5730 StringRef Name = Parser.getTok().getString(); 5731 if (Name == "mul") { 5732 return parseIntWithPrefix("mul", Operands, 5733 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5734 } 5735 5736 if (Name == "div") { 5737 return parseIntWithPrefix("div", Operands, 5738 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5739 } 5740 5741 return MatchOperand_NoMatch; 5742 } 5743 5744 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5745 cvtVOP3P(Inst, Operands); 5746 5747 int Opc = Inst.getOpcode(); 5748 5749 int SrcNum; 5750 const int Ops[] = { AMDGPU::OpName::src0, 5751 AMDGPU::OpName::src1, 5752 AMDGPU::OpName::src2 }; 5753 for (SrcNum = 0; 5754 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5755 ++SrcNum); 5756 assert(SrcNum > 0); 5757 5758 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5759 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5760 5761 if ((OpSel & (1 << SrcNum)) != 0) { 5762 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5763 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5764 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5765 } 5766 } 5767 5768 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5769 // 1. This operand is input modifiers 5770 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5771 // 2. This is not last operand 5772 && Desc.NumOperands > (OpNum + 1) 5773 // 3. Next operand is register class 5774 && Desc.OpInfo[OpNum + 1].RegClass != -1 5775 // 4. 
Next register is not tied to any other operand 5776 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5777 } 5778 5779 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5780 { 5781 OptionalImmIndexMap OptionalIdx; 5782 unsigned Opc = Inst.getOpcode(); 5783 5784 unsigned I = 1; 5785 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5786 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5787 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5788 } 5789 5790 for (unsigned E = Operands.size(); I != E; ++I) { 5791 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5792 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5793 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5794 } else if (Op.isInterpSlot() || 5795 Op.isInterpAttr() || 5796 Op.isAttrChan()) { 5797 Inst.addOperand(MCOperand::createImm(Op.getImm())); 5798 } else if (Op.isImmModifier()) { 5799 OptionalIdx[Op.getImmTy()] = I; 5800 } else { 5801 llvm_unreachable("unhandled operand type"); 5802 } 5803 } 5804 5805 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5806 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5807 } 5808 5809 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5810 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5811 } 5812 5813 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5815 } 5816 } 5817 5818 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5819 OptionalImmIndexMap &OptionalIdx) { 5820 unsigned Opc = Inst.getOpcode(); 5821 5822 unsigned I = 1; 5823 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5824 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5825 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5826 } 5827 5828 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5829 // This instruction has src modifiers 5830 for (unsigned E = Operands.size(); I != E; ++I) { 5831 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5832 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5833 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5834 } else if (Op.isImmModifier()) { 5835 OptionalIdx[Op.getImmTy()] = I; 5836 } else if (Op.isRegOrImm()) { 5837 Op.addRegOrImmOperands(Inst, 1); 5838 } else { 5839 llvm_unreachable("unhandled operand type"); 5840 } 5841 } 5842 } else { 5843 // No src modifiers 5844 for (unsigned E = Operands.size(); I != E; ++I) { 5845 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5846 if (Op.isMod()) { 5847 OptionalIdx[Op.getImmTy()] = I; 5848 } else { 5849 Op.addRegOrImmOperands(Inst, 1); 5850 } 5851 } 5852 } 5853 5854 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5855 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5856 } 5857 5858 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5859 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5860 } 5861 5862 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 5863 // it has src2 register operand that is tied to dst operand 5864 // we don't allow modifiers for this operand in assembler so src2_modifiers 5865 // should be 0. 
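// E.g. for "v_mac_f32_e64 v0, v1, v2" (illustrative syntax), a zero src2_modifiers operand and a
// src2 operand equal to the dst (v0) are inserted below.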
5866 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 5867 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 5868 Opc == AMDGPU::V_MAC_F32_e64_vi || 5869 Opc == AMDGPU::V_MAC_F16_e64_vi || 5870 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 5871 Opc == AMDGPU::V_FMAC_F32_e64_vi || 5872 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 5873 auto it = Inst.begin(); 5874 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5875 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5876 ++it; 5877 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5878 } 5879 } 5880 5881 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5882 OptionalImmIndexMap OptionalIdx; 5883 cvtVOP3(Inst, Operands, OptionalIdx); 5884 } 5885 5886 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5887 const OperandVector &Operands) { 5888 OptionalImmIndexMap OptIdx; 5889 const int Opc = Inst.getOpcode(); 5890 const MCInstrDesc &Desc = MII.get(Opc); 5891 5892 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5893 5894 cvtVOP3(Inst, Operands, OptIdx); 5895 5896 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5897 assert(!IsPacked); 5898 Inst.addOperand(Inst.getOperand(0)); 5899 } 5900 5901 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5902 // instruction, and then figure out where to actually put the modifiers 5903 5904 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5905 5906 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5907 if (OpSelHiIdx != -1) { 5908 int DefaultVal = IsPacked ? -1 : 0; 5909 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5910 DefaultVal); 5911 } 5912 5913 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5914 if (NegLoIdx != -1) { 5915 assert(IsPacked); 5916 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5917 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5918 } 5919 5920 const int Ops[] = { AMDGPU::OpName::src0, 5921 AMDGPU::OpName::src1, 5922 AMDGPU::OpName::src2 }; 5923 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5924 AMDGPU::OpName::src1_modifiers, 5925 AMDGPU::OpName::src2_modifiers }; 5926 5927 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5928 5929 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5930 unsigned OpSelHi = 0; 5931 unsigned NegLo = 0; 5932 unsigned NegHi = 0; 5933 5934 if (OpSelHiIdx != -1) { 5935 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5936 } 5937 5938 if (NegLoIdx != -1) { 5939 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5940 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5941 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5942 } 5943 5944 for (int J = 0; J < 3; ++J) { 5945 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5946 if (OpIdx == -1) 5947 break; 5948 5949 uint32_t ModVal = 0; 5950 5951 if ((OpSel & (1 << J)) != 0) 5952 ModVal |= SISrcMods::OP_SEL_0; 5953 5954 if ((OpSelHi & (1 << J)) != 0) 5955 ModVal |= SISrcMods::OP_SEL_1; 5956 5957 if ((NegLo & (1 << J)) != 0) 5958 ModVal |= SISrcMods::NEG; 5959 5960 if ((NegHi & (1 << J)) != 0) 5961 ModVal |= SISrcMods::NEG_HI; 5962 5963 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5964 5965 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5966 } 5967 } 5968 5969 //===----------------------------------------------------------------------===// 5970 // dpp 5971 
//===----------------------------------------------------------------------===// 5972 5973 bool AMDGPUOperand::isDPPCtrl() const { 5974 using namespace AMDGPU::DPP; 5975 5976 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5977 if (result) { 5978 int64_t Imm = getImm(); 5979 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5980 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5981 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5982 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5983 (Imm == DppCtrl::WAVE_SHL1) || 5984 (Imm == DppCtrl::WAVE_ROL1) || 5985 (Imm == DppCtrl::WAVE_SHR1) || 5986 (Imm == DppCtrl::WAVE_ROR1) || 5987 (Imm == DppCtrl::ROW_MIRROR) || 5988 (Imm == DppCtrl::ROW_HALF_MIRROR) || 5989 (Imm == DppCtrl::BCAST15) || 5990 (Imm == DppCtrl::BCAST31); 5991 } 5992 return false; 5993 } 5994 5995 bool AMDGPUOperand::isS16Imm() const { 5996 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 5997 } 5998 5999 bool AMDGPUOperand::isU16Imm() const { 6000 return isImm() && isUInt<16>(getImm()); 6001 } 6002 6003 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6004 if (!isGFX10()) 6005 return MatchOperand_NoMatch; 6006 6007 SMLoc S = Parser.getTok().getLoc(); 6008 6009 if (getLexer().isNot(AsmToken::Identifier)) 6010 return MatchOperand_NoMatch; 6011 if (getLexer().getTok().getString() != "dim") 6012 return MatchOperand_NoMatch; 6013 6014 Parser.Lex(); 6015 if (getLexer().isNot(AsmToken::Colon)) 6016 return MatchOperand_ParseFail; 6017 6018 Parser.Lex(); 6019 6020 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6021 // integer. 6022 std::string Token; 6023 if (getLexer().is(AsmToken::Integer)) { 6024 SMLoc Loc = getLexer().getTok().getEndLoc(); 6025 Token = getLexer().getTok().getString(); 6026 Parser.Lex(); 6027 if (getLexer().getTok().getLoc() != Loc) 6028 return MatchOperand_ParseFail; 6029 } 6030 if (getLexer().isNot(AsmToken::Identifier)) 6031 return MatchOperand_ParseFail; 6032 Token += getLexer().getTok().getString(); 6033 6034 StringRef DimId = Token; 6035 if (DimId.startswith("SQ_RSRC_IMG_")) 6036 DimId = DimId.substr(12); 6037 6038 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6039 if (!DimInfo) 6040 return MatchOperand_ParseFail; 6041 6042 Parser.Lex(); 6043 6044 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6045 AMDGPUOperand::ImmTyDim)); 6046 return MatchOperand_Success; 6047 } 6048 6049 OperandMatchResultTy 6050 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6051 using namespace AMDGPU::DPP; 6052 6053 SMLoc S = Parser.getTok().getLoc(); 6054 StringRef Prefix; 6055 int64_t Int; 6056 6057 if (getLexer().getKind() == AsmToken::Identifier) { 6058 Prefix = Parser.getTok().getString(); 6059 } else { 6060 return MatchOperand_NoMatch; 6061 } 6062 6063 if (Prefix == "row_mirror") { 6064 Int = DppCtrl::ROW_MIRROR; 6065 Parser.Lex(); 6066 } else if (Prefix == "row_half_mirror") { 6067 Int = DppCtrl::ROW_HALF_MIRROR; 6068 Parser.Lex(); 6069 } else { 6070 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6071 if (Prefix != "quad_perm" 6072 && Prefix != "row_shl" 6073 && Prefix != "row_shr" 6074 && Prefix != "row_ror" 6075 && Prefix != "wave_shl" 6076 && Prefix != "wave_rol" 6077 && Prefix != "wave_shr" 6078 && Prefix != "wave_ror" 6079 && Prefix != "row_bcast") { 6080 return MatchOperand_NoMatch; 6081 } 6082 6083 Parser.Lex(); 6084 
if (getLexer().isNot(AsmToken::Colon)) 6085 return MatchOperand_ParseFail; 6086 6087 if (Prefix == "quad_perm") { 6088 // quad_perm:[%d,%d,%d,%d] 6089 Parser.Lex(); 6090 if (getLexer().isNot(AsmToken::LBrac)) 6091 return MatchOperand_ParseFail; 6092 Parser.Lex(); 6093 6094 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6095 return MatchOperand_ParseFail; 6096 6097 for (int i = 0; i < 3; ++i) { 6098 if (getLexer().isNot(AsmToken::Comma)) 6099 return MatchOperand_ParseFail; 6100 Parser.Lex(); 6101 6102 int64_t Temp; 6103 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6104 return MatchOperand_ParseFail; 6105 const int shift = i*2 + 2; 6106 Int += (Temp << shift); 6107 } 6108 6109 if (getLexer().isNot(AsmToken::RBrac)) 6110 return MatchOperand_ParseFail; 6111 Parser.Lex(); 6112 } else { 6113 // sel:%d 6114 Parser.Lex(); 6115 if (getParser().parseAbsoluteExpression(Int)) 6116 return MatchOperand_ParseFail; 6117 6118 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6119 Int |= DppCtrl::ROW_SHL0; 6120 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6121 Int |= DppCtrl::ROW_SHR0; 6122 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6123 Int |= DppCtrl::ROW_ROR0; 6124 } else if (Prefix == "wave_shl" && 1 == Int) { 6125 Int = DppCtrl::WAVE_SHL1; 6126 } else if (Prefix == "wave_rol" && 1 == Int) { 6127 Int = DppCtrl::WAVE_ROL1; 6128 } else if (Prefix == "wave_shr" && 1 == Int) { 6129 Int = DppCtrl::WAVE_SHR1; 6130 } else if (Prefix == "wave_ror" && 1 == Int) { 6131 Int = DppCtrl::WAVE_ROR1; 6132 } else if (Prefix == "row_bcast") { 6133 if (Int == 15) { 6134 Int = DppCtrl::BCAST15; 6135 } else if (Int == 31) { 6136 Int = DppCtrl::BCAST31; 6137 } else { 6138 return MatchOperand_ParseFail; 6139 } 6140 } else { 6141 return MatchOperand_ParseFail; 6142 } 6143 } 6144 } 6145 6146 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6147 return MatchOperand_Success; 6148 } 6149 6150 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6151 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6152 } 6153 6154 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6155 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6156 } 6157 6158 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6159 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6160 } 6161 6162 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6163 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6164 } 6165 6166 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { 6167 OptionalImmIndexMap OptionalIdx; 6168 6169 unsigned I = 1; 6170 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6171 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6172 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6173 } 6174 6175 for (unsigned E = Operands.size(); I != E; ++I) { 6176 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6177 MCOI::TIED_TO); 6178 if (TiedTo != -1) { 6179 assert((unsigned)TiedTo < Inst.getNumOperands()); 6180 // handle tied old or src2 for MAC instructions 6181 Inst.addOperand(Inst.getOperand(TiedTo)); 6182 } 6183 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6184 // Add the register arguments 6185 if (Op.isReg() && Op.getReg() == AMDGPU::VCC) { 6186 // VOP2b (v_add_u32, v_sub_u32 ...) 
dpp use "vcc" token. 6187 // Skip it. 6188 continue; 6189 } 6190 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6191 Op.addRegWithFPInputModsOperands(Inst, 2); 6192 } else if (Op.isDPPCtrl()) { 6193 Op.addImmOperands(Inst, 1); 6194 } else if (Op.isImm()) { 6195 // Handle optional arguments 6196 OptionalIdx[Op.getImmTy()] = I; 6197 } else { 6198 llvm_unreachable("Invalid operand type"); 6199 } 6200 } 6201 6202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6203 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6204 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6205 } 6206 6207 //===----------------------------------------------------------------------===// 6208 // sdwa 6209 //===----------------------------------------------------------------------===// 6210 6211 OperandMatchResultTy 6212 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6213 AMDGPUOperand::ImmTy Type) { 6214 using namespace llvm::AMDGPU::SDWA; 6215 6216 SMLoc S = Parser.getTok().getLoc(); 6217 StringRef Value; 6218 OperandMatchResultTy res; 6219 6220 res = parseStringWithPrefix(Prefix, Value); 6221 if (res != MatchOperand_Success) { 6222 return res; 6223 } 6224 6225 int64_t Int; 6226 Int = StringSwitch<int64_t>(Value) 6227 .Case("BYTE_0", SdwaSel::BYTE_0) 6228 .Case("BYTE_1", SdwaSel::BYTE_1) 6229 .Case("BYTE_2", SdwaSel::BYTE_2) 6230 .Case("BYTE_3", SdwaSel::BYTE_3) 6231 .Case("WORD_0", SdwaSel::WORD_0) 6232 .Case("WORD_1", SdwaSel::WORD_1) 6233 .Case("DWORD", SdwaSel::DWORD) 6234 .Default(0xffffffff); 6235 Parser.Lex(); // eat last token 6236 6237 if (Int == 0xffffffff) { 6238 return MatchOperand_ParseFail; 6239 } 6240 6241 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6242 return MatchOperand_Success; 6243 } 6244 6245 OperandMatchResultTy 6246 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6247 using namespace llvm::AMDGPU::SDWA; 6248 6249 SMLoc S = Parser.getTok().getLoc(); 6250 StringRef Value; 6251 OperandMatchResultTy res; 6252 6253 res = parseStringWithPrefix("dst_unused", Value); 6254 if (res != MatchOperand_Success) { 6255 return res; 6256 } 6257 6258 int64_t Int; 6259 Int = StringSwitch<int64_t>(Value) 6260 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6261 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6262 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6263 .Default(0xffffffff); 6264 Parser.Lex(); // eat last token 6265 6266 if (Int == 0xffffffff) { 6267 return MatchOperand_ParseFail; 6268 } 6269 6270 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6271 return MatchOperand_Success; 6272 } 6273 6274 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6275 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6276 } 6277 6278 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6279 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6280 } 6281 6282 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6283 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6284 } 6285 6286 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6287 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6288 } 6289 6290 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6291 uint64_t BasicInstType, bool skipVcc) { 6292 using namespace llvm::AMDGPU::SDWA; 6293 6294 
OptionalImmIndexMap OptionalIdx; 6295 bool skippedVcc = false; 6296 6297 unsigned I = 1; 6298 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6299 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6300 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6301 } 6302 6303 for (unsigned E = Operands.size(); I != E; ++I) { 6304 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6305 if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) { 6306 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst. 6307 // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6308 // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6309 // Skip VCC only if we didn't skip it on the previous iteration. 6310 if (BasicInstType == SIInstrFlags::VOP2 && 6311 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6312 skippedVcc = true; 6313 continue; 6314 } else if (BasicInstType == SIInstrFlags::VOPC && 6315 Inst.getNumOperands() == 0) { 6316 skippedVcc = true; 6317 continue; 6318 } 6319 } 6320 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6321 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6322 } else if (Op.isImm()) { 6323 // Handle optional arguments 6324 OptionalIdx[Op.getImmTy()] = I; 6325 } else { 6326 llvm_unreachable("Invalid operand type"); 6327 } 6328 skippedVcc = false; 6329 } 6330 6331 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6332 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6333 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6334 // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments 6335 switch (BasicInstType) { 6336 case SIInstrFlags::VOP1: 6337 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6338 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6339 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6340 } 6341 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6342 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6343 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6344 break; 6345 6346 case SIInstrFlags::VOP2: 6347 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6348 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6349 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6350 } 6351 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6352 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6353 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6355 break; 6356 6357 case SIInstrFlags::VOPC: 6358 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6359 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6360 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6361 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6362 break; 6363 6364 default: 6365 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 6366 } 6367 } 6368 6369 // Special case v_mac_{f16, f32}: 6370 // it has a src2 register operand that is tied to the dst operand 6371 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6372 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6373 auto it = Inst.begin(); 6374 std::advance( 6375 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6376 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6377 } 6378 } 6379 6380 /// Force static initialization. 6381 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6382 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6383 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6384 } 6385 6386 #define GET_REGISTER_MATCHER 6387 #define GET_MATCHER_IMPLEMENTATION 6388 #define GET_MNEMONIC_SPELL_CHECKER 6389 #include "AMDGPUGenAsmMatcher.inc" 6390 6391 // This function should be defined after the auto-generated include so that we have 6392 // the MatchClassKind enum defined 6393 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6394 unsigned Kind) { 6395 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6396 // But MatchInstructionImpl() expects to meet a token and fails to validate the 6397 // operand. This method checks if we are given an immediate operand but expected 6398 // to get the corresponding token. 6399 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6400 switch (Kind) { 6401 case MCK_addr64: 6402 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6403 case MCK_gds: 6404 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6405 case MCK_lds: 6406 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6407 case MCK_glc: 6408 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6409 case MCK_idxen: 6410 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6411 case MCK_offen: 6412 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6413 case MCK_SSrcB32: 6414 // When operands have expression values, they will return true for isToken, 6415 // because it is not possible to distinguish between a token and an 6416 // expression at parse time. MatchInstructionImpl() will always try to 6417 // match an operand as a token, when isToken returns true, and when the 6418 // name of the expression is not a valid token, the match will fail, 6419 // so we need to handle it here. 6420 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6421 case MCK_SSrcF32: 6422 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6423 case MCK_SoppBrTarget: 6424 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6425 case MCK_VReg32OrOff: 6426 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6427 case MCK_InterpSlot: 6428 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 6429 case MCK_Attr: 6430 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6431 case MCK_AttrChan: 6432 return Operand.isAttrChan() ?
Match_Success : Match_InvalidOperand; 6433 default: 6434 return Match_InvalidOperand; 6435 } 6436 } 6437 6438 //===----------------------------------------------------------------------===// 6439 // endpgm 6440 //===----------------------------------------------------------------------===// 6441 6442 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6443 SMLoc S = Parser.getTok().getLoc(); 6444 int64_t Imm = 0; 6445 6446 if (!parseExpr(Imm)) { 6447 // The operand is optional, if not present default to 0 6448 Imm = 0; 6449 } 6450 6451 if (!isUInt<16>(Imm)) { 6452 Error(S, "expected a 16-bit value"); 6453 return MatchOperand_ParseFail; 6454 } 6455 6456 Operands.push_back( 6457 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6458 return MatchOperand_Success; 6459 } 6460 6461 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6462
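// Note: the endpgm immediate parsed above is optional; both "s_endpgm" and "s_endpgm 1" are
// accepted, with the operand defaulting to 0 when omitted (illustrative syntax).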