1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "Utils/AMDGPUAsmUtils.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "Utils/AMDKernelCodeTUtils.h" 18 #include "llvm/ADT/APFloat.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallBitVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/ADT/Twine.h" 27 #include "llvm/BinaryFormat/ELF.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCContext.h" 30 #include "llvm/MC/MCExpr.h" 31 #include "llvm/MC/MCInst.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/MC/MCInstrInfo.h" 34 #include "llvm/MC/MCParser/MCAsmLexer.h" 35 #include "llvm/MC/MCParser/MCAsmParser.h" 36 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 38 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSubtargetInfo.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/AMDGPUMetadata.h" 44 #include "llvm/Support/AMDHSAKernelDescriptor.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MachineValueType.h" 49 #include "llvm/Support/MathExtras.h" 50 #include "llvm/Support/SMLoc.h" 51 #include "llvm/Support/TargetParser.h" 52 #include "llvm/Support/TargetRegistry.h" 53 #include "llvm/Support/raw_ostream.h" 54 #include <algorithm> 55 #include <cassert> 56 #include <cstdint> 57 #include <cstring> 58 #include <iterator> 59 #include <map> 60 #include <memory> 61 #include <string> 62 63 using namespace llvm; 64 using namespace llvm::AMDGPU; 65 using namespace llvm::amdhsa; 66 67 namespace { 68 69 class AMDGPUAsmParser; 70 71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; 72 73 //===----------------------------------------------------------------------===// 74 // Operand 75 //===----------------------------------------------------------------------===// 76 77 class AMDGPUOperand : public MCParsedAsmOperand { 78 enum KindTy { 79 Token, 80 Immediate, 81 Register, 82 Expression 83 } Kind; 84 85 SMLoc StartLoc, EndLoc; 86 const AMDGPUAsmParser *AsmParser; 87 88 public: 89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 91 92 using Ptr = std::unique_ptr<AMDGPUOperand>; 93 94 struct Modifiers { 95 bool Abs = false; 96 bool Neg = false; 97 bool Sext = false; 98 99 bool hasFPModifiers() const { return Abs || Neg; } 100 bool hasIntModifiers() const { return Sext; } 101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 102 103 int64_t getFPModifiersOperand() const { 104 int64_t Operand = 0; 105 Operand |= Abs ? SISrcMods::ABS : 0; 106 Operand |= Neg ? 
SISrcMods::NEG : 0; 107 return Operand; 108 } 109 110 int64_t getIntModifiersOperand() const { 111 int64_t Operand = 0; 112 Operand |= Sext ? SISrcMods::SEXT : 0; 113 return Operand; 114 } 115 116 int64_t getModifiersOperand() const { 117 assert(!(hasFPModifiers() && hasIntModifiers()) 118 && "fp and int modifiers should not be used simultaneously"); 119 if (hasFPModifiers()) { 120 return getFPModifiersOperand(); 121 } else if (hasIntModifiers()) { 122 return getIntModifiersOperand(); 123 } else { 124 return 0; 125 } 126 } 127 128 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 129 }; 130 131 enum ImmTy { 132 ImmTyNone, 133 ImmTyGDS, 134 ImmTyLDS, 135 ImmTyOffen, 136 ImmTyIdxen, 137 ImmTyAddr64, 138 ImmTyOffset, 139 ImmTyInstOffset, 140 ImmTyOffset0, 141 ImmTyOffset1, 142 ImmTyGLC, 143 ImmTySLC, 144 ImmTyTFE, 145 ImmTyD16, 146 ImmTyClampSI, 147 ImmTyOModSI, 148 ImmTyDppCtrl, 149 ImmTyDppRowMask, 150 ImmTyDppBankMask, 151 ImmTyDppBoundCtrl, 152 ImmTySdwaDstSel, 153 ImmTySdwaSrc0Sel, 154 ImmTySdwaSrc1Sel, 155 ImmTySdwaDstUnused, 156 ImmTyDMask, 157 ImmTyUNorm, 158 ImmTyDA, 159 ImmTyR128A16, 160 ImmTyLWE, 161 ImmTyExpTgt, 162 ImmTyExpCompr, 163 ImmTyExpVM, 164 ImmTyFORMAT, 165 ImmTyHwreg, 166 ImmTyOff, 167 ImmTySendMsg, 168 ImmTyInterpSlot, 169 ImmTyInterpAttr, 170 ImmTyAttrChan, 171 ImmTyOpSel, 172 ImmTyOpSelHi, 173 ImmTyNegLo, 174 ImmTyNegHi, 175 ImmTySwizzle, 176 ImmTyHigh 177 }; 178 179 struct TokOp { 180 const char *Data; 181 unsigned Length; 182 }; 183 184 struct ImmOp { 185 int64_t Val; 186 ImmTy Type; 187 bool IsFPImm; 188 Modifiers Mods; 189 }; 190 191 struct RegOp { 192 unsigned RegNo; 193 bool IsForcedVOP3; 194 Modifiers Mods; 195 }; 196 197 union { 198 TokOp Tok; 199 ImmOp Imm; 200 RegOp Reg; 201 const MCExpr *Expr; 202 }; 203 204 bool isToken() const override { 205 if (Kind == Token) 206 return true; 207 208 if (Kind != Expression || !Expr) 209 return false; 210 211 // When parsing operands, we can't always tell if something was meant to be 212 // a token, like 'gds', or an expression that references a global variable. 213 // In this case, we assume the string is an expression, and if we need to 214 // interpret is a token, then we treat the symbol name as the token. 
215 return isa<MCSymbolRefExpr>(Expr); 216 } 217 218 bool isImm() const override { 219 return Kind == Immediate; 220 } 221 222 bool isInlinableImm(MVT type) const; 223 bool isLiteralImm(MVT type) const; 224 225 bool isRegKind() const { 226 return Kind == Register; 227 } 228 229 bool isReg() const override { 230 return isRegKind() && !hasModifiers(); 231 } 232 233 bool isRegOrImmWithInputMods(MVT type) const { 234 return isRegKind() || isInlinableImm(type); 235 } 236 237 bool isRegOrImmWithInt16InputMods() const { 238 return isRegOrImmWithInputMods(MVT::i16); 239 } 240 241 bool isRegOrImmWithInt32InputMods() const { 242 return isRegOrImmWithInputMods(MVT::i32); 243 } 244 245 bool isRegOrImmWithInt64InputMods() const { 246 return isRegOrImmWithInputMods(MVT::i64); 247 } 248 249 bool isRegOrImmWithFP16InputMods() const { 250 return isRegOrImmWithInputMods(MVT::f16); 251 } 252 253 bool isRegOrImmWithFP32InputMods() const { 254 return isRegOrImmWithInputMods(MVT::f32); 255 } 256 257 bool isRegOrImmWithFP64InputMods() const { 258 return isRegOrImmWithInputMods(MVT::f64); 259 } 260 261 bool isVReg() const { 262 return isRegClass(AMDGPU::VGPR_32RegClassID) || 263 isRegClass(AMDGPU::VReg_64RegClassID) || 264 isRegClass(AMDGPU::VReg_96RegClassID) || 265 isRegClass(AMDGPU::VReg_128RegClassID) || 266 isRegClass(AMDGPU::VReg_256RegClassID) || 267 isRegClass(AMDGPU::VReg_512RegClassID); 268 } 269 270 bool isVReg32OrOff() const { 271 return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID); 272 } 273 274 bool isSDWAOperand(MVT type) const; 275 bool isSDWAFP16Operand() const; 276 bool isSDWAFP32Operand() const; 277 bool isSDWAInt16Operand() const; 278 bool isSDWAInt32Operand() const; 279 280 bool isImmTy(ImmTy ImmT) const { 281 return isImm() && Imm.Type == ImmT; 282 } 283 284 bool isImmModifier() const { 285 return isImm() && Imm.Type != ImmTyNone; 286 } 287 288 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 289 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 290 bool isDMask() const { return isImmTy(ImmTyDMask); } 291 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 292 bool isDA() const { return isImmTy(ImmTyDA); } 293 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 294 bool isLWE() const { return isImmTy(ImmTyLWE); } 295 bool isOff() const { return isImmTy(ImmTyOff); } 296 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 297 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 298 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 299 bool isOffen() const { return isImmTy(ImmTyOffen); } 300 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 301 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 302 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 303 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } 304 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 305 306 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 307 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 308 bool isGDS() const { return isImmTy(ImmTyGDS); } 309 bool isLDS() const { return isImmTy(ImmTyLDS); } 310 bool isGLC() const { return isImmTy(ImmTyGLC); } 311 bool isSLC() const { return isImmTy(ImmTySLC); } 312 bool isTFE() const { return isImmTy(ImmTyTFE); } 313 bool isD16() const { return isImmTy(ImmTyD16); } 314 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && 
isUInt<8>(getImm()); } 315 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 316 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 317 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 318 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 319 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 320 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 321 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 322 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 323 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 324 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 325 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 326 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 327 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 328 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 329 bool isHigh() const { return isImmTy(ImmTyHigh); } 330 331 bool isMod() const { 332 return isClampSI() || isOModSI(); 333 } 334 335 bool isRegOrImm() const { 336 return isReg() || isImm(); 337 } 338 339 bool isRegClass(unsigned RCID) const; 340 341 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 342 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 343 } 344 345 bool isSCSrcB16() const { 346 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 347 } 348 349 bool isSCSrcV2B16() const { 350 return isSCSrcB16(); 351 } 352 353 bool isSCSrcB32() const { 354 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 355 } 356 357 bool isSCSrcB64() const { 358 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 359 } 360 361 bool isSCSrcF16() const { 362 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 363 } 364 365 bool isSCSrcV2F16() const { 366 return isSCSrcF16(); 367 } 368 369 bool isSCSrcF32() const { 370 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 371 } 372 373 bool isSCSrcF64() const { 374 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 375 } 376 377 bool isSSrcB32() const { 378 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 379 } 380 381 bool isSSrcB16() const { 382 return isSCSrcB16() || isLiteralImm(MVT::i16); 383 } 384 385 bool isSSrcV2B16() const { 386 llvm_unreachable("cannot happen"); 387 return isSSrcB16(); 388 } 389 390 bool isSSrcB64() const { 391 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 392 // See isVSrc64(). 
393 return isSCSrcB64() || isLiteralImm(MVT::i64); 394 } 395 396 bool isSSrcF32() const { 397 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 398 } 399 400 bool isSSrcF64() const { 401 return isSCSrcB64() || isLiteralImm(MVT::f64); 402 } 403 404 bool isSSrcF16() const { 405 return isSCSrcB16() || isLiteralImm(MVT::f16); 406 } 407 408 bool isSSrcV2F16() const { 409 llvm_unreachable("cannot happen"); 410 return isSSrcF16(); 411 } 412 413 bool isVCSrcB32() const { 414 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 415 } 416 417 bool isVCSrcB64() const { 418 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 419 } 420 421 bool isVCSrcB16() const { 422 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 423 } 424 425 bool isVCSrcV2B16() const { 426 return isVCSrcB16(); 427 } 428 429 bool isVCSrcF32() const { 430 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 431 } 432 433 bool isVCSrcF64() const { 434 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 435 } 436 437 bool isVCSrcF16() const { 438 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 439 } 440 441 bool isVCSrcV2F16() const { 442 return isVCSrcF16(); 443 } 444 445 bool isVSrcB32() const { 446 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 447 } 448 449 bool isVSrcB64() const { 450 return isVCSrcF64() || isLiteralImm(MVT::i64); 451 } 452 453 bool isVSrcB16() const { 454 return isVCSrcF16() || isLiteralImm(MVT::i16); 455 } 456 457 bool isVSrcV2B16() const { 458 llvm_unreachable("cannot happen"); 459 return isVSrcB16(); 460 } 461 462 bool isVSrcF32() const { 463 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 464 } 465 466 bool isVSrcF64() const { 467 return isVCSrcF64() || isLiteralImm(MVT::f64); 468 } 469 470 bool isVSrcF16() const { 471 return isVCSrcF16() || isLiteralImm(MVT::f16); 472 } 473 474 bool isVSrcV2F16() const { 475 llvm_unreachable("cannot happen"); 476 return isVSrcF16(); 477 } 478 479 bool isKImmFP32() const { 480 return isLiteralImm(MVT::f32); 481 } 482 483 bool isKImmFP16() const { 484 return isLiteralImm(MVT::f16); 485 } 486 487 bool isMem() const override { 488 return false; 489 } 490 491 bool isExpr() const { 492 return Kind == Expression; 493 } 494 495 bool isSoppBrTarget() const { 496 return isExpr() || isImm(); 497 } 498 499 bool isSWaitCnt() const; 500 bool isHwreg() const; 501 bool isSendMsg() const; 502 bool isSwizzle() const; 503 bool isSMRDOffset8() const; 504 bool isSMRDOffset20() const; 505 bool isSMRDLiteralOffset() const; 506 bool isDPPCtrl() const; 507 bool isGPRIdxMode() const; 508 bool isS16Imm() const; 509 bool isU16Imm() const; 510 511 StringRef getExpressionAsToken() const { 512 assert(isExpr()); 513 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 514 return S->getSymbol().getName(); 515 } 516 517 StringRef getToken() const { 518 assert(isToken()); 519 520 if (Kind == Expression) 521 return getExpressionAsToken(); 522 523 return StringRef(Tok.Data, Tok.Length); 524 } 525 526 int64_t getImm() const { 527 assert(isImm()); 528 return Imm.Val; 529 } 530 531 ImmTy getImmTy() const { 532 assert(isImm()); 533 return Imm.Type; 534 } 535 536 unsigned getReg() const override { 537 return Reg.RegNo; 538 } 539 540 SMLoc getStartLoc() const override { 541 return StartLoc; 542 } 543 544 SMLoc getEndLoc() const override { 545 return EndLoc; 546 } 547 548 SMRange getLocRange() const { 549 return SMRange(StartLoc, EndLoc); 550 } 551 552 Modifiers getModifiers() const { 553 assert(isRegKind() || 
isImmTy(ImmTyNone)); 554 return isRegKind() ? Reg.Mods : Imm.Mods; 555 } 556 557 void setModifiers(Modifiers Mods) { 558 assert(isRegKind() || isImmTy(ImmTyNone)); 559 if (isRegKind()) 560 Reg.Mods = Mods; 561 else 562 Imm.Mods = Mods; 563 } 564 565 bool hasModifiers() const { 566 return getModifiers().hasModifiers(); 567 } 568 569 bool hasFPModifiers() const { 570 return getModifiers().hasFPModifiers(); 571 } 572 573 bool hasIntModifiers() const { 574 return getModifiers().hasIntModifiers(); 575 } 576 577 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 578 579 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 580 581 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 582 583 template <unsigned Bitwidth> 584 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 585 586 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 587 addKImmFPOperands<16>(Inst, N); 588 } 589 590 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 591 addKImmFPOperands<32>(Inst, N); 592 } 593 594 void addRegOperands(MCInst &Inst, unsigned N) const; 595 596 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 597 if (isRegKind()) 598 addRegOperands(Inst, N); 599 else if (isExpr()) 600 Inst.addOperand(MCOperand::createExpr(Expr)); 601 else 602 addImmOperands(Inst, N); 603 } 604 605 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 606 Modifiers Mods = getModifiers(); 607 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 608 if (isRegKind()) { 609 addRegOperands(Inst, N); 610 } else { 611 addImmOperands(Inst, N, false); 612 } 613 } 614 615 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 616 assert(!hasIntModifiers()); 617 addRegOrImmWithInputModsOperands(Inst, N); 618 } 619 620 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 621 assert(!hasFPModifiers()); 622 addRegOrImmWithInputModsOperands(Inst, N); 623 } 624 625 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 626 Modifiers Mods = getModifiers(); 627 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 628 assert(isRegKind()); 629 addRegOperands(Inst, N); 630 } 631 632 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 633 assert(!hasIntModifiers()); 634 addRegWithInputModsOperands(Inst, N); 635 } 636 637 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 638 assert(!hasFPModifiers()); 639 addRegWithInputModsOperands(Inst, N); 640 } 641 642 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 643 if (isImm()) 644 addImmOperands(Inst, N); 645 else { 646 assert(isExpr()); 647 Inst.addOperand(MCOperand::createExpr(Expr)); 648 } 649 } 650 651 static void printImmTy(raw_ostream& OS, ImmTy Type) { 652 switch (Type) { 653 case ImmTyNone: OS << "None"; break; 654 case ImmTyGDS: OS << "GDS"; break; 655 case ImmTyLDS: OS << "LDS"; break; 656 case ImmTyOffen: OS << "Offen"; break; 657 case ImmTyIdxen: OS << "Idxen"; break; 658 case ImmTyAddr64: OS << "Addr64"; break; 659 case ImmTyOffset: OS << "Offset"; break; 660 case ImmTyInstOffset: OS << "InstOffset"; break; 661 case ImmTyOffset0: OS << "Offset0"; break; 662 case ImmTyOffset1: OS << "Offset1"; break; 663 case ImmTyGLC: OS << "GLC"; break; 664 case ImmTySLC: OS << "SLC"; break; 665 case ImmTyTFE: OS << "TFE"; break; 666 case ImmTyD16: OS << "D16"; break; 667 case ImmTyFORMAT: OS << "FORMAT"; break; 668 case ImmTyClampSI: OS << "ClampSI"; break; 669 case 
ImmTyOModSI: OS << "OModSI"; break; 670 case ImmTyDppCtrl: OS << "DppCtrl"; break; 671 case ImmTyDppRowMask: OS << "DppRowMask"; break; 672 case ImmTyDppBankMask: OS << "DppBankMask"; break; 673 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 674 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 675 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 676 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 677 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 678 case ImmTyDMask: OS << "DMask"; break; 679 case ImmTyUNorm: OS << "UNorm"; break; 680 case ImmTyDA: OS << "DA"; break; 681 case ImmTyR128A16: OS << "R128A16"; break; 682 case ImmTyLWE: OS << "LWE"; break; 683 case ImmTyOff: OS << "Off"; break; 684 case ImmTyExpTgt: OS << "ExpTgt"; break; 685 case ImmTyExpCompr: OS << "ExpCompr"; break; 686 case ImmTyExpVM: OS << "ExpVM"; break; 687 case ImmTyHwreg: OS << "Hwreg"; break; 688 case ImmTySendMsg: OS << "SendMsg"; break; 689 case ImmTyInterpSlot: OS << "InterpSlot"; break; 690 case ImmTyInterpAttr: OS << "InterpAttr"; break; 691 case ImmTyAttrChan: OS << "AttrChan"; break; 692 case ImmTyOpSel: OS << "OpSel"; break; 693 case ImmTyOpSelHi: OS << "OpSelHi"; break; 694 case ImmTyNegLo: OS << "NegLo"; break; 695 case ImmTyNegHi: OS << "NegHi"; break; 696 case ImmTySwizzle: OS << "Swizzle"; break; 697 case ImmTyHigh: OS << "High"; break; 698 } 699 } 700 701 void print(raw_ostream &OS) const override { 702 switch (Kind) { 703 case Register: 704 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 705 break; 706 case Immediate: 707 OS << '<' << getImm(); 708 if (getImmTy() != ImmTyNone) { 709 OS << " type: "; printImmTy(OS, getImmTy()); 710 } 711 OS << " mods: " << Imm.Mods << '>'; 712 break; 713 case Token: 714 OS << '\'' << getToken() << '\''; 715 break; 716 case Expression: 717 OS << "<expr " << *Expr << '>'; 718 break; 719 } 720 } 721 722 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 723 int64_t Val, SMLoc Loc, 724 ImmTy Type = ImmTyNone, 725 bool IsFPImm = false) { 726 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 727 Op->Imm.Val = Val; 728 Op->Imm.IsFPImm = IsFPImm; 729 Op->Imm.Type = Type; 730 Op->Imm.Mods = Modifiers(); 731 Op->StartLoc = Loc; 732 Op->EndLoc = Loc; 733 return Op; 734 } 735 736 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 737 StringRef Str, SMLoc Loc, 738 bool HasExplicitEncodingSize = true) { 739 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 740 Res->Tok.Data = Str.data(); 741 Res->Tok.Length = Str.size(); 742 Res->StartLoc = Loc; 743 Res->EndLoc = Loc; 744 return Res; 745 } 746 747 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 748 unsigned RegNo, SMLoc S, 749 SMLoc E, 750 bool ForceVOP3) { 751 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 752 Op->Reg.RegNo = RegNo; 753 Op->Reg.Mods = Modifiers(); 754 Op->Reg.IsForcedVOP3 = ForceVOP3; 755 Op->StartLoc = S; 756 Op->EndLoc = E; 757 return Op; 758 } 759 760 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 761 const class MCExpr *Expr, SMLoc S) { 762 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 763 Op->Expr = Expr; 764 Op->StartLoc = S; 765 Op->EndLoc = S; 766 return Op; 767 } 768 }; 769 770 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 771 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 772 return OS; 773 } 774 775 
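// Illustrative sketch (not part of the upstream parser): operands are created
// through the Create* factories above and can be dumped with print(). Assuming
// a parser pointer 'P' and a source location 'Loc' (both hypothetical names),
// a caller could do something like:
//   AMDGPUOperand::Ptr Op =
//       AMDGPUOperand::CreateImm(P, 4, Loc, AMDGPUOperand::ImmTyOffset);
//   Op->print(errs()); // -> <4 type: Offset mods: abs:0 neg: 0 sext:0>
// The exact text follows printImmTy() and operator<< for Modifiers.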
//===----------------------------------------------------------------------===// 776 // AsmParser 777 //===----------------------------------------------------------------------===// 778 779 // Holds info related to the current kernel, e.g. count of SGPRs used. 780 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 781 // .amdgpu_hsa_kernel or at EOF. 782 class KernelScopeInfo { 783 int SgprIndexUnusedMin = -1; 784 int VgprIndexUnusedMin = -1; 785 MCContext *Ctx = nullptr; 786 787 void usesSgprAt(int i) { 788 if (i >= SgprIndexUnusedMin) { 789 SgprIndexUnusedMin = ++i; 790 if (Ctx) { 791 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 792 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 793 } 794 } 795 } 796 797 void usesVgprAt(int i) { 798 if (i >= VgprIndexUnusedMin) { 799 VgprIndexUnusedMin = ++i; 800 if (Ctx) { 801 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 802 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 803 } 804 } 805 } 806 807 public: 808 KernelScopeInfo() = default; 809 810 void initialize(MCContext &Context) { 811 Ctx = &Context; 812 usesSgprAt(SgprIndexUnusedMin = -1); 813 usesVgprAt(VgprIndexUnusedMin = -1); 814 } 815 816 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 817 switch (RegKind) { 818 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 819 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 820 default: break; 821 } 822 } 823 }; 824 825 class AMDGPUAsmParser : public MCTargetAsmParser { 826 MCAsmParser &Parser; 827 828 // Number of extra operands parsed after the first optional operand. 829 // This may be necessary to skip hardcoded mandatory operands. 830 static const unsigned MAX_OPR_LOOKAHEAD = 8; 831 832 unsigned ForcedEncodingSize = 0; 833 bool ForcedDPP = false; 834 bool ForcedSDWA = false; 835 KernelScopeInfo KernelScope; 836 837 /// @name Auto-generated Match Functions 838 /// { 839 840 #define GET_ASSEMBLER_HEADER 841 #include "AMDGPUGenAsmMatcher.inc" 842 843 /// } 844 845 private: 846 bool ParseAsAbsoluteExpression(uint32_t &Ret); 847 bool OutOfRangeError(SMRange Range); 848 /// Calculate VGPR/SGPR blocks required for given target, reserved 849 /// registers, and user-specified NextFreeXGPR values. 850 /// 851 /// \param Features [in] Target features, used for bug corrections. 852 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 853 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 854 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 855 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 856 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 857 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 858 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 859 /// \param VGPRBlocks [out] Result VGPR block count. 860 /// \param SGPRBlocks [out] Result SGPR block count. 
861 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 862 bool FlatScrUsed, bool XNACKUsed, 863 unsigned NextFreeVGPR, SMRange VGPRRange, 864 unsigned NextFreeSGPR, SMRange SGPRRange, 865 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 866 bool ParseDirectiveAMDGCNTarget(); 867 bool ParseDirectiveAMDHSAKernel(); 868 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 869 bool ParseDirectiveHSACodeObjectVersion(); 870 bool ParseDirectiveHSACodeObjectISA(); 871 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 872 bool ParseDirectiveAMDKernelCodeT(); 873 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 874 bool ParseDirectiveAMDGPUHsaKernel(); 875 876 bool ParseDirectiveISAVersion(); 877 bool ParseDirectiveHSAMetadata(); 878 bool ParseDirectivePALMetadata(); 879 880 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 881 RegisterKind RegKind, unsigned Reg1, 882 unsigned RegNum); 883 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 884 unsigned& RegNum, unsigned& RegWidth, 885 unsigned *DwordRegIndex); 886 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 887 void initializeGprCountSymbol(RegisterKind RegKind); 888 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 889 unsigned RegWidth); 890 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 891 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 892 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 893 bool IsGdsHardcoded); 894 895 public: 896 enum AMDGPUMatchResultTy { 897 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 898 }; 899 900 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 901 902 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 903 const MCInstrInfo &MII, 904 const MCTargetOptions &Options) 905 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 906 MCAsmParserExtension::Initialize(Parser); 907 908 if (getFeatureBits().none()) { 909 // Set default features. 910 copySTI().ToggleFeature("SOUTHERN_ISLANDS"); 911 } 912 913 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 914 915 { 916 // TODO: make those pre-defined variables read-only. 917 // Currently there is no suitable machinery in core llvm-mc for this. 918 // MCSymbol::isRedefinable is intended for another purpose, and 919 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
920 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 921 MCContext &Ctx = getContext(); 922 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 923 MCSymbol *Sym = 924 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 925 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 926 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 927 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 928 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 929 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 930 } else { 931 MCSymbol *Sym = 932 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 933 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 934 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 935 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 936 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 937 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 938 } 939 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 940 initializeGprCountSymbol(IS_VGPR); 941 initializeGprCountSymbol(IS_SGPR); 942 } else 943 KernelScope.initialize(getContext()); 944 } 945 } 946 947 bool hasXNACK() const { 948 return AMDGPU::hasXNACK(getSTI()); 949 } 950 951 bool hasMIMG_R128() const { 952 return AMDGPU::hasMIMG_R128(getSTI()); 953 } 954 955 bool hasPackedD16() const { 956 return AMDGPU::hasPackedD16(getSTI()); 957 } 958 959 bool isSI() const { 960 return AMDGPU::isSI(getSTI()); 961 } 962 963 bool isCI() const { 964 return AMDGPU::isCI(getSTI()); 965 } 966 967 bool isVI() const { 968 return AMDGPU::isVI(getSTI()); 969 } 970 971 bool isGFX9() const { 972 return AMDGPU::isGFX9(getSTI()); 973 } 974 975 bool hasInv2PiInlineImm() const { 976 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 977 } 978 979 bool hasFlatOffsets() const { 980 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 981 } 982 983 bool hasSGPR102_SGPR103() const { 984 return !isVI(); 985 } 986 987 bool hasIntClamp() const { 988 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 989 } 990 991 AMDGPUTargetStreamer &getTargetStreamer() { 992 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 993 return static_cast<AMDGPUTargetStreamer &>(TS); 994 } 995 996 const MCRegisterInfo *getMRI() const { 997 // We need this const_cast because for some reason getContext() is not const 998 // in MCAsmParser. 
999 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1000 } 1001 1002 const MCInstrInfo *getMII() const { 1003 return &MII; 1004 } 1005 1006 const FeatureBitset &getFeatureBits() const { 1007 return getSTI().getFeatureBits(); 1008 } 1009 1010 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1011 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1012 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1013 1014 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1015 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1016 bool isForcedDPP() const { return ForcedDPP; } 1017 bool isForcedSDWA() const { return ForcedSDWA; } 1018 ArrayRef<unsigned> getMatchedVariants() const; 1019 1020 std::unique_ptr<AMDGPUOperand> parseRegister(); 1021 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1022 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1023 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1024 unsigned Kind) override; 1025 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1026 OperandVector &Operands, MCStreamer &Out, 1027 uint64_t &ErrorInfo, 1028 bool MatchingInlineAsm) override; 1029 bool ParseDirective(AsmToken DirectiveID) override; 1030 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1031 StringRef parseMnemonicSuffix(StringRef Name); 1032 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1033 SMLoc NameLoc, OperandVector &Operands) override; 1034 //bool ProcessInstruction(MCInst &Inst); 1035 1036 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1037 1038 OperandMatchResultTy 1039 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1040 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1041 bool (*ConvertResult)(int64_t &) = nullptr); 1042 1043 OperandMatchResultTy parseOperandArrayWithPrefix( 1044 const char *Prefix, 1045 OperandVector &Operands, 1046 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1047 bool (*ConvertResult)(int64_t&) = nullptr); 1048 1049 OperandMatchResultTy 1050 parseNamedBit(const char *Name, OperandVector &Operands, 1051 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1052 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1053 StringRef &Value); 1054 1055 bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false); 1056 OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false); 1057 OperandMatchResultTy parseReg(OperandVector &Operands); 1058 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false); 1059 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1060 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1061 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1062 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1063 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1064 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1065 1066 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1067 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1068 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1069 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1070 1071 bool parseCnt(int64_t 
&IntVal); 1072 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1073 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1074 1075 private: 1076 struct OperandInfoTy { 1077 int64_t Id; 1078 bool IsSymbolic = false; 1079 1080 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1081 }; 1082 1083 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1084 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1085 1086 void errorExpTgt(); 1087 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1088 1089 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1090 bool validateSOPLiteral(const MCInst &Inst) const; 1091 bool validateConstantBusLimitations(const MCInst &Inst); 1092 bool validateEarlyClobberLimitations(const MCInst &Inst); 1093 bool validateIntClampSupported(const MCInst &Inst); 1094 bool validateMIMGAtomicDMask(const MCInst &Inst); 1095 bool validateMIMGGatherDMask(const MCInst &Inst); 1096 bool validateMIMGDataSize(const MCInst &Inst); 1097 bool validateMIMGD16(const MCInst &Inst); 1098 bool validateLdsDirect(const MCInst &Inst); 1099 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1100 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1101 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1102 1103 bool trySkipId(const StringRef Id); 1104 bool trySkipToken(const AsmToken::TokenKind Kind); 1105 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1106 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1107 bool parseExpr(int64_t &Imm); 1108 1109 public: 1110 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1111 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1112 1113 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1114 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1115 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1116 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1117 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1118 1119 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1120 const unsigned MinVal, 1121 const unsigned MaxVal, 1122 const StringRef ErrMsg); 1123 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1124 bool parseSwizzleOffset(int64_t &Imm); 1125 bool parseSwizzleMacro(int64_t &Imm); 1126 bool parseSwizzleQuadPerm(int64_t &Imm); 1127 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1128 bool parseSwizzleBroadcast(int64_t &Imm); 1129 bool parseSwizzleSwap(int64_t &Imm); 1130 bool parseSwizzleReverse(int64_t &Imm); 1131 1132 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1133 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1134 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1135 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1136 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1137 1138 AMDGPUOperand::Ptr defaultGLC() const; 1139 AMDGPUOperand::Ptr defaultSLC() const; 1140 1141 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1142 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1143 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1144 AMDGPUOperand::Ptr defaultOffsetU12() const; 
1145 AMDGPUOperand::Ptr defaultOffsetS13() const; 1146 1147 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1148 1149 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1150 OptionalImmIndexMap &OptionalIdx); 1151 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1152 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1153 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1154 1155 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1156 1157 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1158 bool IsAtomic = false); 1159 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1160 1161 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1162 AMDGPUOperand::Ptr defaultRowMask() const; 1163 AMDGPUOperand::Ptr defaultBankMask() const; 1164 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1165 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1166 1167 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1168 AMDGPUOperand::ImmTy Type); 1169 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1170 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1171 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1172 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1173 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1174 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1175 uint64_t BasicInstType, bool skipVcc = false); 1176 }; 1177 1178 struct OptionalOperand { 1179 const char *Name; 1180 AMDGPUOperand::ImmTy Type; 1181 bool IsBit; 1182 bool (*ConvertResult)(int64_t&); 1183 }; 1184 1185 } // end anonymous namespace 1186 1187 // May be called with integer type with equivalent bitwidth. 
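// For example, a 2-byte operand maps to APFloat::IEEEhalf(), so MVT::i16 and
// MVT::f16 resolve to the same semantics below.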
1188 static const fltSemantics *getFltSemantics(unsigned Size) { 1189 switch (Size) { 1190 case 4: 1191 return &APFloat::IEEEsingle(); 1192 case 8: 1193 return &APFloat::IEEEdouble(); 1194 case 2: 1195 return &APFloat::IEEEhalf(); 1196 default: 1197 llvm_unreachable("unsupported fp type"); 1198 } 1199 } 1200 1201 static const fltSemantics *getFltSemantics(MVT VT) { 1202 return getFltSemantics(VT.getSizeInBits() / 8); 1203 } 1204 1205 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1206 switch (OperandType) { 1207 case AMDGPU::OPERAND_REG_IMM_INT32: 1208 case AMDGPU::OPERAND_REG_IMM_FP32: 1209 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1210 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1211 return &APFloat::IEEEsingle(); 1212 case AMDGPU::OPERAND_REG_IMM_INT64: 1213 case AMDGPU::OPERAND_REG_IMM_FP64: 1214 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1215 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1216 return &APFloat::IEEEdouble(); 1217 case AMDGPU::OPERAND_REG_IMM_INT16: 1218 case AMDGPU::OPERAND_REG_IMM_FP16: 1219 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1220 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1221 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1222 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1223 return &APFloat::IEEEhalf(); 1224 default: 1225 llvm_unreachable("unsupported fp type"); 1226 } 1227 } 1228 1229 //===----------------------------------------------------------------------===// 1230 // Operand 1231 //===----------------------------------------------------------------------===// 1232 1233 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1234 bool Lost; 1235 1236 // Convert the literal to the semantics of the requested type 1237 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1238 APFloat::rmNearestTiesToEven, 1239 &Lost); 1240 // We allow precision loss but not overflow or underflow 1241 if (Status != APFloat::opOK && 1242 Lost && 1243 ((Status & APFloat::opOverflow) != 0 || 1244 (Status & APFloat::opUnderflow) != 0)) { 1245 return false; 1246 } 1247 1248 return true; 1249 } 1250 1251 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1252 if (!isImmTy(ImmTyNone)) { 1253 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1254 return false; 1255 } 1256 // TODO: We should avoid using host float here. It would be better to 1257 // check the float bit values which is what a few other places do. 1258 // We've had bot failures before due to weird NaN support on mips hosts. 1259 1260 APInt Literal(64, Imm.Val); 1261 1262 if (Imm.IsFPImm) { // We got fp literal token 1263 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1264 return AMDGPU::isInlinableLiteral64(Imm.Val, 1265 AsmParser->hasInv2PiInlineImm()); 1266 } 1267 1268 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1269 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1270 return false; 1271 1272 if (type.getScalarSizeInBits() == 16) { 1273 return AMDGPU::isInlinableLiteral16( 1274 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1275 AsmParser->hasInv2PiInlineImm()); 1276 } 1277 1278 // Check if single precision literal is inlinable 1279 return AMDGPU::isInlinableLiteral32( 1280 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1281 AsmParser->hasInv2PiInlineImm()); 1282 } 1283 1284 // We got int literal token.
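// For integer tokens the checks below compare against the hardware inline
// constants for the operand width: 64-bit operands test the full value,
// 16-bit operands the sign-extended low 16 bits, everything else the low
// 32 bits.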
1285 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1286 return AMDGPU::isInlinableLiteral64(Imm.Val, 1287 AsmParser->hasInv2PiInlineImm()); 1288 } 1289 1290 if (type.getScalarSizeInBits() == 16) { 1291 return AMDGPU::isInlinableLiteral16( 1292 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1293 AsmParser->hasInv2PiInlineImm()); 1294 } 1295 1296 return AMDGPU::isInlinableLiteral32( 1297 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1298 AsmParser->hasInv2PiInlineImm()); 1299 } 1300 1301 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1302 // Check that this immediate can be added as a literal 1303 if (!isImmTy(ImmTyNone)) { 1304 return false; 1305 } 1306 1307 if (!Imm.IsFPImm) { 1308 // We got int literal token. 1309 1310 if (type == MVT::f64 && hasFPModifiers()) { 1311 // Applying fp modifiers to int literals would not preserve the same semantics 1312 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1313 // disable these cases. 1314 return false; 1315 } 1316 1317 unsigned Size = type.getSizeInBits(); 1318 if (Size == 64) 1319 Size = 32; 1320 1321 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1322 // types. 1323 return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val); 1324 } 1325 1326 // We got fp literal token 1327 if (type == MVT::f64) { // Expected 64-bit fp operand 1328 // We would set the low 64 bits of the literal to zeroes, but we accept such literals 1329 return true; 1330 } 1331 1332 if (type == MVT::i64) { // Expected 64-bit int operand 1333 // We don't allow fp literals in 64-bit integer instructions. It is 1334 // unclear how we should encode them. 1335 return false; 1336 } 1337 1338 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1339 return canLosslesslyConvertToFPType(FPLiteral, type); 1340 } 1341 1342 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1343 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1344 } 1345 1346 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1347 if (AsmParser->isVI()) 1348 return isVReg(); 1349 else if (AsmParser->isGFX9()) 1350 return isRegKind() || isInlinableImm(type); 1351 else 1352 return false; 1353 } 1354 1355 bool AMDGPUOperand::isSDWAFP16Operand() const { 1356 return isSDWAOperand(MVT::f16); 1357 } 1358 1359 bool AMDGPUOperand::isSDWAFP32Operand() const { 1360 return isSDWAOperand(MVT::f32); 1361 } 1362 1363 bool AMDGPUOperand::isSDWAInt16Operand() const { 1364 return isSDWAOperand(MVT::i16); 1365 } 1366 1367 bool AMDGPUOperand::isSDWAInt32Operand() const { 1368 return isSDWAOperand(MVT::i32); 1369 } 1370 1371 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1372 { 1373 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1374 assert(Size == 2 || Size == 4 || Size == 8); 1375 1376 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1377 1378 if (Imm.Mods.Abs) { 1379 Val &= ~FpSignMask; 1380 } 1381 if (Imm.Mods.Neg) { 1382 Val ^= FpSignMask; 1383 } 1384 1385 return Val; 1386 } 1387 1388 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1389 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1390 Inst.getNumOperands())) { 1391 addLiteralImmOperand(Inst, Imm.Val, 1392 ApplyModifiers & 1393 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1394 } else { 1395 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1396 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1397 } 1398 } 1399 1400 void
AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1401 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1402 auto OpNum = Inst.getNumOperands(); 1403 // Check that this operand accepts literals 1404 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1405 1406 if (ApplyModifiers) { 1407 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1408 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1409 Val = applyInputFPModifiers(Val, Size); 1410 } 1411 1412 APInt Literal(64, Val); 1413 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1414 1415 if (Imm.IsFPImm) { // We got fp literal token 1416 switch (OpTy) { 1417 case AMDGPU::OPERAND_REG_IMM_INT64: 1418 case AMDGPU::OPERAND_REG_IMM_FP64: 1419 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1420 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1421 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1422 AsmParser->hasInv2PiInlineImm())) { 1423 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1424 return; 1425 } 1426 1427 // Non-inlineable 1428 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1429 // For fp operands we check if low 32 bits are zeros 1430 if (Literal.getLoBits(32) != 0) { 1431 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1432 "Can't encode literal as exact 64-bit floating-point operand. " 1433 "Low 32-bits will be set to zero"); 1434 } 1435 1436 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1437 return; 1438 } 1439 1440 // We don't allow fp literals in 64-bit integer instructions. It is 1441 // unclear how we should encode them. This case should be checked earlier 1442 // in predicate methods (isLiteralImm()) 1443 llvm_unreachable("fp literal in 64-bit integer instruction."); 1444 1445 case AMDGPU::OPERAND_REG_IMM_INT32: 1446 case AMDGPU::OPERAND_REG_IMM_FP32: 1447 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1448 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1449 case AMDGPU::OPERAND_REG_IMM_INT16: 1450 case AMDGPU::OPERAND_REG_IMM_FP16: 1451 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1452 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1453 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1454 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1455 bool lost; 1456 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1457 // Convert literal to single precision 1458 FPLiteral.convert(*getOpFltSemantics(OpTy), 1459 APFloat::rmNearestTiesToEven, &lost); 1460 // We allow precision lost but not overflow or underflow. This should be 1461 // checked earlier in isLiteralImm() 1462 1463 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1464 if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 1465 OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 1466 ImmVal |= (ImmVal << 16); 1467 } 1468 1469 Inst.addOperand(MCOperand::createImm(ImmVal)); 1470 return; 1471 } 1472 default: 1473 llvm_unreachable("invalid operand size"); 1474 } 1475 1476 return; 1477 } 1478 1479 // We got int literal token. 1480 // Only sign extend inline immediates. 
1481 // FIXME: No errors on truncation 1482 switch (OpTy) { 1483 case AMDGPU::OPERAND_REG_IMM_INT32: 1484 case AMDGPU::OPERAND_REG_IMM_FP32: 1485 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1486 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1487 if (isInt<32>(Val) && 1488 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1489 AsmParser->hasInv2PiInlineImm())) { 1490 Inst.addOperand(MCOperand::createImm(Val)); 1491 return; 1492 } 1493 1494 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1495 return; 1496 1497 case AMDGPU::OPERAND_REG_IMM_INT64: 1498 case AMDGPU::OPERAND_REG_IMM_FP64: 1499 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1500 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1501 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1502 Inst.addOperand(MCOperand::createImm(Val)); 1503 return; 1504 } 1505 1506 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1507 return; 1508 1509 case AMDGPU::OPERAND_REG_IMM_INT16: 1510 case AMDGPU::OPERAND_REG_IMM_FP16: 1511 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1512 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1513 if (isInt<16>(Val) && 1514 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1515 AsmParser->hasInv2PiInlineImm())) { 1516 Inst.addOperand(MCOperand::createImm(Val)); 1517 return; 1518 } 1519 1520 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1521 return; 1522 1523 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1524 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1525 auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue()); 1526 assert(AMDGPU::isInlinableLiteral16(LiteralVal, 1527 AsmParser->hasInv2PiInlineImm())); 1528 1529 uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 | 1530 static_cast<uint32_t>(LiteralVal); 1531 Inst.addOperand(MCOperand::createImm(ImmVal)); 1532 return; 1533 } 1534 default: 1535 llvm_unreachable("invalid operand size"); 1536 } 1537 } 1538 1539 template <unsigned Bitwidth> 1540 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1541 APInt Literal(64, Imm.Val); 1542 1543 if (!Imm.IsFPImm) { 1544 // We got int literal token. 
1545 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1546 return; 1547 } 1548 1549 bool Lost; 1550 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1551 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1552 APFloat::rmNearestTiesToEven, &Lost); 1553 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1554 } 1555 1556 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1557 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1558 } 1559 1560 //===----------------------------------------------------------------------===// 1561 // AsmParser 1562 //===----------------------------------------------------------------------===// 1563 1564 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1565 if (Is == IS_VGPR) { 1566 switch (RegWidth) { 1567 default: return -1; 1568 case 1: return AMDGPU::VGPR_32RegClassID; 1569 case 2: return AMDGPU::VReg_64RegClassID; 1570 case 3: return AMDGPU::VReg_96RegClassID; 1571 case 4: return AMDGPU::VReg_128RegClassID; 1572 case 8: return AMDGPU::VReg_256RegClassID; 1573 case 16: return AMDGPU::VReg_512RegClassID; 1574 } 1575 } else if (Is == IS_TTMP) { 1576 switch (RegWidth) { 1577 default: return -1; 1578 case 1: return AMDGPU::TTMP_32RegClassID; 1579 case 2: return AMDGPU::TTMP_64RegClassID; 1580 case 4: return AMDGPU::TTMP_128RegClassID; 1581 case 8: return AMDGPU::TTMP_256RegClassID; 1582 case 16: return AMDGPU::TTMP_512RegClassID; 1583 } 1584 } else if (Is == IS_SGPR) { 1585 switch (RegWidth) { 1586 default: return -1; 1587 case 1: return AMDGPU::SGPR_32RegClassID; 1588 case 2: return AMDGPU::SGPR_64RegClassID; 1589 case 4: return AMDGPU::SGPR_128RegClassID; 1590 case 8: return AMDGPU::SGPR_256RegClassID; 1591 case 16: return AMDGPU::SGPR_512RegClassID; 1592 } 1593 } 1594 return -1; 1595 } 1596 1597 static unsigned getSpecialRegForName(StringRef RegName) { 1598 return StringSwitch<unsigned>(RegName) 1599 .Case("exec", AMDGPU::EXEC) 1600 .Case("vcc", AMDGPU::VCC) 1601 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1602 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1603 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1604 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1605 .Case("m0", AMDGPU::M0) 1606 .Case("scc", AMDGPU::SCC) 1607 .Case("tba", AMDGPU::TBA) 1608 .Case("tma", AMDGPU::TMA) 1609 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1610 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1611 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1612 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1613 .Case("vcc_lo", AMDGPU::VCC_LO) 1614 .Case("vcc_hi", AMDGPU::VCC_HI) 1615 .Case("exec_lo", AMDGPU::EXEC_LO) 1616 .Case("exec_hi", AMDGPU::EXEC_HI) 1617 .Case("tma_lo", AMDGPU::TMA_LO) 1618 .Case("tma_hi", AMDGPU::TMA_HI) 1619 .Case("tba_lo", AMDGPU::TBA_LO) 1620 .Case("tba_hi", AMDGPU::TBA_HI) 1621 .Default(0); 1622 } 1623 1624 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1625 SMLoc &EndLoc) { 1626 auto R = parseRegister(); 1627 if (!R) return true; 1628 assert(R->isReg()); 1629 RegNo = R->getReg(); 1630 StartLoc = R->getStartLoc(); 1631 EndLoc = R->getEndLoc(); 1632 return false; 1633 } 1634 1635 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1636 RegisterKind RegKind, unsigned Reg1, 1637 unsigned RegNum) { 1638 switch (RegKind) { 1639 case IS_SPECIAL: 1640 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1641 Reg = AMDGPU::EXEC; 1642 RegWidth = 2; 1643 return true; 1644 } 1645 if (Reg == 
AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1646 Reg = AMDGPU::FLAT_SCR; 1647 RegWidth = 2; 1648 return true; 1649 } 1650 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1651 Reg = AMDGPU::XNACK_MASK; 1652 RegWidth = 2; 1653 return true; 1654 } 1655 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1656 Reg = AMDGPU::VCC; 1657 RegWidth = 2; 1658 return true; 1659 } 1660 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1661 Reg = AMDGPU::TBA; 1662 RegWidth = 2; 1663 return true; 1664 } 1665 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1666 Reg = AMDGPU::TMA; 1667 RegWidth = 2; 1668 return true; 1669 } 1670 return false; 1671 case IS_VGPR: 1672 case IS_SGPR: 1673 case IS_TTMP: 1674 if (Reg1 != Reg + RegWidth) { 1675 return false; 1676 } 1677 RegWidth++; 1678 return true; 1679 default: 1680 llvm_unreachable("unexpected register kind"); 1681 } 1682 } 1683 1684 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1685 unsigned &RegNum, unsigned &RegWidth, 1686 unsigned *DwordRegIndex) { 1687 if (DwordRegIndex) { *DwordRegIndex = 0; } 1688 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1689 if (getLexer().is(AsmToken::Identifier)) { 1690 StringRef RegName = Parser.getTok().getString(); 1691 if ((Reg = getSpecialRegForName(RegName))) { 1692 Parser.Lex(); 1693 RegKind = IS_SPECIAL; 1694 } else { 1695 unsigned RegNumIndex = 0; 1696 if (RegName[0] == 'v') { 1697 RegNumIndex = 1; 1698 RegKind = IS_VGPR; 1699 } else if (RegName[0] == 's') { 1700 RegNumIndex = 1; 1701 RegKind = IS_SGPR; 1702 } else if (RegName.startswith("ttmp")) { 1703 RegNumIndex = strlen("ttmp"); 1704 RegKind = IS_TTMP; 1705 } else { 1706 return false; 1707 } 1708 if (RegName.size() > RegNumIndex) { 1709 // Single 32-bit register: vXX. 1710 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1711 return false; 1712 Parser.Lex(); 1713 RegWidth = 1; 1714 } else { 1715 // Range of registers: v[XX:YY]. ":YY" is optional. 
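// e.g. "v[8:11]" names four consecutive VGPRs (RegWidth = 4), while "v[5]"
// (or "v[5:5]") denotes a single register; the same forms work for "s" and
// "ttmp" ranges.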
1716 Parser.Lex(); 1717 int64_t RegLo, RegHi; 1718 if (getLexer().isNot(AsmToken::LBrac)) 1719 return false; 1720 Parser.Lex(); 1721 1722 if (getParser().parseAbsoluteExpression(RegLo)) 1723 return false; 1724 1725 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1726 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1727 return false; 1728 Parser.Lex(); 1729 1730 if (isRBrace) { 1731 RegHi = RegLo; 1732 } else { 1733 if (getParser().parseAbsoluteExpression(RegHi)) 1734 return false; 1735 1736 if (getLexer().isNot(AsmToken::RBrac)) 1737 return false; 1738 Parser.Lex(); 1739 } 1740 RegNum = (unsigned) RegLo; 1741 RegWidth = (RegHi - RegLo) + 1; 1742 } 1743 } 1744 } else if (getLexer().is(AsmToken::LBrac)) { 1745 // List of consecutive registers: [s0,s1,s2,s3] 1746 Parser.Lex(); 1747 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1748 return false; 1749 if (RegWidth != 1) 1750 return false; 1751 RegisterKind RegKind1; 1752 unsigned Reg1, RegNum1, RegWidth1; 1753 do { 1754 if (getLexer().is(AsmToken::Comma)) { 1755 Parser.Lex(); 1756 } else if (getLexer().is(AsmToken::RBrac)) { 1757 Parser.Lex(); 1758 break; 1759 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1760 if (RegWidth1 != 1) { 1761 return false; 1762 } 1763 if (RegKind1 != RegKind) { 1764 return false; 1765 } 1766 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1767 return false; 1768 } 1769 } else { 1770 return false; 1771 } 1772 } while (true); 1773 } else { 1774 return false; 1775 } 1776 switch (RegKind) { 1777 case IS_SPECIAL: 1778 RegNum = 0; 1779 RegWidth = 1; 1780 break; 1781 case IS_VGPR: 1782 case IS_SGPR: 1783 case IS_TTMP: 1784 { 1785 unsigned Size = 1; 1786 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1787 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
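// For example, s[4:7] is accepted (its starting index is a multiple of 4), while s[5:8] is rejected by the alignment check below.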
1788 Size = std::min(RegWidth, 4u); 1789 } 1790 if (RegNum % Size != 0) 1791 return false; 1792 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1793 RegNum = RegNum / Size; 1794 int RCID = getRegClass(RegKind, RegWidth); 1795 if (RCID == -1) 1796 return false; 1797 const MCRegisterClass RC = TRI->getRegClass(RCID); 1798 if (RegNum >= RC.getNumRegs()) 1799 return false; 1800 Reg = RC.getRegister(RegNum); 1801 break; 1802 } 1803 1804 default: 1805 llvm_unreachable("unexpected register kind"); 1806 } 1807 1808 if (!subtargetHasRegister(*TRI, Reg)) 1809 return false; 1810 return true; 1811 } 1812 1813 Optional<StringRef> 1814 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1815 switch (RegKind) { 1816 case IS_VGPR: 1817 return StringRef(".amdgcn.next_free_vgpr"); 1818 case IS_SGPR: 1819 return StringRef(".amdgcn.next_free_sgpr"); 1820 default: 1821 return None; 1822 } 1823 } 1824 1825 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1826 auto SymbolName = getGprCountSymbolName(RegKind); 1827 assert(SymbolName && "initializing invalid register kind"); 1828 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1829 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1830 } 1831 1832 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1833 unsigned DwordRegIndex, 1834 unsigned RegWidth) { 1835 // Symbols are only defined for GCN targets 1836 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1837 return true; 1838 1839 auto SymbolName = getGprCountSymbolName(RegKind); 1840 if (!SymbolName) 1841 return true; 1842 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1843 1844 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1845 int64_t OldCount; 1846 1847 if (!Sym->isVariable()) 1848 return !Error(getParser().getTok().getLoc(), 1849 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1850 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1851 return !Error( 1852 getParser().getTok().getLoc(), 1853 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1854 1855 if (OldCount <= NewMax) 1856 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1857 1858 return true; 1859 } 1860 1861 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1862 const auto &Tok = Parser.getTok(); 1863 SMLoc StartLoc = Tok.getLoc(); 1864 SMLoc EndLoc = Tok.getEndLoc(); 1865 RegisterKind RegKind; 1866 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1867 1868 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1869 return nullptr; 1870 } 1871 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1872 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1873 return nullptr; 1874 } else 1875 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 1876 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 1877 } 1878 1879 bool 1880 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { 1881 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && 1882 (getLexer().getKind() == AsmToken::Integer || 1883 getLexer().getKind() == AsmToken::Real)) { 1884 // This is a workaround for handling operands like these: 1885 // |1.0| 1886 // |-1| 1887 // This syntax is not compatible with syntax of standard 1888 // MC expressions (due to the trailing '|'). 
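// Parse only a primary expression here so that the trailing '|' stays in the token stream for the caller to consume as the closing bar of the 'abs' modifier.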
1889 1890 SMLoc EndLoc; 1891 const MCExpr *Expr; 1892 1893 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 1894 return true; 1895 } 1896 1897 return !Expr->evaluateAsAbsolute(Val); 1898 } 1899 1900 return getParser().parseAbsoluteExpression(Val); 1901 } 1902 1903 OperandMatchResultTy 1904 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { 1905 // TODO: add syntactic sugar for 1/(2*PI) 1906 bool Minus = false; 1907 if (getLexer().getKind() == AsmToken::Minus) { 1908 const AsmToken NextToken = getLexer().peekTok(); 1909 if (!NextToken.is(AsmToken::Integer) && 1910 !NextToken.is(AsmToken::Real)) { 1911 return MatchOperand_NoMatch; 1912 } 1913 Minus = true; 1914 Parser.Lex(); 1915 } 1916 1917 SMLoc S = Parser.getTok().getLoc(); 1918 switch(getLexer().getKind()) { 1919 case AsmToken::Integer: { 1920 int64_t IntVal; 1921 if (parseAbsoluteExpr(IntVal, AbsMod)) 1922 return MatchOperand_ParseFail; 1923 if (Minus) 1924 IntVal *= -1; 1925 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 1926 return MatchOperand_Success; 1927 } 1928 case AsmToken::Real: { 1929 int64_t IntVal; 1930 if (parseAbsoluteExpr(IntVal, AbsMod)) 1931 return MatchOperand_ParseFail; 1932 1933 APFloat F(BitsToDouble(IntVal)); 1934 if (Minus) 1935 F.changeSign(); 1936 Operands.push_back( 1937 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, 1938 AMDGPUOperand::ImmTyNone, true)); 1939 return MatchOperand_Success; 1940 } 1941 default: 1942 return MatchOperand_NoMatch; 1943 } 1944 } 1945 1946 OperandMatchResultTy 1947 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 1948 if (auto R = parseRegister()) { 1949 assert(R->isReg()); 1950 R->Reg.IsForcedVOP3 = isForcedVOP3(); 1951 Operands.push_back(std::move(R)); 1952 return MatchOperand_Success; 1953 } 1954 return MatchOperand_NoMatch; 1955 } 1956 1957 OperandMatchResultTy 1958 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 1959 auto res = parseImm(Operands, AbsMod); 1960 if (res != MatchOperand_NoMatch) { 1961 return res; 1962 } 1963 1964 return parseReg(Operands); 1965 } 1966 1967 OperandMatchResultTy 1968 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 1969 bool AllowImm) { 1970 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 1971 1972 if (getLexer().getKind()== AsmToken::Minus) { 1973 const AsmToken NextToken = getLexer().peekTok(); 1974 1975 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 1976 if (NextToken.is(AsmToken::Minus)) { 1977 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 1978 return MatchOperand_ParseFail; 1979 } 1980 1981 // '-' followed by an integer literal N should be interpreted as integer 1982 // negation rather than a floating-point NEG modifier applied to N. 
// Besides being counter-intuitive, such use of the floating-point NEG modifier
1984 // results in a different meaning of integer literals used with VOP1/2/C
1985 // and VOP3, for example:
1986 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1987 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1988 // Negative fp literals should be handled likewise for uniformity
1989 if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1990 Parser.Lex();
1991 Negate = true;
1992 }
1993 }
1994
1995 if (getLexer().getKind() == AsmToken::Identifier &&
1996 Parser.getTok().getString() == "neg") {
1997 if (Negate) {
1998 Error(Parser.getTok().getLoc(), "expected register or immediate");
1999 return MatchOperand_ParseFail;
2000 }
2001 Parser.Lex();
2002 Negate2 = true;
2003 if (getLexer().isNot(AsmToken::LParen)) {
2004 Error(Parser.getTok().getLoc(), "expected left paren after neg");
2005 return MatchOperand_ParseFail;
2006 }
2007 Parser.Lex();
2008 }
2009
2010 if (getLexer().getKind() == AsmToken::Identifier &&
2011 Parser.getTok().getString() == "abs") {
2012 Parser.Lex();
2013 Abs2 = true;
2014 if (getLexer().isNot(AsmToken::LParen)) {
2015 Error(Parser.getTok().getLoc(), "expected left paren after abs");
2016 return MatchOperand_ParseFail;
2017 }
2018 Parser.Lex();
2019 }
2020
2021 if (getLexer().getKind() == AsmToken::Pipe) {
2022 if (Abs2) {
2023 Error(Parser.getTok().getLoc(), "expected register or immediate");
2024 return MatchOperand_ParseFail;
2025 }
2026 Parser.Lex();
2027 Abs = true;
2028 }
2029
2030 OperandMatchResultTy Res;
2031 if (AllowImm) {
2032 Res = parseRegOrImm(Operands, Abs);
2033 } else {
2034 Res = parseReg(Operands);
2035 }
2036 if (Res != MatchOperand_Success) {
2037 return Res;
2038 }
2039
2040 AMDGPUOperand::Modifiers Mods;
2041 if (Abs) {
2042 if (getLexer().getKind() != AsmToken::Pipe) {
2043 Error(Parser.getTok().getLoc(), "expected vertical bar");
2044 return MatchOperand_ParseFail;
2045 }
2046 Parser.Lex();
2047 Mods.Abs = true;
2048 }
2049 if (Abs2) {
2050 if (getLexer().isNot(AsmToken::RParen)) {
2051 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2052 return MatchOperand_ParseFail;
2053 }
2054 Parser.Lex();
2055 Mods.Abs = true;
2056 }
2057
2058 if (Negate) {
2059 Mods.Neg = true;
2060 } else if (Negate2) {
2061 if (getLexer().isNot(AsmToken::RParen)) {
2062 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2063 return MatchOperand_ParseFail;
2064 }
2065 Parser.Lex();
2066 Mods.Neg = true;
2067 }
2068
2069 if (Mods.hasFPModifiers()) {
2070 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2071 Op.setModifiers(Mods);
2072 }
2073 return MatchOperand_Success;
2074 }
2075
2076 OperandMatchResultTy
2077 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2078 bool AllowImm) {
2079 bool Sext = false;
2080
2081 if (getLexer().getKind() == AsmToken::Identifier &&
2082 Parser.getTok().getString() == "sext") {
2083 Parser.Lex();
2084 Sext = true;
2085 if (getLexer().isNot(AsmToken::LParen)) {
2086 Error(Parser.getTok().getLoc(), "expected left paren after sext");
2087 return MatchOperand_ParseFail;
2088 }
2089 Parser.Lex();
2090 }
2091
2092 OperandMatchResultTy Res;
2093 if (AllowImm) {
2094 Res = parseRegOrImm(Operands);
2095 } else {
2096 Res = parseReg(Operands);
2097 }
2098 if (Res != MatchOperand_Success) {
2099 return Res;
2100 }
2101
2102 AMDGPUOperand::Modifiers Mods;
2103 if (Sext) {
2104 if (getLexer().isNot(AsmToken::RParen)) {
2105 Error(Parser.getTok().getLoc(), "expected closing
parentheses"); 2106 return MatchOperand_ParseFail; 2107 } 2108 Parser.Lex(); 2109 Mods.Sext = true; 2110 } 2111 2112 if (Mods.hasIntModifiers()) { 2113 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2114 Op.setModifiers(Mods); 2115 } 2116 2117 return MatchOperand_Success; 2118 } 2119 2120 OperandMatchResultTy 2121 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2122 return parseRegOrImmWithFPInputMods(Operands, false); 2123 } 2124 2125 OperandMatchResultTy 2126 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2127 return parseRegOrImmWithIntInputMods(Operands, false); 2128 } 2129 2130 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2131 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2132 if (Reg) { 2133 Operands.push_back(std::move(Reg)); 2134 return MatchOperand_Success; 2135 } 2136 2137 const AsmToken &Tok = Parser.getTok(); 2138 if (Tok.getString() == "off") { 2139 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(), 2140 AMDGPUOperand::ImmTyOff, false)); 2141 Parser.Lex(); 2142 return MatchOperand_Success; 2143 } 2144 2145 return MatchOperand_NoMatch; 2146 } 2147 2148 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2149 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2150 2151 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2152 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2153 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2154 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2155 return Match_InvalidOperand; 2156 2157 if ((TSFlags & SIInstrFlags::VOP3) && 2158 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2159 getForcedEncodingSize() != 64) 2160 return Match_PreferE32; 2161 2162 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2163 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2164 // v_mac_f32/16 allow only dst_sel == DWORD; 2165 auto OpNum = 2166 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2167 const auto &Op = Inst.getOperand(OpNum); 2168 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2169 return Match_InvalidOperand; 2170 } 2171 } 2172 2173 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2174 // FIXME: Produces error without correct column reported. 
2175 auto OpNum =
2176 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2177 const auto &Op = Inst.getOperand(OpNum);
2178 if (Op.getImm() != 0)
2179 return Match_InvalidOperand;
2180 }
2181
2182 return Match_Success;
2183 }
2184
2185 // What asm variants we should check
2186 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2187 if (getForcedEncodingSize() == 32) {
2188 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2189 return makeArrayRef(Variants);
2190 }
2191
2192 if (isForcedVOP3()) {
2193 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2194 return makeArrayRef(Variants);
2195 }
2196
2197 if (isForcedSDWA()) {
2198 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2199 AMDGPUAsmVariants::SDWA9};
2200 return makeArrayRef(Variants);
2201 }
2202
2203 if (isForcedDPP()) {
2204 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2205 return makeArrayRef(Variants);
2206 }
2207
2208 static const unsigned Variants[] = {
2209 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2210 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2211 };
2212
2213 return makeArrayRef(Variants);
2214 }
2215
2216 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2217 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2218 const unsigned Num = Desc.getNumImplicitUses();
2219 for (unsigned i = 0; i < Num; ++i) {
2220 unsigned Reg = Desc.ImplicitUses[i];
2221 switch (Reg) {
2222 case AMDGPU::FLAT_SCR:
2223 case AMDGPU::VCC:
2224 case AMDGPU::M0:
2225 return Reg;
2226 default:
2227 break;
2228 }
2229 }
2230 return AMDGPU::NoRegister;
2231 }
2232
2233 // NB: This code is correct only when used to check constant
2234 // bus limitations because GFX7 supports no f16 inline constants.
2235 // Note that there are no cases when a GFX7 opcode violates
2236 // constant bus limitations due to the use of an f16 constant.
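// The expected operand size (in bytes) selects the check below: 8 -> 64-bit, 4 -> 32-bit, 2 -> 16-bit or packed v2i16/v2f16 literals.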
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2238 unsigned OpIdx) const {
2239 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2240
2241 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2242 return false;
2243 }
2244
2245 const MCOperand &MO = Inst.getOperand(OpIdx);
2246
2247 int64_t Val = MO.getImm();
2248 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2249
2250 switch (OpSize) { // expected operand size
2251 case 8:
2252 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2253 case 4:
2254 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2255 case 2: {
2256 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2257 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2258 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2259 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2260 } else {
2261 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2262 }
2263 }
2264 default:
2265 llvm_unreachable("invalid operand size");
2266 }
2267 }
2268
2269 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2270 const MCOperand &MO = Inst.getOperand(OpIdx);
2271 if (MO.isImm()) {
2272 return !isInlineConstant(Inst, OpIdx);
2273 }
2274 return !MO.isReg() ||
2275 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2276 }
2277
2278 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2279 const unsigned Opcode = Inst.getOpcode();
2280 const MCInstrDesc &Desc = MII.get(Opcode);
2281 unsigned ConstantBusUseCount = 0;
2282
2283 if (Desc.TSFlags &
2284 (SIInstrFlags::VOPC |
2285 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2286 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2287 SIInstrFlags::SDWA)) {
2288 // Check special imm operands (used by madmk, etc)
2289 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2290 ++ConstantBusUseCount;
2291 }
2292
2293 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2294 if (SGPRUsed != AMDGPU::NoRegister) {
2295 ++ConstantBusUseCount;
2296 }
2297
2298 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2299 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2300 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2301
2302 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2303
2304 for (int OpIdx : OpIndices) {
2305 if (OpIdx == -1) break;
2306
2307 const MCOperand &MO = Inst.getOperand(OpIdx);
2308 if (usesConstantBus(Inst, OpIdx)) {
2309 if (MO.isReg()) {
2310 const unsigned Reg = mc2PseudoReg(MO.getReg());
2311 // Pairs of registers with a partial intersection like these
2312 // s0, s[0:1]
2313 // flat_scratch_lo, flat_scratch
2314 // flat_scratch_lo, flat_scratch_hi
2315 // are theoretically valid but they are disabled anyway.
2316 // Note that this code mimics SIInstrInfo::verifyInstruction 2317 if (Reg != SGPRUsed) { 2318 ++ConstantBusUseCount; 2319 } 2320 SGPRUsed = Reg; 2321 } else { // Expression or a literal 2322 ++ConstantBusUseCount; 2323 } 2324 } 2325 } 2326 } 2327 2328 return ConstantBusUseCount <= 1; 2329 } 2330 2331 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2332 const unsigned Opcode = Inst.getOpcode(); 2333 const MCInstrDesc &Desc = MII.get(Opcode); 2334 2335 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2336 if (DstIdx == -1 || 2337 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2338 return true; 2339 } 2340 2341 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2342 2343 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2344 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2345 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2346 2347 assert(DstIdx != -1); 2348 const MCOperand &Dst = Inst.getOperand(DstIdx); 2349 assert(Dst.isReg()); 2350 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2351 2352 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2353 2354 for (int SrcIdx : SrcIndices) { 2355 if (SrcIdx == -1) break; 2356 const MCOperand &Src = Inst.getOperand(SrcIdx); 2357 if (Src.isReg()) { 2358 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2359 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2360 return false; 2361 } 2362 } 2363 } 2364 2365 return true; 2366 } 2367 2368 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2369 2370 const unsigned Opc = Inst.getOpcode(); 2371 const MCInstrDesc &Desc = MII.get(Opc); 2372 2373 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2374 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2375 assert(ClampIdx != -1); 2376 return Inst.getOperand(ClampIdx).getImm() == 0; 2377 } 2378 2379 return true; 2380 } 2381 2382 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2383 2384 const unsigned Opc = Inst.getOpcode(); 2385 const MCInstrDesc &Desc = MII.get(Opc); 2386 2387 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2388 return true; 2389 2390 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2391 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2392 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2393 2394 assert(VDataIdx != -1); 2395 assert(DMaskIdx != -1); 2396 assert(TFEIdx != -1); 2397 2398 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2399 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2400 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2401 if (DMask == 0) 2402 DMask = 1; 2403 2404 unsigned DataSize = 2405 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2406 if (hasPackedD16()) { 2407 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2408 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2409 DataSize = (DataSize + 1) / 2; 2410 } 2411 2412 return (VDataSize / 4) == DataSize + TFESize; 2413 } 2414 2415 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2416 2417 const unsigned Opc = Inst.getOpcode(); 2418 const MCInstrDesc &Desc = MII.get(Opc); 2419 2420 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2421 return true; 2422 if (!Desc.mayLoad() || !Desc.mayStore()) 2423 return true; // Not atomic 2424 2425 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2426 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2427 2428 // This is an incomplete check because image_atomic_cmpswap 2429 // may only use 0x3 and 0xf while other atomic operations 2430 // may use 0x1 and 0x3. However these limitations are 2431 // verified when we check that dmask matches dst size. 2432 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2433 } 2434 2435 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2436 2437 const unsigned Opc = Inst.getOpcode(); 2438 const MCInstrDesc &Desc = MII.get(Opc); 2439 2440 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2441 return true; 2442 2443 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2444 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2445 2446 // GATHER4 instructions use dmask in a different fashion compared to 2447 // other MIMG instructions. The only useful DMASK values are 2448 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2449 // (red,red,red,red) etc.) The ISA document doesn't mention 2450 // this. 2451 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2452 } 2453 2454 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2455 2456 const unsigned Opc = Inst.getOpcode(); 2457 const MCInstrDesc &Desc = MII.get(Opc); 2458 2459 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2460 return true; 2461 2462 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2463 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2464 if (isCI() || isSI()) 2465 return false; 2466 } 2467 2468 return true; 2469 } 2470 2471 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2472 2473 using namespace SIInstrFlags; 2474 const unsigned Opcode = Inst.getOpcode(); 2475 const MCInstrDesc &Desc = MII.get(Opcode); 2476 2477 // lds_direct register is defined so that it can be used 2478 // with 9-bit operands only. Ignore encodings which do not accept these. 2479 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2480 return true; 2481 2482 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2483 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2484 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2485 2486 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2487 2488 // lds_direct cannot be specified as either src1 or src2. 
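// lds_direct used as src0 is subject to the further restrictions checked below.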
for (int SrcIdx : SrcIndices) {
2490 if (SrcIdx == -1) break;
2491 const MCOperand &Src = Inst.getOperand(SrcIdx);
2492 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2493 return false;
2494 }
2495 }
2496
2497 if (Src0Idx == -1)
2498 return true;
2499
2500 const MCOperand &Src = Inst.getOperand(Src0Idx);
2501 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2502 return true;
2503
2504 // lds_direct is specified as src0. Check additional limitations.
2505
2506 // FIXME: This is a workaround for bug 37943
2507 // which allows 64-bit VOP3 opcodes to use 32-bit operands.
2508 if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4)
2509 return false;
2510
2511 // Documentation does not disable lds_direct for SDWA, but SP3 assembler does.
2512 // FIXME: This inconsistency needs to be investigated further.
2513 if (Desc.TSFlags & SIInstrFlags::SDWA)
2514 return false;
2515
2516 // The following opcodes do not accept lds_direct which is explicitly stated
2517 // in AMD documentation. However SP3 disables lds_direct for most other 'rev'
2518 // opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32).
2519 // FIXME: This inconsistency needs to be investigated further.
2520 switch (Opcode) {
2521 case AMDGPU::V_LSHLREV_B32_e32_si:
2522 case AMDGPU::V_LSHLREV_B32_e64_si:
2523 case AMDGPU::V_LSHLREV_B16_e32_vi:
2524 case AMDGPU::V_LSHLREV_B16_e64_vi:
2525 case AMDGPU::V_LSHLREV_B32_e32_vi:
2526 case AMDGPU::V_LSHLREV_B32_e64_vi:
2527 case AMDGPU::V_LSHLREV_B64_vi:
2528 case AMDGPU::V_LSHRREV_B32_e32_si:
2529 case AMDGPU::V_LSHRREV_B32_e64_si:
2530 case AMDGPU::V_LSHRREV_B16_e32_vi:
2531 case AMDGPU::V_LSHRREV_B16_e64_vi:
2532 case AMDGPU::V_LSHRREV_B32_e32_vi:
2533 case AMDGPU::V_LSHRREV_B32_e64_vi:
2534 case AMDGPU::V_LSHRREV_B64_vi:
2535 case AMDGPU::V_ASHRREV_I32_e64_si:
2536 case AMDGPU::V_ASHRREV_I32_e32_si:
2537 case AMDGPU::V_ASHRREV_I16_e32_vi:
2538 case AMDGPU::V_ASHRREV_I16_e64_vi:
2539 case AMDGPU::V_ASHRREV_I32_e32_vi:
2540 case AMDGPU::V_ASHRREV_I32_e64_vi:
2541 case AMDGPU::V_ASHRREV_I64_vi:
2542 case AMDGPU::V_PK_LSHLREV_B16_vi:
2543 case AMDGPU::V_PK_LSHRREV_B16_vi:
2544 case AMDGPU::V_PK_ASHRREV_I16_vi:
2545 return false;
2546 default:
2547 return true;
2548 }
2549 }
2550
2551 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2552 unsigned Opcode = Inst.getOpcode();
2553 const MCInstrDesc &Desc = MII.get(Opcode);
2554 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2555 return true;
2556
2557 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2558 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2559
2560 const int OpIndices[] = { Src0Idx, Src1Idx };
2561
2562 unsigned NumLiterals = 0;
2563 uint32_t LiteralValue;
2564
2565 for (int OpIdx : OpIndices) {
2566 if (OpIdx == -1) break;
2567
2568 const MCOperand &MO = Inst.getOperand(OpIdx);
2569 if (MO.isImm() &&
2570 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2571 AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2572 !isInlineConstant(Inst, OpIdx)) {
2573 uint32_t Value = static_cast<uint32_t>(MO.getImm());
2574 if (NumLiterals == 0 || LiteralValue != Value) {
2575 LiteralValue = Value;
2576 ++NumLiterals;
2577 }
2578 }
2579 }
2580
2581 return NumLiterals <= 1;
2582 }
2583
2584 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2585 const SMLoc &IDLoc) {
2586 if (!validateLdsDirect(Inst)) {
2587 Error(IDLoc,
2588 "invalid use of lds_direct");
2589 return false;
2590 }
2591 if (!validateSOPLiteral(Inst)) {
2592 Error(IDLoc, 2593 "only one literal operand is allowed"); 2594 return false; 2595 } 2596 if (!validateConstantBusLimitations(Inst)) { 2597 Error(IDLoc, 2598 "invalid operand (violates constant bus restrictions)"); 2599 return false; 2600 } 2601 if (!validateEarlyClobberLimitations(Inst)) { 2602 Error(IDLoc, 2603 "destination must be different than all sources"); 2604 return false; 2605 } 2606 if (!validateIntClampSupported(Inst)) { 2607 Error(IDLoc, 2608 "integer clamping is not supported on this GPU"); 2609 return false; 2610 } 2611 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2612 if (!validateMIMGD16(Inst)) { 2613 Error(IDLoc, 2614 "d16 modifier is not supported on this GPU"); 2615 return false; 2616 } 2617 if (!validateMIMGDataSize(Inst)) { 2618 Error(IDLoc, 2619 "image data size does not match dmask and tfe"); 2620 return false; 2621 } 2622 if (!validateMIMGAtomicDMask(Inst)) { 2623 Error(IDLoc, 2624 "invalid atomic image dmask"); 2625 return false; 2626 } 2627 if (!validateMIMGGatherDMask(Inst)) { 2628 Error(IDLoc, 2629 "invalid image_gather dmask: only one bit must be set"); 2630 return false; 2631 } 2632 2633 return true; 2634 } 2635 2636 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS, 2637 unsigned VariantID = 0); 2638 2639 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2640 OperandVector &Operands, 2641 MCStreamer &Out, 2642 uint64_t &ErrorInfo, 2643 bool MatchingInlineAsm) { 2644 MCInst Inst; 2645 unsigned Result = Match_Success; 2646 for (auto Variant : getMatchedVariants()) { 2647 uint64_t EI; 2648 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2649 Variant); 2650 // We order match statuses from least to most specific. We use most specific 2651 // status as resulting 2652 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2653 if ((R == Match_Success) || 2654 (R == Match_PreferE32) || 2655 (R == Match_MissingFeature && Result != Match_PreferE32) || 2656 (R == Match_InvalidOperand && Result != Match_MissingFeature 2657 && Result != Match_PreferE32) || 2658 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2659 && Result != Match_MissingFeature 2660 && Result != Match_PreferE32)) { 2661 Result = R; 2662 ErrorInfo = EI; 2663 } 2664 if (R == Match_Success) 2665 break; 2666 } 2667 2668 switch (Result) { 2669 default: break; 2670 case Match_Success: 2671 if (!validateInstruction(Inst, IDLoc)) { 2672 return true; 2673 } 2674 Inst.setLoc(IDLoc); 2675 Out.EmitInstruction(Inst, getSTI()); 2676 return false; 2677 2678 case Match_MissingFeature: 2679 return Error(IDLoc, "instruction not supported on this GPU"); 2680 2681 case Match_MnemonicFail: { 2682 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2683 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2684 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2685 return Error(IDLoc, "invalid instruction" + Suggestion, 2686 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2687 } 2688 2689 case Match_InvalidOperand: { 2690 SMLoc ErrorLoc = IDLoc; 2691 if (ErrorInfo != ~0ULL) { 2692 if (ErrorInfo >= Operands.size()) { 2693 return Error(IDLoc, "too few operands for instruction"); 2694 } 2695 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2696 if (ErrorLoc == SMLoc()) 2697 ErrorLoc = IDLoc; 2698 } 2699 return Error(ErrorLoc, "invalid operand for instruction"); 2700 } 2701 2702 case Match_PreferE32: 2703 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 2704 "should be encoded as e32"); 2705 } 2706 llvm_unreachable("Implement any new match types added!"); 2707 } 2708 2709 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2710 int64_t Tmp = -1; 2711 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2712 return true; 2713 } 2714 if (getParser().parseAbsoluteExpression(Tmp)) { 2715 return true; 2716 } 2717 Ret = static_cast<uint32_t>(Tmp); 2718 return false; 2719 } 2720 2721 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2722 uint32_t &Minor) { 2723 if (ParseAsAbsoluteExpression(Major)) 2724 return TokError("invalid major version"); 2725 2726 if (getLexer().isNot(AsmToken::Comma)) 2727 return TokError("minor version number required, comma expected"); 2728 Lex(); 2729 2730 if (ParseAsAbsoluteExpression(Minor)) 2731 return TokError("invalid minor version"); 2732 2733 return false; 2734 } 2735 2736 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2737 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2738 return TokError("directive only supported for amdgcn architecture"); 2739 2740 std::string Target; 2741 2742 SMLoc TargetStart = getTok().getLoc(); 2743 if (getParser().parseEscapedString(Target)) 2744 return true; 2745 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2746 2747 std::string ExpectedTarget; 2748 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2749 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2750 2751 if (Target != ExpectedTargetOS.str()) 2752 return getParser().Error(TargetRange.Start, "target must match options", 2753 TargetRange); 2754 2755 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2756 return false; 2757 } 2758 2759 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2760 return getParser().Error(Range.Start, "value out of range", Range); 2761 } 2762 2763 bool AMDGPUAsmParser::calculateGPRBlocks( 2764 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2765 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2766 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2767 unsigned &SGPRBlocks) { 2768 // TODO(scott.linder): These calculations are duplicated from 2769 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
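// Extra SGPRs implied by VCC, FLAT_SCRATCH and XNACK usage are added to the raw count before it is converted into granulated register blocks.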
2770 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2771 2772 unsigned NumVGPRs = NextFreeVGPR; 2773 unsigned NumSGPRs = NextFreeSGPR; 2774 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); 2775 2776 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2777 NumSGPRs > MaxAddressableNumSGPRs) 2778 return OutOfRangeError(SGPRRange); 2779 2780 NumSGPRs += 2781 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2782 2783 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2784 NumSGPRs > MaxAddressableNumSGPRs) 2785 return OutOfRangeError(SGPRRange); 2786 2787 if (Features.test(FeatureSGPRInitBug)) 2788 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2789 2790 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2791 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2792 2793 return false; 2794 } 2795 2796 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2797 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2798 return TokError("directive only supported for amdgcn architecture"); 2799 2800 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2801 return TokError("directive only supported for amdhsa OS"); 2802 2803 StringRef KernelName; 2804 if (getParser().parseIdentifier(KernelName)) 2805 return true; 2806 2807 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2808 2809 StringSet<> Seen; 2810 2811 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2812 2813 SMRange VGPRRange; 2814 uint64_t NextFreeVGPR = 0; 2815 SMRange SGPRRange; 2816 uint64_t NextFreeSGPR = 0; 2817 unsigned UserSGPRCount = 0; 2818 bool ReserveVCC = true; 2819 bool ReserveFlatScr = true; 2820 bool ReserveXNACK = hasXNACK(); 2821 2822 while (true) { 2823 while (getLexer().is(AsmToken::EndOfStatement)) 2824 Lex(); 2825 2826 if (getLexer().isNot(AsmToken::Identifier)) 2827 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2828 2829 StringRef ID = getTok().getIdentifier(); 2830 SMRange IDRange = getTok().getLocRange(); 2831 Lex(); 2832 2833 if (ID == ".end_amdhsa_kernel") 2834 break; 2835 2836 if (Seen.find(ID) != Seen.end()) 2837 return TokError(".amdhsa_ directives cannot be repeated"); 2838 Seen.insert(ID); 2839 2840 SMLoc ValStart = getTok().getLoc(); 2841 int64_t IVal; 2842 if (getParser().parseAbsoluteExpression(IVal)) 2843 return true; 2844 SMLoc ValEnd = getTok().getLoc(); 2845 SMRange ValRange = SMRange(ValStart, ValEnd); 2846 2847 if (IVal < 0) 2848 return OutOfRangeError(ValRange); 2849 2850 uint64_t Val = IVal; 2851 2852 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2853 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2854 return OutOfRangeError(RANGE); \ 2855 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2856 2857 if (ID == ".amdhsa_group_segment_fixed_size") { 2858 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2859 return OutOfRangeError(ValRange); 2860 KD.group_segment_fixed_size = Val; 2861 } else if (ID == ".amdhsa_private_segment_fixed_size") { 2862 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2863 return OutOfRangeError(ValRange); 2864 KD.private_segment_fixed_size = Val; 2865 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2866 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2867 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2868 Val, ValRange); 2869 UserSGPRCount++; 2870 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2871 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2872 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2873 ValRange); 2874 UserSGPRCount++; 2875 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2876 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2877 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2878 ValRange); 2879 UserSGPRCount++; 2880 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 2881 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2882 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2883 Val, ValRange); 2884 UserSGPRCount++; 2885 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2886 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2887 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2888 ValRange); 2889 UserSGPRCount++; 2890 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2891 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2892 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2893 ValRange); 2894 UserSGPRCount++; 2895 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2896 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2897 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2898 Val, ValRange); 2899 UserSGPRCount++; 2900 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2901 PARSE_BITS_ENTRY( 2902 KD.compute_pgm_rsrc2, 2903 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2904 ValRange); 2905 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2906 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2907 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 2908 ValRange); 2909 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 2910 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2911 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 2912 ValRange); 2913 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 2914 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2915 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 2916 ValRange); 2917 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 2918 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2919 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 2920 ValRange); 2921 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 2922 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2923 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 2924 ValRange); 2925 } else if (ID == ".amdhsa_next_free_vgpr") { 2926 VGPRRange = ValRange; 2927 NextFreeVGPR = Val; 2928 } else if (ID == ".amdhsa_next_free_sgpr") { 2929 SGPRRange = ValRange; 2930 NextFreeSGPR = Val; 2931 } else if (ID == ".amdhsa_reserve_vcc") { 2932 if (!isUInt<1>(Val)) 2933 return OutOfRangeError(ValRange); 2934 ReserveVCC = Val; 2935 } else if (ID == ".amdhsa_reserve_flat_scratch") { 2936 if (IVersion.Major < 7) 2937 return getParser().Error(IDRange.Start, "directive requires gfx7+", 2938 IDRange); 2939 if (!isUInt<1>(Val)) 2940 return OutOfRangeError(ValRange); 2941 ReserveFlatScr = Val; 2942 } else if (ID == ".amdhsa_reserve_xnack_mask") { 2943 if (IVersion.Major < 8) 2944 return getParser().Error(IDRange.Start, "directive requires gfx8+", 2945 IDRange); 2946 if (!isUInt<1>(Val)) 2947 return OutOfRangeError(ValRange); 2948 ReserveXNACK = Val; 2949 } else if (ID == ".amdhsa_float_round_mode_32") { 2950 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2951 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 2952 } else if (ID == ".amdhsa_float_round_mode_16_64") { 2953 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2954 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 2955 } else if (ID == ".amdhsa_float_denorm_mode_32") { 2956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2957 
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 2958 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 2959 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2960 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 2961 ValRange); 2962 } else if (ID == ".amdhsa_dx10_clamp") { 2963 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2964 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 2965 } else if (ID == ".amdhsa_ieee_mode") { 2966 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 2967 Val, ValRange); 2968 } else if (ID == ".amdhsa_fp16_overflow") { 2969 if (IVersion.Major < 9) 2970 return getParser().Error(IDRange.Start, "directive requires gfx9+", 2971 IDRange); 2972 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 2973 ValRange); 2974 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 2975 PARSE_BITS_ENTRY( 2976 KD.compute_pgm_rsrc2, 2977 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 2978 ValRange); 2979 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 2980 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2981 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 2982 Val, ValRange); 2983 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 2984 PARSE_BITS_ENTRY( 2985 KD.compute_pgm_rsrc2, 2986 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 2987 ValRange); 2988 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 2989 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2990 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 2991 Val, ValRange); 2992 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 2993 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2994 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 2995 Val, ValRange); 2996 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 2997 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2998 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 2999 Val, ValRange); 3000 } else if (ID == ".amdhsa_exception_int_div_zero") { 3001 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3002 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3003 Val, ValRange); 3004 } else { 3005 return getParser().Error(IDRange.Start, 3006 "unknown .amdhsa_kernel directive", IDRange); 3007 } 3008 3009 #undef PARSE_BITS_ENTRY 3010 } 3011 3012 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3013 return TokError(".amdhsa_next_free_vgpr directive is required"); 3014 3015 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3016 return TokError(".amdhsa_next_free_sgpr directive is required"); 3017 3018 unsigned VGPRBlocks; 3019 unsigned SGPRBlocks; 3020 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3021 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3022 SGPRRange, VGPRBlocks, SGPRBlocks)) 3023 return true; 3024 3025 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3026 VGPRBlocks)) 3027 return OutOfRangeError(VGPRRange); 3028 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3029 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3030 3031 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3032 SGPRBlocks)) 3033 return OutOfRangeError(SGPRRange); 3034 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3035 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3036 SGPRBlocks); 3037 3038 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3039 return TokError("too many user SGPRs enabled"); 3040 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3041 UserSGPRCount); 3042 3043 
getTargetStreamer().EmitAmdhsaKernelDescriptor( 3044 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3045 ReserveFlatScr, ReserveXNACK); 3046 return false; 3047 } 3048 3049 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3050 uint32_t Major; 3051 uint32_t Minor; 3052 3053 if (ParseDirectiveMajorMinor(Major, Minor)) 3054 return true; 3055 3056 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3057 return false; 3058 } 3059 3060 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3061 uint32_t Major; 3062 uint32_t Minor; 3063 uint32_t Stepping; 3064 StringRef VendorName; 3065 StringRef ArchName; 3066 3067 // If this directive has no arguments, then use the ISA version for the 3068 // targeted GPU. 3069 if (getLexer().is(AsmToken::EndOfStatement)) { 3070 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3071 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3072 ISA.Stepping, 3073 "AMD", "AMDGPU"); 3074 return false; 3075 } 3076 3077 if (ParseDirectiveMajorMinor(Major, Minor)) 3078 return true; 3079 3080 if (getLexer().isNot(AsmToken::Comma)) 3081 return TokError("stepping version number required, comma expected"); 3082 Lex(); 3083 3084 if (ParseAsAbsoluteExpression(Stepping)) 3085 return TokError("invalid stepping version"); 3086 3087 if (getLexer().isNot(AsmToken::Comma)) 3088 return TokError("vendor name required, comma expected"); 3089 Lex(); 3090 3091 if (getLexer().isNot(AsmToken::String)) 3092 return TokError("invalid vendor name"); 3093 3094 VendorName = getLexer().getTok().getStringContents(); 3095 Lex(); 3096 3097 if (getLexer().isNot(AsmToken::Comma)) 3098 return TokError("arch name required, comma expected"); 3099 Lex(); 3100 3101 if (getLexer().isNot(AsmToken::String)) 3102 return TokError("invalid arch name"); 3103 3104 ArchName = getLexer().getTok().getStringContents(); 3105 Lex(); 3106 3107 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3108 VendorName, ArchName); 3109 return false; 3110 } 3111 3112 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3113 amd_kernel_code_t &Header) { 3114 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3115 // assembly for backwards compatibility. 3116 if (ID == "max_scratch_backing_memory_byte_size") { 3117 Parser.eatToEndOfStatement(); 3118 return false; 3119 } 3120 3121 SmallString<40> ErrStr; 3122 raw_svector_ostream Err(ErrStr); 3123 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3124 return TokError(Err.str()); 3125 } 3126 Lex(); 3127 return false; 3128 } 3129 3130 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3131 amd_kernel_code_t Header; 3132 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3133 3134 while (true) { 3135 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3136 // will set the current token to EndOfStatement. 
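// Read amd_kernel_code_t fields until .end_amd_kernel_code_t is reached.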
3137 while(getLexer().is(AsmToken::EndOfStatement)) 3138 Lex(); 3139 3140 if (getLexer().isNot(AsmToken::Identifier)) 3141 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3142 3143 StringRef ID = getLexer().getTok().getIdentifier(); 3144 Lex(); 3145 3146 if (ID == ".end_amd_kernel_code_t") 3147 break; 3148 3149 if (ParseAMDKernelCodeTValue(ID, Header)) 3150 return true; 3151 } 3152 3153 getTargetStreamer().EmitAMDKernelCodeT(Header); 3154 3155 return false; 3156 } 3157 3158 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3159 if (getLexer().isNot(AsmToken::Identifier)) 3160 return TokError("expected symbol name"); 3161 3162 StringRef KernelName = Parser.getTok().getString(); 3163 3164 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3165 ELF::STT_AMDGPU_HSA_KERNEL); 3166 Lex(); 3167 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3168 KernelScope.initialize(getContext()); 3169 return false; 3170 } 3171 3172 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3173 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3174 return Error(getParser().getTok().getLoc(), 3175 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3176 "architectures"); 3177 } 3178 3179 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3180 3181 std::string ISAVersionStringFromSTI; 3182 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3183 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3184 3185 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3186 return Error(getParser().getTok().getLoc(), 3187 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3188 "arguments specified through the command line"); 3189 } 3190 3191 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3192 Lex(); 3193 3194 return false; 3195 } 3196 3197 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3198 const char *AssemblerDirectiveBegin; 3199 const char *AssemblerDirectiveEnd; 3200 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3201 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3202 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3203 HSAMD::V3::AssemblerDirectiveEnd) 3204 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3205 HSAMD::AssemblerDirectiveEnd); 3206 3207 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3208 return Error(getParser().getTok().getLoc(), 3209 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3210 "not available on non-amdhsa OSes")).str()); 3211 } 3212 3213 std::string HSAMetadataString; 3214 raw_string_ostream YamlStream(HSAMetadataString); 3215 3216 getLexer().setSkipSpace(false); 3217 3218 bool FoundEnd = false; 3219 while (!getLexer().is(AsmToken::Eof)) { 3220 while (getLexer().is(AsmToken::Space)) { 3221 YamlStream << getLexer().getTok().getString(); 3222 Lex(); 3223 } 3224 3225 if (getLexer().is(AsmToken::Identifier)) { 3226 StringRef ID = getLexer().getTok().getIdentifier(); 3227 if (ID == AssemblerDirectiveEnd) { 3228 Lex(); 3229 FoundEnd = true; 3230 break; 3231 } 3232 } 3233 3234 YamlStream << Parser.parseStringToEndOfStatement() 3235 << getContext().getAsmInfo()->getSeparatorString(); 3236 3237 Parser.eatToEndOfStatement(); 3238 } 3239 3240 getLexer().setSkipSpace(true); 3241 3242 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3243 return TokError(Twine("expected directive ") + 3244 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); 3245 } 3246 3247 YamlStream.flush(); 3248 3249 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3250 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3251 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3252 } else { 3253 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3254 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3255 } 3256 3257 return false; 3258 } 3259 3260 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3261 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3262 return Error(getParser().getTok().getLoc(), 3263 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3264 "not available on non-amdpal OSes")).str()); 3265 } 3266 3267 PALMD::Metadata PALMetadata; 3268 for (;;) { 3269 uint32_t Value; 3270 if (ParseAsAbsoluteExpression(Value)) { 3271 return TokError(Twine("invalid value in ") + 3272 Twine(PALMD::AssemblerDirective)); 3273 } 3274 PALMetadata.push_back(Value); 3275 if (getLexer().isNot(AsmToken::Comma)) 3276 break; 3277 Lex(); 3278 } 3279 getTargetStreamer().EmitPALMetadata(PALMetadata); 3280 return false; 3281 } 3282 3283 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3284 StringRef IDVal = DirectiveID.getString(); 3285 3286 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3287 if (IDVal == ".amdgcn_target") 3288 return ParseDirectiveAMDGCNTarget(); 3289 3290 if (IDVal == ".amdhsa_kernel") 3291 return ParseDirectiveAMDHSAKernel(); 3292 3293 // TODO: Restructure/combine with PAL metadata directive. 
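// With code object v3, HSA metadata uses the V3 begin/end directive pair.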
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3295 return ParseDirectiveHSAMetadata();
3296 } else {
3297 if (IDVal == ".hsa_code_object_version")
3298 return ParseDirectiveHSACodeObjectVersion();
3299
3300 if (IDVal == ".hsa_code_object_isa")
3301 return ParseDirectiveHSACodeObjectISA();
3302
3303 if (IDVal == ".amd_kernel_code_t")
3304 return ParseDirectiveAMDKernelCodeT();
3305
3306 if (IDVal == ".amdgpu_hsa_kernel")
3307 return ParseDirectiveAMDGPUHsaKernel();
3308
3309 if (IDVal == ".amd_amdgpu_isa")
3310 return ParseDirectiveISAVersion();
3311
3312 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3313 return ParseDirectiveHSAMetadata();
3314 }
3315
3316 if (IDVal == PALMD::AssemblerDirective)
3317 return ParseDirectivePALMetadata();
3318
3319 return true;
3320 }
3321
3322 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3323 unsigned RegNo) const {
3324
3325 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3326 R.isValid(); ++R) {
3327 if (*R == RegNo)
3328 return isGFX9();
3329 }
3330
3331 switch (RegNo) {
3332 case AMDGPU::TBA:
3333 case AMDGPU::TBA_LO:
3334 case AMDGPU::TBA_HI:
3335 case AMDGPU::TMA:
3336 case AMDGPU::TMA_LO:
3337 case AMDGPU::TMA_HI:
3338 return !isGFX9();
3339 case AMDGPU::XNACK_MASK:
3340 case AMDGPU::XNACK_MASK_LO:
3341 case AMDGPU::XNACK_MASK_HI:
3342 return !isCI() && !isSI() && hasXNACK();
3343 default:
3344 break;
3345 }
3346
3347 if (isCI())
3348 return true;
3349
3350 if (isSI()) {
3351 // No flat_scr
3352 switch (RegNo) {
3353 case AMDGPU::FLAT_SCR:
3354 case AMDGPU::FLAT_SCR_LO:
3355 case AMDGPU::FLAT_SCR_HI:
3356 return false;
3357 default:
3358 return true;
3359 }
3360 }
3361
3362 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3363 // SI/CI have.
3364 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3365 R.isValid(); ++R) {
3366 if (*R == RegNo)
3367 return false;
3368 }
3369
3370 return true;
3371 }
3372
3373 OperandMatchResultTy
3374 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3375 // Try to parse with a custom parser
3376 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3377
3378 // If we successfully parsed the operand or if there was an error parsing,
3379 // we are done.
3380 //
3381 // If we are parsing after we reach EndOfStatement then this means we
3382 // are appending default values to the Operands list. This is only done
3383 // by a custom parser, so we shouldn't continue on to the generic parsing.
3384 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3385 getLexer().is(AsmToken::EndOfStatement))
3386 return ResTy;
3387
3388 ResTy = parseRegOrImm(Operands);
3389
3390 if (ResTy == MatchOperand_Success)
3391 return ResTy;
3392
3393 const auto &Tok = Parser.getTok();
3394 SMLoc S = Tok.getLoc();
3395
3396 const MCExpr *Expr = nullptr;
3397 if (!Parser.parseExpression(Expr)) {
3398 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3399 return MatchOperand_Success;
3400 }
3401
3402 // Possibly this is an instruction flag like 'gds'.
3403 if (Tok.getKind() == AsmToken::Identifier) {
3404 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3405 Parser.Lex();
3406 return MatchOperand_Success;
3407 }
3408
3409 return MatchOperand_NoMatch;
3410 }
3411
3412 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3413 // Clear any forced encodings from the previous instruction.
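// For example, a mnemonic like 'v_add_f32_e64' forces the 64-bit encoding and is matched as plain 'v_add_f32'.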
3414 setForcedEncodingSize(0); 3415 setForcedDPP(false); 3416 setForcedSDWA(false); 3417 3418 if (Name.endswith("_e64")) { 3419 setForcedEncodingSize(64); 3420 return Name.substr(0, Name.size() - 4); 3421 } else if (Name.endswith("_e32")) { 3422 setForcedEncodingSize(32); 3423 return Name.substr(0, Name.size() - 4); 3424 } else if (Name.endswith("_dpp")) { 3425 setForcedDPP(true); 3426 return Name.substr(0, Name.size() - 4); 3427 } else if (Name.endswith("_sdwa")) { 3428 setForcedSDWA(true); 3429 return Name.substr(0, Name.size() - 5); 3430 } 3431 return Name; 3432 } 3433 3434 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3435 StringRef Name, 3436 SMLoc NameLoc, OperandVector &Operands) { 3437 // Add the instruction mnemonic 3438 Name = parseMnemonicSuffix(Name); 3439 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3440 3441 while (!getLexer().is(AsmToken::EndOfStatement)) { 3442 OperandMatchResultTy Res = parseOperand(Operands, Name); 3443 3444 // Eat the comma or space if there is one. 3445 if (getLexer().is(AsmToken::Comma)) 3446 Parser.Lex(); 3447 3448 switch (Res) { 3449 case MatchOperand_Success: break; 3450 case MatchOperand_ParseFail: 3451 Error(getLexer().getLoc(), "failed parsing operand."); 3452 while (!getLexer().is(AsmToken::EndOfStatement)) { 3453 Parser.Lex(); 3454 } 3455 return true; 3456 case MatchOperand_NoMatch: 3457 Error(getLexer().getLoc(), "not a valid operand."); 3458 while (!getLexer().is(AsmToken::EndOfStatement)) { 3459 Parser.Lex(); 3460 } 3461 return true; 3462 } 3463 } 3464 3465 return false; 3466 } 3467 3468 //===----------------------------------------------------------------------===// 3469 // Utility functions 3470 //===----------------------------------------------------------------------===// 3471 3472 OperandMatchResultTy 3473 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3474 switch(getLexer().getKind()) { 3475 default: return MatchOperand_NoMatch; 3476 case AsmToken::Identifier: { 3477 StringRef Name = Parser.getTok().getString(); 3478 if (!Name.equals(Prefix)) { 3479 return MatchOperand_NoMatch; 3480 } 3481 3482 Parser.Lex(); 3483 if (getLexer().isNot(AsmToken::Colon)) 3484 return MatchOperand_ParseFail; 3485 3486 Parser.Lex(); 3487 3488 bool IsMinus = false; 3489 if (getLexer().getKind() == AsmToken::Minus) { 3490 Parser.Lex(); 3491 IsMinus = true; 3492 } 3493 3494 if (getLexer().isNot(AsmToken::Integer)) 3495 return MatchOperand_ParseFail; 3496 3497 if (getParser().parseAbsoluteExpression(Int)) 3498 return MatchOperand_ParseFail; 3499 3500 if (IsMinus) 3501 Int = -Int; 3502 break; 3503 } 3504 } 3505 return MatchOperand_Success; 3506 } 3507 3508 OperandMatchResultTy 3509 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3510 AMDGPUOperand::ImmTy ImmTy, 3511 bool (*ConvertResult)(int64_t&)) { 3512 SMLoc S = Parser.getTok().getLoc(); 3513 int64_t Value = 0; 3514 3515 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3516 if (Res != MatchOperand_Success) 3517 return Res; 3518 3519 if (ConvertResult && !ConvertResult(Value)) { 3520 return MatchOperand_ParseFail; 3521 } 3522 3523 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3524 return MatchOperand_Success; 3525 } 3526 3527 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3528 const char *Prefix, 3529 OperandVector &Operands, 3530 AMDGPUOperand::ImmTy ImmTy, 3531 bool (*ConvertResult)(int64_t&)) { 3532 StringRef Name = Parser.getTok().getString(); 3533 
if (!Name.equals(Prefix)) 3534 return MatchOperand_NoMatch; 3535 3536 Parser.Lex(); 3537 if (getLexer().isNot(AsmToken::Colon)) 3538 return MatchOperand_ParseFail; 3539 3540 Parser.Lex(); 3541 if (getLexer().isNot(AsmToken::LBrac)) 3542 return MatchOperand_ParseFail; 3543 Parser.Lex(); 3544 3545 unsigned Val = 0; 3546 SMLoc S = Parser.getTok().getLoc(); 3547 3548 // FIXME: How to verify the number of elements matches the number of src 3549 // operands? 3550 for (int I = 0; I < 4; ++I) { 3551 if (I != 0) { 3552 if (getLexer().is(AsmToken::RBrac)) 3553 break; 3554 3555 if (getLexer().isNot(AsmToken::Comma)) 3556 return MatchOperand_ParseFail; 3557 Parser.Lex(); 3558 } 3559 3560 if (getLexer().isNot(AsmToken::Integer)) 3561 return MatchOperand_ParseFail; 3562 3563 int64_t Op; 3564 if (getParser().parseAbsoluteExpression(Op)) 3565 return MatchOperand_ParseFail; 3566 3567 if (Op != 0 && Op != 1) 3568 return MatchOperand_ParseFail; 3569 Val |= (Op << I); 3570 } 3571 3572 Parser.Lex(); 3573 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3574 return MatchOperand_Success; 3575 } 3576 3577 OperandMatchResultTy 3578 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3579 AMDGPUOperand::ImmTy ImmTy) { 3580 int64_t Bit = 0; 3581 SMLoc S = Parser.getTok().getLoc(); 3582 3583 // We are at the end of the statement, and this is a default argument, so 3584 // use a default value. 3585 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3586 switch(getLexer().getKind()) { 3587 case AsmToken::Identifier: { 3588 StringRef Tok = Parser.getTok().getString(); 3589 if (Tok == Name) { 3590 if (Tok == "r128" && isGFX9()) 3591 Error(S, "r128 modifier is not supported on this GPU"); 3592 if (Tok == "a16" && !isGFX9()) 3593 Error(S, "a16 modifier is not supported on this GPU"); 3594 Bit = 1; 3595 Parser.Lex(); 3596 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3597 Bit = 0; 3598 Parser.Lex(); 3599 } else { 3600 return MatchOperand_NoMatch; 3601 } 3602 break; 3603 } 3604 default: 3605 return MatchOperand_NoMatch; 3606 } 3607 } 3608 3609 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3610 return MatchOperand_Success; 3611 } 3612 3613 static void addOptionalImmOperand( 3614 MCInst& Inst, const OperandVector& Operands, 3615 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3616 AMDGPUOperand::ImmTy ImmT, 3617 int64_t Default = 0) { 3618 auto i = OptionalIdx.find(ImmT); 3619 if (i != OptionalIdx.end()) { 3620 unsigned Idx = i->second; 3621 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3622 } else { 3623 Inst.addOperand(MCOperand::createImm(Default)); 3624 } 3625 } 3626 3627 OperandMatchResultTy 3628 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3629 if (getLexer().isNot(AsmToken::Identifier)) { 3630 return MatchOperand_NoMatch; 3631 } 3632 StringRef Tok = Parser.getTok().getString(); 3633 if (Tok != Prefix) { 3634 return MatchOperand_NoMatch; 3635 } 3636 3637 Parser.Lex(); 3638 if (getLexer().isNot(AsmToken::Colon)) { 3639 return MatchOperand_ParseFail; 3640 } 3641 3642 Parser.Lex(); 3643 if (getLexer().isNot(AsmToken::Identifier)) { 3644 return MatchOperand_ParseFail; 3645 } 3646 3647 Value = Parser.getTok().getString(); 3648 return MatchOperand_Success; 3649 } 3650 3651 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3652 // values to live in a joint format operand in the MCInst encoding. 
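// A small worked example (values chosen only for illustration): "dfmt:6" together with
// "nfmt:1" is encoded below as Dfmt | (Nfmt << 4) = 6 | 16 = 0x16.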
3653 OperandMatchResultTy 3654 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3655 SMLoc S = Parser.getTok().getLoc(); 3656 int64_t Dfmt = 0, Nfmt = 0; 3657 // dfmt and nfmt can appear in either order, and each is optional. 3658 bool GotDfmt = false, GotNfmt = false; 3659 while (!GotDfmt || !GotNfmt) { 3660 if (!GotDfmt) { 3661 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3662 if (Res != MatchOperand_NoMatch) { 3663 if (Res != MatchOperand_Success) 3664 return Res; 3665 if (Dfmt >= 16) { 3666 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3667 return MatchOperand_ParseFail; 3668 } 3669 GotDfmt = true; 3670 Parser.Lex(); 3671 continue; 3672 } 3673 } 3674 if (!GotNfmt) { 3675 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3676 if (Res != MatchOperand_NoMatch) { 3677 if (Res != MatchOperand_Success) 3678 return Res; 3679 if (Nfmt >= 8) { 3680 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3681 return MatchOperand_ParseFail; 3682 } 3683 GotNfmt = true; 3684 Parser.Lex(); 3685 continue; 3686 } 3687 } 3688 break; 3689 } 3690 if (!GotDfmt && !GotNfmt) 3691 return MatchOperand_NoMatch; 3692 auto Format = Dfmt | Nfmt << 4; 3693 Operands.push_back( 3694 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3695 return MatchOperand_Success; 3696 } 3697 3698 //===----------------------------------------------------------------------===// 3699 // ds 3700 //===----------------------------------------------------------------------===// 3701 3702 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3703 const OperandVector &Operands) { 3704 OptionalImmIndexMap OptionalIdx; 3705 3706 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3707 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3708 3709 // Add the register arguments 3710 if (Op.isReg()) { 3711 Op.addRegOperands(Inst, 1); 3712 continue; 3713 } 3714 3715 // Handle optional arguments 3716 OptionalIdx[Op.getImmTy()] = i; 3717 } 3718 3719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3721 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3722 3723 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3724 } 3725 3726 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3727 bool IsGdsHardcoded) { 3728 OptionalImmIndexMap OptionalIdx; 3729 3730 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3731 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3732 3733 // Add the register arguments 3734 if (Op.isReg()) { 3735 Op.addRegOperands(Inst, 1); 3736 continue; 3737 } 3738 3739 if (Op.isToken() && Op.getToken() == "gds") { 3740 IsGdsHardcoded = true; 3741 continue; 3742 } 3743 3744 // Handle optional arguments 3745 OptionalIdx[Op.getImmTy()] = i; 3746 } 3747 3748 AMDGPUOperand::ImmTy OffsetType = 3749 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3750 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3751 AMDGPUOperand::ImmTyOffset; 3752 3753 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3754 3755 if (!IsGdsHardcoded) { 3756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3757 } 3758 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3759 } 3760 3761 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3762 OptionalImmIndexMap OptionalIdx; 3763 3764 unsigned OperandIdx[4]; 3765 unsigned EnMask = 0; 3766 int SrcIdx = 0; 3767 3768 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3769 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3770 3771 // Add the register arguments 3772 if (Op.isReg()) { 3773 assert(SrcIdx < 4); 3774 OperandIdx[SrcIdx] = Inst.size(); 3775 Op.addRegOperands(Inst, 1); 3776 ++SrcIdx; 3777 continue; 3778 } 3779 3780 if (Op.isOff()) { 3781 assert(SrcIdx < 4); 3782 OperandIdx[SrcIdx] = Inst.size(); 3783 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3784 ++SrcIdx; 3785 continue; 3786 } 3787 3788 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3789 Op.addImmOperands(Inst, 1); 3790 continue; 3791 } 3792 3793 if (Op.isToken() && Op.getToken() == "done") 3794 continue; 3795 3796 // Handle optional arguments 3797 OptionalIdx[Op.getImmTy()] = i; 3798 } 3799 3800 assert(SrcIdx == 4); 3801 3802 bool Compr = false; 3803 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3804 Compr = true; 3805 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3806 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3807 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3808 } 3809 3810 for (auto i = 0; i < SrcIdx; ++i) { 3811 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3812 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3813 } 3814 } 3815 3816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3818 3819 Inst.addOperand(MCOperand::createImm(EnMask)); 3820 } 3821 3822 //===----------------------------------------------------------------------===// 3823 // s_waitcnt 3824 //===----------------------------------------------------------------------===// 3825 3826 static bool 3827 encodeCnt( 3828 const AMDGPU::IsaVersion ISA, 3829 int64_t &IntVal, 3830 int64_t CntVal, 3831 bool Saturate, 3832 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 3833 unsigned (*decode)(const IsaVersion &Version, unsigned)) 3834 { 3835 bool Failed = false; 3836 3837 IntVal = encode(ISA, IntVal, CntVal); 3838 if (CntVal != decode(ISA, IntVal)) { 3839 if (Saturate) { 3840 IntVal = encode(ISA, IntVal, -1); 3841 } else { 3842 Failed = true; 3843 } 3844 } 3845 return Failed; 3846 } 3847 3848 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3849 StringRef CntName = Parser.getTok().getString(); 3850 int64_t CntVal; 3851 3852 Parser.Lex(); 3853 if (getLexer().isNot(AsmToken::LParen)) 3854 return true; 3855 3856 Parser.Lex(); 3857 if (getLexer().isNot(AsmToken::Integer)) 3858 return true; 3859 3860 SMLoc ValLoc = Parser.getTok().getLoc(); 3861 if (getParser().parseAbsoluteExpression(CntVal)) 3862 return true; 3863 3864 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3865 3866 bool Failed = true; 3867 bool Sat = CntName.endswith("_sat"); 3868 3869 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 3870 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 3871 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 3872 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 3873 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 3874 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 3875 } 3876 3877 if (Failed) { 3878 Error(ValLoc, "too large value for " + CntName); 3879 return true; 3880 } 3881 3882 if (getLexer().isNot(AsmToken::RParen)) { 3883 return true; 3884 } 3885 3886 Parser.Lex(); 3887 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 3888 const AsmToken NextToken = getLexer().peekTok(); 3889 if (NextToken.is(AsmToken::Identifier)) { 3890 Parser.Lex(); 3891 } 3892 } 3893 3894 return false; 3895 } 3896 3897 OperandMatchResultTy 3898 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 3899 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3900 int64_t Waitcnt = getWaitcntBitMask(ISA); 3901 SMLoc S = Parser.getTok().getLoc(); 3902 3903 switch(getLexer().getKind()) { 3904 default: return MatchOperand_ParseFail; 3905 case AsmToken::Integer: 3906 // The operand can be an integer value. 
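// For instance, "s_waitcnt 0" passes the raw encoding through directly; the
// named-counter forms such as "s_waitcnt vmcnt(0) & lgkmcnt(0)" are handled by the
// Identifier case below via parseCnt.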
3907 if (getParser().parseAbsoluteExpression(Waitcnt)) 3908 return MatchOperand_ParseFail; 3909 break; 3910 3911 case AsmToken::Identifier: 3912 do { 3913 if (parseCnt(Waitcnt)) 3914 return MatchOperand_ParseFail; 3915 } while(getLexer().isNot(AsmToken::EndOfStatement)); 3916 break; 3917 } 3918 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 3919 return MatchOperand_Success; 3920 } 3921 3922 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 3923 int64_t &Width) { 3924 using namespace llvm::AMDGPU::Hwreg; 3925 3926 if (Parser.getTok().getString() != "hwreg") 3927 return true; 3928 Parser.Lex(); 3929 3930 if (getLexer().isNot(AsmToken::LParen)) 3931 return true; 3932 Parser.Lex(); 3933 3934 if (getLexer().is(AsmToken::Identifier)) { 3935 HwReg.IsSymbolic = true; 3936 HwReg.Id = ID_UNKNOWN_; 3937 const StringRef tok = Parser.getTok().getString(); 3938 int Last = ID_SYMBOLIC_LAST_; 3939 if (isSI() || isCI() || isVI()) 3940 Last = ID_SYMBOLIC_FIRST_GFX9_; 3941 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 3942 if (tok == IdSymbolic[i]) { 3943 HwReg.Id = i; 3944 break; 3945 } 3946 } 3947 Parser.Lex(); 3948 } else { 3949 HwReg.IsSymbolic = false; 3950 if (getLexer().isNot(AsmToken::Integer)) 3951 return true; 3952 if (getParser().parseAbsoluteExpression(HwReg.Id)) 3953 return true; 3954 } 3955 3956 if (getLexer().is(AsmToken::RParen)) { 3957 Parser.Lex(); 3958 return false; 3959 } 3960 3961 // optional params 3962 if (getLexer().isNot(AsmToken::Comma)) 3963 return true; 3964 Parser.Lex(); 3965 3966 if (getLexer().isNot(AsmToken::Integer)) 3967 return true; 3968 if (getParser().parseAbsoluteExpression(Offset)) 3969 return true; 3970 3971 if (getLexer().isNot(AsmToken::Comma)) 3972 return true; 3973 Parser.Lex(); 3974 3975 if (getLexer().isNot(AsmToken::Integer)) 3976 return true; 3977 if (getParser().parseAbsoluteExpression(Width)) 3978 return true; 3979 3980 if (getLexer().isNot(AsmToken::RParen)) 3981 return true; 3982 Parser.Lex(); 3983 3984 return false; 3985 } 3986 3987 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 3988 using namespace llvm::AMDGPU::Hwreg; 3989 3990 int64_t Imm16Val = 0; 3991 SMLoc S = Parser.getTok().getLoc(); 3992 3993 switch(getLexer().getKind()) { 3994 default: return MatchOperand_NoMatch; 3995 case AsmToken::Integer: 3996 // The operand can be an integer value. 3997 if (getParser().parseAbsoluteExpression(Imm16Val)) 3998 return MatchOperand_NoMatch; 3999 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4000 Error(S, "invalid immediate: only 16-bit values are legal"); 4001 // Do not return error code, but create an imm operand anyway and proceed 4002 // to the next operand, if any. That avoids unneccessary error messages. 
4003 } 4004 break; 4005 4006 case AsmToken::Identifier: { 4007 OperandInfoTy HwReg(ID_UNKNOWN_); 4008 int64_t Offset = OFFSET_DEFAULT_; 4009 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4010 if (parseHwregConstruct(HwReg, Offset, Width)) 4011 return MatchOperand_ParseFail; 4012 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4013 if (HwReg.IsSymbolic) 4014 Error(S, "invalid symbolic name of hardware register"); 4015 else 4016 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4017 } 4018 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4019 Error(S, "invalid bit offset: only 5-bit values are legal"); 4020 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4021 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4022 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4023 } 4024 break; 4025 } 4026 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4027 return MatchOperand_Success; 4028 } 4029 4030 bool AMDGPUOperand::isSWaitCnt() const { 4031 return isImm(); 4032 } 4033 4034 bool AMDGPUOperand::isHwreg() const { 4035 return isImmTy(ImmTyHwreg); 4036 } 4037 4038 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4039 using namespace llvm::AMDGPU::SendMsg; 4040 4041 if (Parser.getTok().getString() != "sendmsg") 4042 return true; 4043 Parser.Lex(); 4044 4045 if (getLexer().isNot(AsmToken::LParen)) 4046 return true; 4047 Parser.Lex(); 4048 4049 if (getLexer().is(AsmToken::Identifier)) { 4050 Msg.IsSymbolic = true; 4051 Msg.Id = ID_UNKNOWN_; 4052 const std::string tok = Parser.getTok().getString(); 4053 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4054 switch(i) { 4055 default: continue; // Omit gaps. 4056 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 4057 } 4058 if (tok == IdSymbolic[i]) { 4059 Msg.Id = i; 4060 break; 4061 } 4062 } 4063 Parser.Lex(); 4064 } else { 4065 Msg.IsSymbolic = false; 4066 if (getLexer().isNot(AsmToken::Integer)) 4067 return true; 4068 if (getParser().parseAbsoluteExpression(Msg.Id)) 4069 return true; 4070 if (getLexer().is(AsmToken::Integer)) 4071 if (getParser().parseAbsoluteExpression(Msg.Id)) 4072 Msg.Id = ID_UNKNOWN_; 4073 } 4074 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4075 return false; 4076 4077 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4078 if (getLexer().isNot(AsmToken::RParen)) 4079 return true; 4080 Parser.Lex(); 4081 return false; 4082 } 4083 4084 if (getLexer().isNot(AsmToken::Comma)) 4085 return true; 4086 Parser.Lex(); 4087 4088 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4089 Operation.Id = ID_UNKNOWN_; 4090 if (getLexer().is(AsmToken::Identifier)) { 4091 Operation.IsSymbolic = true; 4092 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4093 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4094 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4095 const StringRef Tok = Parser.getTok().getString(); 4096 for (int i = F; i < L; ++i) { 4097 if (Tok == S[i]) { 4098 Operation.Id = i; 4099 break; 4100 } 4101 } 4102 Parser.Lex(); 4103 } else { 4104 Operation.IsSymbolic = false; 4105 if (getLexer().isNot(AsmToken::Integer)) 4106 return true; 4107 if (getParser().parseAbsoluteExpression(Operation.Id)) 4108 return true; 4109 } 4110 4111 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4112 // Stream id is optional. 4113 if (getLexer().is(AsmToken::RParen)) { 4114 Parser.Lex(); 4115 return false; 4116 } 4117 4118 if (getLexer().isNot(AsmToken::Comma)) 4119 return true; 4120 Parser.Lex(); 4121 4122 if (getLexer().isNot(AsmToken::Integer)) 4123 return true; 4124 if (getParser().parseAbsoluteExpression(StreamId)) 4125 return true; 4126 } 4127 4128 if (getLexer().isNot(AsmToken::RParen)) 4129 return true; 4130 Parser.Lex(); 4131 return false; 4132 } 4133 4134 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4135 if (getLexer().getKind() != AsmToken::Identifier) 4136 return MatchOperand_NoMatch; 4137 4138 StringRef Str = Parser.getTok().getString(); 4139 int Slot = StringSwitch<int>(Str) 4140 .Case("p10", 0) 4141 .Case("p20", 1) 4142 .Case("p0", 2) 4143 .Default(-1); 4144 4145 SMLoc S = Parser.getTok().getLoc(); 4146 if (Slot == -1) 4147 return MatchOperand_ParseFail; 4148 4149 Parser.Lex(); 4150 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4151 AMDGPUOperand::ImmTyInterpSlot)); 4152 return MatchOperand_Success; 4153 } 4154 4155 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4156 if (getLexer().getKind() != AsmToken::Identifier) 4157 return MatchOperand_NoMatch; 4158 4159 StringRef Str = Parser.getTok().getString(); 4160 if (!Str.startswith("attr")) 4161 return MatchOperand_NoMatch; 4162 4163 StringRef Chan = Str.take_back(2); 4164 int AttrChan = StringSwitch<int>(Chan) 4165 .Case(".x", 0) 4166 .Case(".y", 1) 4167 .Case(".z", 2) 4168 .Case(".w", 3) 4169 .Default(-1); 4170 if (AttrChan == -1) 4171 return MatchOperand_ParseFail; 4172 4173 Str = Str.drop_back(2).drop_front(4); 4174 4175 uint8_t Attr; 4176 if (Str.getAsInteger(10, Attr)) 4177 return MatchOperand_ParseFail; 4178 4179 SMLoc S = Parser.getTok().getLoc(); 4180 Parser.Lex(); 4181 if (Attr > 63) { 4182 Error(S, "out of bounds attr"); 4183 return MatchOperand_Success; 4184 } 4185 4186 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4187 4188 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4189 AMDGPUOperand::ImmTyInterpAttr)); 4190 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4191 AMDGPUOperand::ImmTyAttrChan)); 4192 return MatchOperand_Success; 4193 } 4194 4195 void AMDGPUAsmParser::errorExpTgt() { 4196 Error(Parser.getTok().getLoc(), "invalid exp target"); 4197 } 4198 4199 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4200 uint8_t &Val) { 4201 if (Str == "null") { 4202 Val = 9; 4203 return MatchOperand_Success; 4204 } 4205 4206 if (Str.startswith("mrt")) { 4207 Str = Str.drop_front(3); 4208 if (Str == "z") { // == mrtz 4209 Val = 8; 4210 return MatchOperand_Success; 4211 } 4212 4213 if (Str.getAsInteger(10, Val)) 4214 return MatchOperand_ParseFail; 4215 4216 if (Val > 7) 4217 errorExpTgt(); 4218 4219 return MatchOperand_Success; 4220 } 4221 4222 if (Str.startswith("pos")) { 4223 Str = Str.drop_front(3); 4224 if (Str.getAsInteger(10, Val)) 4225 return MatchOperand_ParseFail; 4226 4227 if (Val > 
3) 4228 errorExpTgt(); 4229 4230 Val += 12; 4231 return MatchOperand_Success; 4232 } 4233 4234 if (Str.startswith("param")) { 4235 Str = Str.drop_front(5); 4236 if (Str.getAsInteger(10, Val)) 4237 return MatchOperand_ParseFail; 4238 4239 if (Val >= 32) 4240 errorExpTgt(); 4241 4242 Val += 32; 4243 return MatchOperand_Success; 4244 } 4245 4246 if (Str.startswith("invalid_target_")) { 4247 Str = Str.drop_front(15); 4248 if (Str.getAsInteger(10, Val)) 4249 return MatchOperand_ParseFail; 4250 4251 errorExpTgt(); 4252 return MatchOperand_Success; 4253 } 4254 4255 return MatchOperand_NoMatch; 4256 } 4257 4258 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4259 uint8_t Val; 4260 StringRef Str = Parser.getTok().getString(); 4261 4262 auto Res = parseExpTgtImpl(Str, Val); 4263 if (Res != MatchOperand_Success) 4264 return Res; 4265 4266 SMLoc S = Parser.getTok().getLoc(); 4267 Parser.Lex(); 4268 4269 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4270 AMDGPUOperand::ImmTyExpTgt)); 4271 return MatchOperand_Success; 4272 } 4273 4274 OperandMatchResultTy 4275 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4276 using namespace llvm::AMDGPU::SendMsg; 4277 4278 int64_t Imm16Val = 0; 4279 SMLoc S = Parser.getTok().getLoc(); 4280 4281 switch(getLexer().getKind()) { 4282 default: 4283 return MatchOperand_NoMatch; 4284 case AsmToken::Integer: 4285 // The operand can be an integer value. 4286 if (getParser().parseAbsoluteExpression(Imm16Val)) 4287 return MatchOperand_NoMatch; 4288 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4289 Error(S, "invalid immediate: only 16-bit values are legal"); 4290 // Do not return error code, but create an imm operand anyway and proceed 4291 // to the next operand, if any. That avoids unneccessary error messages. 4292 } 4293 break; 4294 case AsmToken::Identifier: { 4295 OperandInfoTy Msg(ID_UNKNOWN_); 4296 OperandInfoTy Operation(OP_UNKNOWN_); 4297 int64_t StreamId = STREAM_ID_DEFAULT_; 4298 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4299 return MatchOperand_ParseFail; 4300 do { 4301 // Validate and encode message ID. 4302 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4303 || Msg.Id == ID_SYSMSG)) { 4304 if (Msg.IsSymbolic) 4305 Error(S, "invalid/unsupported symbolic name of message"); 4306 else 4307 Error(S, "invalid/unsupported code of message"); 4308 break; 4309 } 4310 Imm16Val = (Msg.Id << ID_SHIFT_); 4311 // Validate and encode operation ID. 4312 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4313 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4314 if (Operation.IsSymbolic) 4315 Error(S, "invalid symbolic name of GS_OP"); 4316 else 4317 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4318 break; 4319 } 4320 if (Operation.Id == OP_GS_NOP 4321 && Msg.Id != ID_GS_DONE) { 4322 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4323 break; 4324 } 4325 Imm16Val |= (Operation.Id << OP_SHIFT_); 4326 } 4327 if (Msg.Id == ID_SYSMSG) { 4328 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4329 if (Operation.IsSymbolic) 4330 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4331 else 4332 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4333 break; 4334 } 4335 Imm16Val |= (Operation.Id << OP_SHIFT_); 4336 } 4337 // Validate and encode stream ID. 4338 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4339 if (! 
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4340 Error(S, "invalid stream id: only 2-bit values are legal"); 4341 break; 4342 } 4343 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4344 } 4345 } while (false); 4346 } 4347 break; 4348 } 4349 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4350 return MatchOperand_Success; 4351 } 4352 4353 bool AMDGPUOperand::isSendMsg() const { 4354 return isImmTy(ImmTySendMsg); 4355 } 4356 4357 //===----------------------------------------------------------------------===// 4358 // parser helpers 4359 //===----------------------------------------------------------------------===// 4360 4361 bool 4362 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4363 if (getLexer().getKind() == AsmToken::Identifier && 4364 Parser.getTok().getString() == Id) { 4365 Parser.Lex(); 4366 return true; 4367 } 4368 return false; 4369 } 4370 4371 bool 4372 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4373 if (getLexer().getKind() == Kind) { 4374 Parser.Lex(); 4375 return true; 4376 } 4377 return false; 4378 } 4379 4380 bool 4381 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4382 const StringRef ErrMsg) { 4383 if (!trySkipToken(Kind)) { 4384 Error(Parser.getTok().getLoc(), ErrMsg); 4385 return false; 4386 } 4387 return true; 4388 } 4389 4390 bool 4391 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4392 return !getParser().parseAbsoluteExpression(Imm); 4393 } 4394 4395 bool 4396 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4397 SMLoc S = Parser.getTok().getLoc(); 4398 if (getLexer().getKind() == AsmToken::String) { 4399 Val = Parser.getTok().getStringContents(); 4400 Parser.Lex(); 4401 return true; 4402 } else { 4403 Error(S, ErrMsg); 4404 return false; 4405 } 4406 } 4407 4408 //===----------------------------------------------------------------------===// 4409 // swizzle 4410 //===----------------------------------------------------------------------===// 4411 4412 LLVM_READNONE 4413 static unsigned 4414 encodeBitmaskPerm(const unsigned AndMask, 4415 const unsigned OrMask, 4416 const unsigned XorMask) { 4417 using namespace llvm::AMDGPU::Swizzle; 4418 4419 return BITMASK_PERM_ENC | 4420 (AndMask << BITMASK_AND_SHIFT) | 4421 (OrMask << BITMASK_OR_SHIFT) | 4422 (XorMask << BITMASK_XOR_SHIFT); 4423 } 4424 4425 bool 4426 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4427 const unsigned MinVal, 4428 const unsigned MaxVal, 4429 const StringRef ErrMsg) { 4430 for (unsigned i = 0; i < OpNum; ++i) { 4431 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4432 return false; 4433 } 4434 SMLoc ExprLoc = Parser.getTok().getLoc(); 4435 if (!parseExpr(Op[i])) { 4436 return false; 4437 } 4438 if (Op[i] < MinVal || Op[i] > MaxVal) { 4439 Error(ExprLoc, ErrMsg); 4440 return false; 4441 } 4442 } 4443 4444 return true; 4445 } 4446 4447 bool 4448 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4449 using namespace llvm::AMDGPU::Swizzle; 4450 4451 int64_t Lane[LANE_NUM]; 4452 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4453 "expected a 2-bit lane id")) { 4454 Imm = QUAD_PERM_ENC; 4455 for (auto i = 0; i < LANE_NUM; ++i) { 4456 Imm |= Lane[i] << (LANE_SHIFT * i); 4457 } 4458 return true; 4459 } 4460 return false; 4461 } 4462 4463 bool 4464 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4465 using namespace llvm::AMDGPU::Swizzle; 4466 4467 SMLoc S = Parser.getTok().getLoc(); 4468 int64_t GroupSize; 4469 int64_t LaneIdx; 4470 4471 if 
(!parseSwizzleOperands(1, &GroupSize, 4472 2, 32, 4473 "group size must be in the interval [2,32]")) { 4474 return false; 4475 } 4476 if (!isPowerOf2_64(GroupSize)) { 4477 Error(S, "group size must be a power of two"); 4478 return false; 4479 } 4480 if (parseSwizzleOperands(1, &LaneIdx, 4481 0, GroupSize - 1, 4482 "lane id must be in the interval [0,group size - 1]")) { 4483 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4484 return true; 4485 } 4486 return false; 4487 } 4488 4489 bool 4490 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4491 using namespace llvm::AMDGPU::Swizzle; 4492 4493 SMLoc S = Parser.getTok().getLoc(); 4494 int64_t GroupSize; 4495 4496 if (!parseSwizzleOperands(1, &GroupSize, 4497 2, 32, "group size must be in the interval [2,32]")) { 4498 return false; 4499 } 4500 if (!isPowerOf2_64(GroupSize)) { 4501 Error(S, "group size must be a power of two"); 4502 return false; 4503 } 4504 4505 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4506 return true; 4507 } 4508 4509 bool 4510 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4511 using namespace llvm::AMDGPU::Swizzle; 4512 4513 SMLoc S = Parser.getTok().getLoc(); 4514 int64_t GroupSize; 4515 4516 if (!parseSwizzleOperands(1, &GroupSize, 4517 1, 16, "group size must be in the interval [1,16]")) { 4518 return false; 4519 } 4520 if (!isPowerOf2_64(GroupSize)) { 4521 Error(S, "group size must be a power of two"); 4522 return false; 4523 } 4524 4525 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4526 return true; 4527 } 4528 4529 bool 4530 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4531 using namespace llvm::AMDGPU::Swizzle; 4532 4533 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4534 return false; 4535 } 4536 4537 StringRef Ctl; 4538 SMLoc StrLoc = Parser.getTok().getLoc(); 4539 if (!parseString(Ctl)) { 4540 return false; 4541 } 4542 if (Ctl.size() != BITMASK_WIDTH) { 4543 Error(StrLoc, "expected a 5-character mask"); 4544 return false; 4545 } 4546 4547 unsigned AndMask = 0; 4548 unsigned OrMask = 0; 4549 unsigned XorMask = 0; 4550 4551 for (size_t i = 0; i < Ctl.size(); ++i) { 4552 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4553 switch(Ctl[i]) { 4554 default: 4555 Error(StrLoc, "invalid mask"); 4556 return false; 4557 case '0': 4558 break; 4559 case '1': 4560 OrMask |= Mask; 4561 break; 4562 case 'p': 4563 AndMask |= Mask; 4564 break; 4565 case 'i': 4566 AndMask |= Mask; 4567 XorMask |= Mask; 4568 break; 4569 } 4570 } 4571 4572 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4573 return true; 4574 } 4575 4576 bool 4577 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4578 4579 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4580 4581 if (!parseExpr(Imm)) { 4582 return false; 4583 } 4584 if (!isUInt<16>(Imm)) { 4585 Error(OffsetLoc, "expected a 16-bit offset"); 4586 return false; 4587 } 4588 return true; 4589 } 4590 4591 bool 4592 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4593 using namespace llvm::AMDGPU::Swizzle; 4594 4595 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4596 4597 SMLoc ModeLoc = Parser.getTok().getLoc(); 4598 bool Ok = false; 4599 4600 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4601 Ok = parseSwizzleQuadPerm(Imm); 4602 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4603 Ok = parseSwizzleBitmaskPerm(Imm); 4604 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4605 Ok = parseSwizzleBroadcast(Imm); 4606 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4607 Ok = parseSwizzleSwap(Imm); 4608 } else if 
(trySkipId(IdSymbolic[ID_REVERSE])) { 4609 Ok = parseSwizzleReverse(Imm); 4610 } else { 4611 Error(ModeLoc, "expected a swizzle mode"); 4612 } 4613 4614 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4615 } 4616 4617 return false; 4618 } 4619 4620 OperandMatchResultTy 4621 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4622 SMLoc S = Parser.getTok().getLoc(); 4623 int64_t Imm = 0; 4624 4625 if (trySkipId("offset")) { 4626 4627 bool Ok = false; 4628 if (skipToken(AsmToken::Colon, "expected a colon")) { 4629 if (trySkipId("swizzle")) { 4630 Ok = parseSwizzleMacro(Imm); 4631 } else { 4632 Ok = parseSwizzleOffset(Imm); 4633 } 4634 } 4635 4636 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4637 4638 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4639 } else { 4640 // Swizzle "offset" operand is optional. 4641 // If it is omitted, try parsing other optional operands. 4642 return parseOptionalOpr(Operands); 4643 } 4644 } 4645 4646 bool 4647 AMDGPUOperand::isSwizzle() const { 4648 return isImmTy(ImmTySwizzle); 4649 } 4650 4651 //===----------------------------------------------------------------------===// 4652 // sopp branch targets 4653 //===----------------------------------------------------------------------===// 4654 4655 OperandMatchResultTy 4656 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4657 SMLoc S = Parser.getTok().getLoc(); 4658 4659 switch (getLexer().getKind()) { 4660 default: return MatchOperand_ParseFail; 4661 case AsmToken::Integer: { 4662 int64_t Imm; 4663 if (getParser().parseAbsoluteExpression(Imm)) 4664 return MatchOperand_ParseFail; 4665 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4666 return MatchOperand_Success; 4667 } 4668 4669 case AsmToken::Identifier: 4670 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4671 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4672 Parser.getTok().getString()), getContext()), S)); 4673 Parser.Lex(); 4674 return MatchOperand_Success; 4675 } 4676 } 4677 4678 //===----------------------------------------------------------------------===// 4679 // mubuf 4680 //===----------------------------------------------------------------------===// 4681 4682 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4683 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4684 } 4685 4686 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4687 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4688 } 4689 4690 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4691 const OperandVector &Operands, 4692 bool IsAtomic, 4693 bool IsAtomicReturn, 4694 bool IsLds) { 4695 bool IsLdsOpcode = IsLds; 4696 bool HasLdsModifier = false; 4697 OptionalImmIndexMap OptionalIdx; 4698 assert(IsAtomicReturn ? IsAtomic : true); 4699 4700 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4701 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4702 4703 // Add the register arguments 4704 if (Op.isReg()) { 4705 Op.addRegOperands(Inst, 1); 4706 continue; 4707 } 4708 4709 // Handle the case where soffset is an immediate 4710 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4711 Op.addImmOperands(Inst, 1); 4712 continue; 4713 } 4714 4715 HasLdsModifier = Op.isLDS(); 4716 4717 // Handle tokens like 'offen' which are sometimes hard-coded into the 4718 // asm string. There are no MCInst operands for these. 
4719 if (Op.isToken()) { 4720 continue; 4721 } 4722 assert(Op.isImm()); 4723 4724 // Handle optional arguments 4725 OptionalIdx[Op.getImmTy()] = i; 4726 } 4727 4728 // This is a workaround for an llvm quirk which may result in an 4729 // incorrect instruction selection. Lds and non-lds versions of 4730 // MUBUF instructions are identical except that lds versions 4731 // have mandatory 'lds' modifier. However this modifier follows 4732 // optional modifiers and llvm asm matcher regards this 'lds' 4733 // modifier as an optional one. As a result, an lds version 4734 // of opcode may be selected even if it has no 'lds' modifier. 4735 if (IsLdsOpcode && !HasLdsModifier) { 4736 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4737 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4738 Inst.setOpcode(NoLdsOpcode); 4739 IsLdsOpcode = false; 4740 } 4741 } 4742 4743 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4744 if (IsAtomicReturn) { 4745 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4746 Inst.insert(I, *I); 4747 } 4748 4749 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4750 if (!IsAtomic) { // glc is hard-coded. 4751 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4752 } 4753 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4754 4755 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4757 } 4758 } 4759 4760 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4761 OptionalImmIndexMap OptionalIdx; 4762 4763 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4764 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4765 4766 // Add the register arguments 4767 if (Op.isReg()) { 4768 Op.addRegOperands(Inst, 1); 4769 continue; 4770 } 4771 4772 // Handle the case where soffset is an immediate 4773 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4774 Op.addImmOperands(Inst, 1); 4775 continue; 4776 } 4777 4778 // Handle tokens like 'offen' which are sometimes hard-coded into the 4779 // asm string. There are no MCInst operands for these. 
4780 if (Op.isToken()) { 4781 continue; 4782 } 4783 assert(Op.isImm()); 4784 4785 // Handle optional arguments 4786 OptionalIdx[Op.getImmTy()] = i; 4787 } 4788 4789 addOptionalImmOperand(Inst, Operands, OptionalIdx, 4790 AMDGPUOperand::ImmTyOffset); 4791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 4792 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4793 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4794 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4795 } 4796 4797 //===----------------------------------------------------------------------===// 4798 // mimg 4799 //===----------------------------------------------------------------------===// 4800 4801 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 4802 bool IsAtomic) { 4803 unsigned I = 1; 4804 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4805 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4806 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4807 } 4808 4809 if (IsAtomic) { 4810 // Add src, same as dst 4811 assert(Desc.getNumDefs() == 1); 4812 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 4813 } 4814 4815 OptionalImmIndexMap OptionalIdx; 4816 4817 for (unsigned E = Operands.size(); I != E; ++I) { 4818 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4819 4820 // Add the register arguments 4821 if (Op.isReg()) { 4822 Op.addRegOperands(Inst, 1); 4823 } else if (Op.isImmModifier()) { 4824 OptionalIdx[Op.getImmTy()] = I; 4825 } else { 4826 llvm_unreachable("unexpected operand type"); 4827 } 4828 } 4829 4830 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 4831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 4832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 4835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 4837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 4838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 4839 } 4840 4841 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 4842 cvtMIMG(Inst, Operands, true); 4843 } 4844 4845 //===----------------------------------------------------------------------===// 4846 // smrd 4847 //===----------------------------------------------------------------------===// 4848 4849 bool AMDGPUOperand::isSMRDOffset8() const { 4850 return isImm() && isUInt<8>(getImm()); 4851 } 4852 4853 bool AMDGPUOperand::isSMRDOffset20() const { 4854 return isImm() && isUInt<20>(getImm()); 4855 } 4856 4857 bool AMDGPUOperand::isSMRDLiteralOffset() const { 4858 // 32-bit literals are only supported on CI and we only want to use them 4859 // when the offset is > 8-bits. 
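// For instance, an offset of 0x1ff does not fit in 8 bits but does fit in 32 bits,
// so it is accepted as a literal offset here.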
4860 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 4861 } 4862 4863 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 4864 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4865 } 4866 4867 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 4868 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4869 } 4870 4871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 4872 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4873 } 4874 4875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 4876 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4877 } 4878 4879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 4880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4881 } 4882 4883 //===----------------------------------------------------------------------===// 4884 // vop3 4885 //===----------------------------------------------------------------------===// 4886 4887 static bool ConvertOmodMul(int64_t &Mul) { 4888 if (Mul != 1 && Mul != 2 && Mul != 4) 4889 return false; 4890 4891 Mul >>= 1; 4892 return true; 4893 } 4894 4895 static bool ConvertOmodDiv(int64_t &Div) { 4896 if (Div == 1) { 4897 Div = 0; 4898 return true; 4899 } 4900 4901 if (Div == 2) { 4902 Div = 3; 4903 return true; 4904 } 4905 4906 return false; 4907 } 4908 4909 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 4910 if (BoundCtrl == 0) { 4911 BoundCtrl = 1; 4912 return true; 4913 } 4914 4915 if (BoundCtrl == -1) { 4916 BoundCtrl = 0; 4917 return true; 4918 } 4919 4920 return false; 4921 } 4922 4923 // Note: the order in this table matches the order of operands in AsmString. 
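// Each entry is, roughly: {asm name, immediate type, whether it is a bare bit
// (e.g. "glc"), optional value converter}. Bare bits are parsed by parseNamedBit,
// most value-carrying forms such as "offset:<n>" go through parseIntWithPrefix, and a
// few entries are special-cased in parseOptionalOpr below.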
4924 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 4925 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 4926 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 4927 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 4928 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 4929 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 4930 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 4931 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 4932 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 4933 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 4934 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 4935 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 4936 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 4937 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 4938 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4939 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 4940 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 4941 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 4942 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 4943 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 4944 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 4945 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 4946 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 4947 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 4948 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 4949 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 4950 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 4951 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 4952 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 4953 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 4954 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 4955 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 4956 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 4957 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 4958 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 4959 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 4960 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 4961 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 4962 }; 4963 4964 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 4965 unsigned size = Operands.size(); 4966 assert(size > 0); 4967 4968 OperandMatchResultTy res = parseOptionalOpr(Operands); 4969 4970 // This is a hack to enable hardcoded mandatory operands which follow 4971 // optional operands. 4972 // 4973 // Current design assumes that all operands after the first optional operand 4974 // are also optional. However implementation of some instructions violates 4975 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 4976 // 4977 // To alleviate this problem, we have to (implicitly) parse extra operands 4978 // to make sure autogenerated parser of custom operands never hit hardcoded 4979 // mandatory operands. 4980 4981 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 4982 4983 // We have parsed the first optional operand. 4984 // Parse as many operands as necessary to skip all mandatory operands. 
4985 4986 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 4987 if (res != MatchOperand_Success || 4988 getLexer().is(AsmToken::EndOfStatement)) break; 4989 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 4990 res = parseOptionalOpr(Operands); 4991 } 4992 } 4993 4994 return res; 4995 } 4996 4997 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 4998 OperandMatchResultTy res; 4999 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5000 // try to parse any optional operand here 5001 if (Op.IsBit) { 5002 res = parseNamedBit(Op.Name, Operands, Op.Type); 5003 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5004 res = parseOModOperand(Operands); 5005 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5006 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5007 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5008 res = parseSDWASel(Operands, Op.Name, Op.Type); 5009 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5010 res = parseSDWADstUnused(Operands); 5011 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5012 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5013 Op.Type == AMDGPUOperand::ImmTyNegLo || 5014 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5015 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5016 Op.ConvertResult); 5017 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 5018 res = parseDfmtNfmt(Operands); 5019 } else { 5020 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5021 } 5022 if (res != MatchOperand_NoMatch) { 5023 return res; 5024 } 5025 } 5026 return MatchOperand_NoMatch; 5027 } 5028 5029 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5030 StringRef Name = Parser.getTok().getString(); 5031 if (Name == "mul") { 5032 return parseIntWithPrefix("mul", Operands, 5033 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5034 } 5035 5036 if (Name == "div") { 5037 return parseIntWithPrefix("div", Operands, 5038 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5039 } 5040 5041 return MatchOperand_NoMatch; 5042 } 5043 5044 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5045 cvtVOP3P(Inst, Operands); 5046 5047 int Opc = Inst.getOpcode(); 5048 5049 int SrcNum; 5050 const int Ops[] = { AMDGPU::OpName::src0, 5051 AMDGPU::OpName::src1, 5052 AMDGPU::OpName::src2 }; 5053 for (SrcNum = 0; 5054 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5055 ++SrcNum); 5056 assert(SrcNum > 0); 5057 5058 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5059 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5060 5061 if ((OpSel & (1 << SrcNum)) != 0) { 5062 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5063 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5064 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5065 } 5066 } 5067 5068 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5069 // 1. This operand is input modifiers 5070 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5071 // 2. This is not last operand 5072 && Desc.NumOperands > (OpNum + 1) 5073 // 3. Next operand is register class 5074 && Desc.OpInfo[OpNum + 1].RegClass != -1 5075 // 4. 
Next register is not tied to any other operand 5076 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5077 } 5078 5079 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5080 { 5081 OptionalImmIndexMap OptionalIdx; 5082 unsigned Opc = Inst.getOpcode(); 5083 5084 unsigned I = 1; 5085 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5086 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5087 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5088 } 5089 5090 for (unsigned E = Operands.size(); I != E; ++I) { 5091 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5092 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5093 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5094 } else if (Op.isInterpSlot() || 5095 Op.isInterpAttr() || 5096 Op.isAttrChan()) { 5097 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 5098 } else if (Op.isImmModifier()) { 5099 OptionalIdx[Op.getImmTy()] = I; 5100 } else { 5101 llvm_unreachable("unhandled operand type"); 5102 } 5103 } 5104 5105 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5106 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5107 } 5108 5109 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5111 } 5112 5113 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5115 } 5116 } 5117 5118 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5119 OptionalImmIndexMap &OptionalIdx) { 5120 unsigned Opc = Inst.getOpcode(); 5121 5122 unsigned I = 1; 5123 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5124 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5125 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5126 } 5127 5128 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5129 // This instruction has src modifiers 5130 for (unsigned E = Operands.size(); I != E; ++I) { 5131 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5132 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5133 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5134 } else if (Op.isImmModifier()) { 5135 OptionalIdx[Op.getImmTy()] = I; 5136 } else if (Op.isRegOrImm()) { 5137 Op.addRegOrImmOperands(Inst, 1); 5138 } else { 5139 llvm_unreachable("unhandled operand type"); 5140 } 5141 } 5142 } else { 5143 // No src modifiers 5144 for (unsigned E = Operands.size(); I != E; ++I) { 5145 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5146 if (Op.isMod()) { 5147 OptionalIdx[Op.getImmTy()] = I; 5148 } else { 5149 Op.addRegOrImmOperands(Inst, 1); 5150 } 5151 } 5152 } 5153 5154 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5156 } 5157 5158 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5160 } 5161 5162 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5163 // it has src2 register operand that is tied to dst operand 5164 // we don't allow modifiers for this operand in assembler so src2_modifiers 5165 // should be 0. 
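// Concretely, the code below inserts a zero src2_modifiers immediate at the proper
// index and then re-uses operand 0 (the dst register) as the tied src2, so e.g. a
// VOP3-encoded "v_mac_f32 v1, v2, v3" ends up with v1 as src2 as well.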
5166 if (Opc == AMDGPU::V_MAC_F32_e64_si || 5167 Opc == AMDGPU::V_MAC_F32_e64_vi || 5168 Opc == AMDGPU::V_MAC_F16_e64_vi || 5169 Opc == AMDGPU::V_FMAC_F32_e64_vi) { 5170 auto it = Inst.begin(); 5171 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5172 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5173 ++it; 5174 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5175 } 5176 } 5177 5178 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5179 OptionalImmIndexMap OptionalIdx; 5180 cvtVOP3(Inst, Operands, OptionalIdx); 5181 } 5182 5183 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5184 const OperandVector &Operands) { 5185 OptionalImmIndexMap OptIdx; 5186 const int Opc = Inst.getOpcode(); 5187 const MCInstrDesc &Desc = MII.get(Opc); 5188 5189 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5190 5191 cvtVOP3(Inst, Operands, OptIdx); 5192 5193 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5194 assert(!IsPacked); 5195 Inst.addOperand(Inst.getOperand(0)); 5196 } 5197 5198 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5199 // instruction, and then figure out where to actually put the modifiers 5200 5201 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5202 5203 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5204 if (OpSelHiIdx != -1) { 5205 int DefaultVal = IsPacked ? -1 : 0; 5206 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5207 DefaultVal); 5208 } 5209 5210 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5211 if (NegLoIdx != -1) { 5212 assert(IsPacked); 5213 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5214 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5215 } 5216 5217 const int Ops[] = { AMDGPU::OpName::src0, 5218 AMDGPU::OpName::src1, 5219 AMDGPU::OpName::src2 }; 5220 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5221 AMDGPU::OpName::src1_modifiers, 5222 AMDGPU::OpName::src2_modifiers }; 5223 5224 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5225 5226 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5227 unsigned OpSelHi = 0; 5228 unsigned NegLo = 0; 5229 unsigned NegHi = 0; 5230 5231 if (OpSelHiIdx != -1) { 5232 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5233 } 5234 5235 if (NegLoIdx != -1) { 5236 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5237 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5238 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5239 } 5240 5241 for (int J = 0; J < 3; ++J) { 5242 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5243 if (OpIdx == -1) 5244 break; 5245 5246 uint32_t ModVal = 0; 5247 5248 if ((OpSel & (1 << J)) != 0) 5249 ModVal |= SISrcMods::OP_SEL_0; 5250 5251 if ((OpSelHi & (1 << J)) != 0) 5252 ModVal |= SISrcMods::OP_SEL_1; 5253 5254 if ((NegLo & (1 << J)) != 0) 5255 ModVal |= SISrcMods::NEG; 5256 5257 if ((NegHi & (1 << J)) != 0) 5258 ModVal |= SISrcMods::NEG_HI; 5259 5260 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5261 5262 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5263 } 5264 } 5265 5266 //===----------------------------------------------------------------------===// 5267 // dpp 5268 //===----------------------------------------------------------------------===// 5269 5270 bool AMDGPUOperand::isDPPCtrl() const { 5271 using 
namespace AMDGPU::DPP; 5272 5273 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5274 if (result) { 5275 int64_t Imm = getImm(); 5276 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5277 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5278 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5279 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5280 (Imm == DppCtrl::WAVE_SHL1) || 5281 (Imm == DppCtrl::WAVE_ROL1) || 5282 (Imm == DppCtrl::WAVE_SHR1) || 5283 (Imm == DppCtrl::WAVE_ROR1) || 5284 (Imm == DppCtrl::ROW_MIRROR) || 5285 (Imm == DppCtrl::ROW_HALF_MIRROR) || 5286 (Imm == DppCtrl::BCAST15) || 5287 (Imm == DppCtrl::BCAST31); 5288 } 5289 return false; 5290 } 5291 5292 bool AMDGPUOperand::isGPRIdxMode() const { 5293 return isImm() && isUInt<4>(getImm()); 5294 } 5295 5296 bool AMDGPUOperand::isS16Imm() const { 5297 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 5298 } 5299 5300 bool AMDGPUOperand::isU16Imm() const { 5301 return isImm() && isUInt<16>(getImm()); 5302 } 5303 5304 OperandMatchResultTy 5305 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 5306 using namespace AMDGPU::DPP; 5307 5308 SMLoc S = Parser.getTok().getLoc(); 5309 StringRef Prefix; 5310 int64_t Int; 5311 5312 if (getLexer().getKind() == AsmToken::Identifier) { 5313 Prefix = Parser.getTok().getString(); 5314 } else { 5315 return MatchOperand_NoMatch; 5316 } 5317 5318 if (Prefix == "row_mirror") { 5319 Int = DppCtrl::ROW_MIRROR; 5320 Parser.Lex(); 5321 } else if (Prefix == "row_half_mirror") { 5322 Int = DppCtrl::ROW_HALF_MIRROR; 5323 Parser.Lex(); 5324 } else { 5325 // Check to prevent parseDPPCtrlOps from eating invalid tokens 5326 if (Prefix != "quad_perm" 5327 && Prefix != "row_shl" 5328 && Prefix != "row_shr" 5329 && Prefix != "row_ror" 5330 && Prefix != "wave_shl" 5331 && Prefix != "wave_rol" 5332 && Prefix != "wave_shr" 5333 && Prefix != "wave_ror" 5334 && Prefix != "row_bcast") { 5335 return MatchOperand_NoMatch; 5336 } 5337 5338 Parser.Lex(); 5339 if (getLexer().isNot(AsmToken::Colon)) 5340 return MatchOperand_ParseFail; 5341 5342 if (Prefix == "quad_perm") { 5343 // quad_perm:[%d,%d,%d,%d] 5344 Parser.Lex(); 5345 if (getLexer().isNot(AsmToken::LBrac)) 5346 return MatchOperand_ParseFail; 5347 Parser.Lex(); 5348 5349 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 5350 return MatchOperand_ParseFail; 5351 5352 for (int i = 0; i < 3; ++i) { 5353 if (getLexer().isNot(AsmToken::Comma)) 5354 return MatchOperand_ParseFail; 5355 Parser.Lex(); 5356 5357 int64_t Temp; 5358 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 5359 return MatchOperand_ParseFail; 5360 const int shift = i*2 + 2; 5361 Int += (Temp << shift); 5362 } 5363 5364 if (getLexer().isNot(AsmToken::RBrac)) 5365 return MatchOperand_ParseFail; 5366 Parser.Lex(); 5367 } else { 5368 // sel:%d 5369 Parser.Lex(); 5370 if (getParser().parseAbsoluteExpression(Int)) 5371 return MatchOperand_ParseFail; 5372 5373 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 5374 Int |= DppCtrl::ROW_SHL0; 5375 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 5376 Int |= DppCtrl::ROW_SHR0; 5377 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 5378 Int |= DppCtrl::ROW_ROR0; 5379 } else if (Prefix == "wave_shl" && 1 == Int) { 5380 Int = DppCtrl::WAVE_SHL1; 5381 } else if (Prefix == "wave_rol" && 1 == Int) { 5382 Int = DppCtrl::WAVE_ROL1; 5383 } else if (Prefix == "wave_shr" && 1 == 
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
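
// Note: a DPP instruction written without row_mask/bank_mask gets 0xf for both
// (see the addOptionalImmOperand calls in cvtDPP below); bound_ctrl likewise
// defaults to 0, matching defaultBoundCtrl() above.
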
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
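
// For reference, the SDWA operand syntax accepted above: the sel parsers take
// one of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD (e.g. dst_sel:WORD_1,
// src0_sel:BYTE_0), and the dst_unused parser takes UNUSED_PAD, UNUSED_SEXT or
// UNUSED_PRESERVE (e.g. dst_unused:UNUSED_PRESERVE).
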
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
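
// Shared SDWA conversion used by the wrappers above. When skipVcc is set
// (always for VOP2b and, on VI only, for VOPC), the written "vcc" operand is
// dropped rather than added to the MCInst; the comments in the loop below
// spell out which occurrence gets skipped. Omitted optional operands are
// appended with their default encodings (clamp/omod = 0, *_sel = DWORD,
// dst_unused = UNUSED_PRESERVE).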
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but are
  // expected to produce the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}