1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "Utils/AMDGPUAsmUtils.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "Utils/AMDKernelCodeTUtils.h" 18 #include "llvm/ADT/APFloat.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallBitVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/ADT/Twine.h" 27 #include "llvm/BinaryFormat/ELF.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCContext.h" 30 #include "llvm/MC/MCExpr.h" 31 #include "llvm/MC/MCInst.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/MC/MCInstrInfo.h" 34 #include "llvm/MC/MCParser/MCAsmLexer.h" 35 #include "llvm/MC/MCParser/MCAsmParser.h" 36 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 38 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSubtargetInfo.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/AMDGPUMetadata.h" 44 #include "llvm/Support/AMDHSAKernelDescriptor.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MachineValueType.h" 49 #include "llvm/Support/MathExtras.h" 50 #include "llvm/Support/SMLoc.h" 51 #include "llvm/Support/TargetParser.h" 52 #include "llvm/Support/TargetRegistry.h" 53 #include "llvm/Support/raw_ostream.h" 54 #include <algorithm> 55 #include <cassert> 56 #include <cstdint> 57 #include <cstring> 58 #include <iterator> 59 #include <map> 60 #include <memory> 61 #include <string> 62 63 using namespace llvm; 64 using namespace llvm::AMDGPU; 65 using namespace llvm::amdhsa; 66 67 namespace { 68 69 class AMDGPUAsmParser; 70 71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; 72 73 //===----------------------------------------------------------------------===// 74 // Operand 75 //===----------------------------------------------------------------------===// 76 77 class AMDGPUOperand : public MCParsedAsmOperand { 78 enum KindTy { 79 Token, 80 Immediate, 81 Register, 82 Expression 83 } Kind; 84 85 SMLoc StartLoc, EndLoc; 86 const AMDGPUAsmParser *AsmParser; 87 88 public: 89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 91 92 using Ptr = std::unique_ptr<AMDGPUOperand>; 93 94 struct Modifiers { 95 bool Abs = false; 96 bool Neg = false; 97 bool Sext = false; 98 99 bool hasFPModifiers() const { return Abs || Neg; } 100 bool hasIntModifiers() const { return Sext; } 101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 102 103 int64_t getFPModifiersOperand() const { 104 int64_t Operand = 0; 105 Operand |= Abs ? SISrcMods::ABS : 0; 106 Operand |= Neg ? 
                        SISrcMods::NEG : 0;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
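    // Illustrative sketch (editor's addition; the operand text is only an
    // assumed example): if "gds" is lexed as an identifier and wrapped in an
    // MCSymbolRefExpr, the operand still answers isToken(), and
    //   getToken()  ->  getExpressionAsToken()  ->  "gds"
    // so the matcher can still treat it as the 'gds' keyword.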
216 return isa<MCSymbolRefExpr>(Expr); 217 } 218 219 bool isImm() const override { 220 return Kind == Immediate; 221 } 222 223 bool isInlinableImm(MVT type) const; 224 bool isLiteralImm(MVT type) const; 225 226 bool isRegKind() const { 227 return Kind == Register; 228 } 229 230 bool isReg() const override { 231 return isRegKind() && !hasModifiers(); 232 } 233 234 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 235 return isRegClass(RCID) || isInlinableImm(type); 236 } 237 238 bool isRegOrImmWithInt16InputMods() const { 239 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 240 } 241 242 bool isRegOrImmWithInt32InputMods() const { 243 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 244 } 245 246 bool isRegOrImmWithInt64InputMods() const { 247 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 248 } 249 250 bool isRegOrImmWithFP16InputMods() const { 251 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 252 } 253 254 bool isRegOrImmWithFP32InputMods() const { 255 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 256 } 257 258 bool isRegOrImmWithFP64InputMods() const { 259 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 260 } 261 262 bool isVReg() const { 263 return isRegClass(AMDGPU::VGPR_32RegClassID) || 264 isRegClass(AMDGPU::VReg_64RegClassID) || 265 isRegClass(AMDGPU::VReg_96RegClassID) || 266 isRegClass(AMDGPU::VReg_128RegClassID) || 267 isRegClass(AMDGPU::VReg_256RegClassID) || 268 isRegClass(AMDGPU::VReg_512RegClassID); 269 } 270 271 bool isVReg32() const { 272 return isRegClass(AMDGPU::VGPR_32RegClassID); 273 } 274 275 bool isVReg32OrOff() const { 276 return isOff() || isVReg32(); 277 } 278 279 bool isSDWAOperand(MVT type) const; 280 bool isSDWAFP16Operand() const; 281 bool isSDWAFP32Operand() const; 282 bool isSDWAInt16Operand() const; 283 bool isSDWAInt32Operand() const; 284 285 bool isImmTy(ImmTy ImmT) const { 286 return isImm() && Imm.Type == ImmT; 287 } 288 289 bool isImmModifier() const { 290 return isImm() && Imm.Type != ImmTyNone; 291 } 292 293 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 294 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 295 bool isDMask() const { return isImmTy(ImmTyDMask); } 296 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 297 bool isDA() const { return isImmTy(ImmTyDA); } 298 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 299 bool isLWE() const { return isImmTy(ImmTyLWE); } 300 bool isOff() const { return isImmTy(ImmTyOff); } 301 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 302 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 303 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 304 bool isOffen() const { return isImmTy(ImmTyOffen); } 305 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 306 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 307 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 308 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } 309 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 310 311 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 312 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 313 bool isGDS() const { return isImmTy(ImmTyGDS); } 314 bool isLDS() const { return isImmTy(ImmTyLDS); } 315 bool isGLC() const { return isImmTy(ImmTyGLC); 
} 316 bool isSLC() const { return isImmTy(ImmTySLC); } 317 bool isTFE() const { return isImmTy(ImmTyTFE); } 318 bool isD16() const { return isImmTy(ImmTyD16); } 319 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 320 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 321 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 322 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 323 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 324 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 325 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 326 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 327 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 328 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 329 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 330 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 331 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 332 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 333 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 334 bool isHigh() const { return isImmTy(ImmTyHigh); } 335 336 bool isMod() const { 337 return isClampSI() || isOModSI(); 338 } 339 340 bool isRegOrImm() const { 341 return isReg() || isImm(); 342 } 343 344 bool isRegClass(unsigned RCID) const; 345 346 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 347 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 348 } 349 350 bool isSCSrcB16() const { 351 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 352 } 353 354 bool isSCSrcV2B16() const { 355 return isSCSrcB16(); 356 } 357 358 bool isSCSrcB32() const { 359 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 360 } 361 362 bool isSCSrcB64() const { 363 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 364 } 365 366 bool isSCSrcF16() const { 367 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 368 } 369 370 bool isSCSrcV2F16() const { 371 return isSCSrcF16(); 372 } 373 374 bool isSCSrcF32() const { 375 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 376 } 377 378 bool isSCSrcF64() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 380 } 381 382 bool isSSrcB32() const { 383 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 384 } 385 386 bool isSSrcB16() const { 387 return isSCSrcB16() || isLiteralImm(MVT::i16); 388 } 389 390 bool isSSrcV2B16() const { 391 llvm_unreachable("cannot happen"); 392 return isSSrcB16(); 393 } 394 395 bool isSSrcB64() const { 396 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 397 // See isVSrc64(). 
398 return isSCSrcB64() || isLiteralImm(MVT::i64); 399 } 400 401 bool isSSrcF32() const { 402 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 403 } 404 405 bool isSSrcF64() const { 406 return isSCSrcB64() || isLiteralImm(MVT::f64); 407 } 408 409 bool isSSrcF16() const { 410 return isSCSrcB16() || isLiteralImm(MVT::f16); 411 } 412 413 bool isSSrcV2F16() const { 414 llvm_unreachable("cannot happen"); 415 return isSSrcF16(); 416 } 417 418 bool isSSrcOrLdsB32() const { 419 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 420 isLiteralImm(MVT::i32) || isExpr(); 421 } 422 423 bool isVCSrcB32() const { 424 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 425 } 426 427 bool isVCSrcB64() const { 428 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 429 } 430 431 bool isVCSrcB16() const { 432 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 433 } 434 435 bool isVCSrcV2B16() const { 436 return isVCSrcB16(); 437 } 438 439 bool isVCSrcF32() const { 440 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 441 } 442 443 bool isVCSrcF64() const { 444 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 445 } 446 447 bool isVCSrcF16() const { 448 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 449 } 450 451 bool isVCSrcV2F16() const { 452 return isVCSrcF16(); 453 } 454 455 bool isVSrcB32() const { 456 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 457 } 458 459 bool isVSrcB64() const { 460 return isVCSrcF64() || isLiteralImm(MVT::i64); 461 } 462 463 bool isVSrcB16() const { 464 return isVCSrcF16() || isLiteralImm(MVT::i16); 465 } 466 467 bool isVSrcV2B16() const { 468 llvm_unreachable("cannot happen"); 469 return isVSrcB16(); 470 } 471 472 bool isVSrcF32() const { 473 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 474 } 475 476 bool isVSrcF64() const { 477 return isVCSrcF64() || isLiteralImm(MVT::f64); 478 } 479 480 bool isVSrcF16() const { 481 return isVCSrcF16() || isLiteralImm(MVT::f16); 482 } 483 484 bool isVSrcV2F16() const { 485 llvm_unreachable("cannot happen"); 486 return isVSrcF16(); 487 } 488 489 bool isKImmFP32() const { 490 return isLiteralImm(MVT::f32); 491 } 492 493 bool isKImmFP16() const { 494 return isLiteralImm(MVT::f16); 495 } 496 497 bool isMem() const override { 498 return false; 499 } 500 501 bool isExpr() const { 502 return Kind == Expression; 503 } 504 505 bool isSoppBrTarget() const { 506 return isExpr() || isImm(); 507 } 508 509 bool isSWaitCnt() const; 510 bool isHwreg() const; 511 bool isSendMsg() const; 512 bool isSwizzle() const; 513 bool isSMRDOffset8() const; 514 bool isSMRDOffset20() const; 515 bool isSMRDLiteralOffset() const; 516 bool isDPPCtrl() const; 517 bool isGPRIdxMode() const; 518 bool isS16Imm() const; 519 bool isU16Imm() const; 520 521 StringRef getExpressionAsToken() const { 522 assert(isExpr()); 523 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 524 return S->getSymbol().getName(); 525 } 526 527 StringRef getToken() const { 528 assert(isToken()); 529 530 if (Kind == Expression) 531 return getExpressionAsToken(); 532 533 return StringRef(Tok.Data, Tok.Length); 534 } 535 536 int64_t getImm() const { 537 assert(isImm()); 538 return Imm.Val; 539 } 540 541 ImmTy getImmTy() const { 542 assert(isImm()); 543 return Imm.Type; 544 } 545 546 unsigned getReg() const override { 547 return Reg.RegNo; 548 } 549 550 SMLoc getStartLoc() const override { 551 return StartLoc; 552 } 553 554 SMLoc getEndLoc() const override { 555 return 
EndLoc; 556 } 557 558 SMRange getLocRange() const { 559 return SMRange(StartLoc, EndLoc); 560 } 561 562 Modifiers getModifiers() const { 563 assert(isRegKind() || isImmTy(ImmTyNone)); 564 return isRegKind() ? Reg.Mods : Imm.Mods; 565 } 566 567 void setModifiers(Modifiers Mods) { 568 assert(isRegKind() || isImmTy(ImmTyNone)); 569 if (isRegKind()) 570 Reg.Mods = Mods; 571 else 572 Imm.Mods = Mods; 573 } 574 575 bool hasModifiers() const { 576 return getModifiers().hasModifiers(); 577 } 578 579 bool hasFPModifiers() const { 580 return getModifiers().hasFPModifiers(); 581 } 582 583 bool hasIntModifiers() const { 584 return getModifiers().hasIntModifiers(); 585 } 586 587 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 588 589 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 590 591 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 592 593 template <unsigned Bitwidth> 594 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 595 596 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 597 addKImmFPOperands<16>(Inst, N); 598 } 599 600 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 601 addKImmFPOperands<32>(Inst, N); 602 } 603 604 void addRegOperands(MCInst &Inst, unsigned N) const; 605 606 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 607 if (isRegKind()) 608 addRegOperands(Inst, N); 609 else if (isExpr()) 610 Inst.addOperand(MCOperand::createExpr(Expr)); 611 else 612 addImmOperands(Inst, N); 613 } 614 615 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 616 Modifiers Mods = getModifiers(); 617 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 618 if (isRegKind()) { 619 addRegOperands(Inst, N); 620 } else { 621 addImmOperands(Inst, N, false); 622 } 623 } 624 625 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 626 assert(!hasIntModifiers()); 627 addRegOrImmWithInputModsOperands(Inst, N); 628 } 629 630 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 631 assert(!hasFPModifiers()); 632 addRegOrImmWithInputModsOperands(Inst, N); 633 } 634 635 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 636 Modifiers Mods = getModifiers(); 637 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 638 assert(isRegKind()); 639 addRegOperands(Inst, N); 640 } 641 642 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 643 assert(!hasIntModifiers()); 644 addRegWithInputModsOperands(Inst, N); 645 } 646 647 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 648 assert(!hasFPModifiers()); 649 addRegWithInputModsOperands(Inst, N); 650 } 651 652 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 653 if (isImm()) 654 addImmOperands(Inst, N); 655 else { 656 assert(isExpr()); 657 Inst.addOperand(MCOperand::createExpr(Expr)); 658 } 659 } 660 661 static void printImmTy(raw_ostream& OS, ImmTy Type) { 662 switch (Type) { 663 case ImmTyNone: OS << "None"; break; 664 case ImmTyGDS: OS << "GDS"; break; 665 case ImmTyLDS: OS << "LDS"; break; 666 case ImmTyOffen: OS << "Offen"; break; 667 case ImmTyIdxen: OS << "Idxen"; break; 668 case ImmTyAddr64: OS << "Addr64"; break; 669 case ImmTyOffset: OS << "Offset"; break; 670 case ImmTyInstOffset: OS << "InstOffset"; break; 671 case ImmTyOffset0: OS << "Offset0"; break; 672 case ImmTyOffset1: OS << "Offset1"; break; 673 case ImmTyGLC: OS << "GLC"; break; 674 case ImmTySLC: OS << "SLC"; break; 675 case ImmTyTFE: 
OS << "TFE"; break; 676 case ImmTyD16: OS << "D16"; break; 677 case ImmTyFORMAT: OS << "FORMAT"; break; 678 case ImmTyClampSI: OS << "ClampSI"; break; 679 case ImmTyOModSI: OS << "OModSI"; break; 680 case ImmTyDppCtrl: OS << "DppCtrl"; break; 681 case ImmTyDppRowMask: OS << "DppRowMask"; break; 682 case ImmTyDppBankMask: OS << "DppBankMask"; break; 683 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 684 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 685 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 686 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 687 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 688 case ImmTyDMask: OS << "DMask"; break; 689 case ImmTyUNorm: OS << "UNorm"; break; 690 case ImmTyDA: OS << "DA"; break; 691 case ImmTyR128A16: OS << "R128A16"; break; 692 case ImmTyLWE: OS << "LWE"; break; 693 case ImmTyOff: OS << "Off"; break; 694 case ImmTyExpTgt: OS << "ExpTgt"; break; 695 case ImmTyExpCompr: OS << "ExpCompr"; break; 696 case ImmTyExpVM: OS << "ExpVM"; break; 697 case ImmTyHwreg: OS << "Hwreg"; break; 698 case ImmTySendMsg: OS << "SendMsg"; break; 699 case ImmTyInterpSlot: OS << "InterpSlot"; break; 700 case ImmTyInterpAttr: OS << "InterpAttr"; break; 701 case ImmTyAttrChan: OS << "AttrChan"; break; 702 case ImmTyOpSel: OS << "OpSel"; break; 703 case ImmTyOpSelHi: OS << "OpSelHi"; break; 704 case ImmTyNegLo: OS << "NegLo"; break; 705 case ImmTyNegHi: OS << "NegHi"; break; 706 case ImmTySwizzle: OS << "Swizzle"; break; 707 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 708 case ImmTyHigh: OS << "High"; break; 709 } 710 } 711 712 void print(raw_ostream &OS) const override { 713 switch (Kind) { 714 case Register: 715 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 716 break; 717 case Immediate: 718 OS << '<' << getImm(); 719 if (getImmTy() != ImmTyNone) { 720 OS << " type: "; printImmTy(OS, getImmTy()); 721 } 722 OS << " mods: " << Imm.Mods << '>'; 723 break; 724 case Token: 725 OS << '\'' << getToken() << '\''; 726 break; 727 case Expression: 728 OS << "<expr " << *Expr << '>'; 729 break; 730 } 731 } 732 733 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 734 int64_t Val, SMLoc Loc, 735 ImmTy Type = ImmTyNone, 736 bool IsFPImm = false) { 737 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 738 Op->Imm.Val = Val; 739 Op->Imm.IsFPImm = IsFPImm; 740 Op->Imm.Type = Type; 741 Op->Imm.Mods = Modifiers(); 742 Op->StartLoc = Loc; 743 Op->EndLoc = Loc; 744 return Op; 745 } 746 747 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 748 StringRef Str, SMLoc Loc, 749 bool HasExplicitEncodingSize = true) { 750 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 751 Res->Tok.Data = Str.data(); 752 Res->Tok.Length = Str.size(); 753 Res->StartLoc = Loc; 754 Res->EndLoc = Loc; 755 return Res; 756 } 757 758 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 759 unsigned RegNo, SMLoc S, 760 SMLoc E, 761 bool ForceVOP3) { 762 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 763 Op->Reg.RegNo = RegNo; 764 Op->Reg.Mods = Modifiers(); 765 Op->Reg.IsForcedVOP3 = ForceVOP3; 766 Op->StartLoc = S; 767 Op->EndLoc = E; 768 return Op; 769 } 770 771 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 772 const class MCExpr *Expr, SMLoc S) { 773 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 774 Op->Expr = Expr; 775 Op->StartLoc = S; 776 Op->EndLoc = S; 777 return Op; 778 } 779 }; 780 781 raw_ostream &operator 
<<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 782 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 783 return OS; 784 } 785 786 //===----------------------------------------------------------------------===// 787 // AsmParser 788 //===----------------------------------------------------------------------===// 789 790 // Holds info related to the current kernel, e.g. count of SGPRs used. 791 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 792 // .amdgpu_hsa_kernel or at EOF. 793 class KernelScopeInfo { 794 int SgprIndexUnusedMin = -1; 795 int VgprIndexUnusedMin = -1; 796 MCContext *Ctx = nullptr; 797 798 void usesSgprAt(int i) { 799 if (i >= SgprIndexUnusedMin) { 800 SgprIndexUnusedMin = ++i; 801 if (Ctx) { 802 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 803 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 804 } 805 } 806 } 807 808 void usesVgprAt(int i) { 809 if (i >= VgprIndexUnusedMin) { 810 VgprIndexUnusedMin = ++i; 811 if (Ctx) { 812 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 813 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 814 } 815 } 816 } 817 818 public: 819 KernelScopeInfo() = default; 820 821 void initialize(MCContext &Context) { 822 Ctx = &Context; 823 usesSgprAt(SgprIndexUnusedMin = -1); 824 usesVgprAt(VgprIndexUnusedMin = -1); 825 } 826 827 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 828 switch (RegKind) { 829 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 830 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 831 default: break; 832 } 833 } 834 }; 835 836 class AMDGPUAsmParser : public MCTargetAsmParser { 837 MCAsmParser &Parser; 838 839 // Number of extra operands parsed after the first optional operand. 840 // This may be necessary to skip hardcoded mandatory operands. 841 static const unsigned MAX_OPR_LOOKAHEAD = 8; 842 843 unsigned ForcedEncodingSize = 0; 844 bool ForcedDPP = false; 845 bool ForcedSDWA = false; 846 KernelScopeInfo KernelScope; 847 848 /// @name Auto-generated Match Functions 849 /// { 850 851 #define GET_ASSEMBLER_HEADER 852 #include "AMDGPUGenAsmMatcher.inc" 853 854 /// } 855 856 private: 857 bool ParseAsAbsoluteExpression(uint32_t &Ret); 858 bool OutOfRangeError(SMRange Range); 859 /// Calculate VGPR/SGPR blocks required for given target, reserved 860 /// registers, and user-specified NextFreeXGPR values. 861 /// 862 /// \param Features [in] Target features, used for bug corrections. 863 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 864 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 865 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 866 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 867 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 868 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 869 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 870 /// \param VGPRBlocks [out] Result VGPR block count. 871 /// \param SGPRBlocks [out] Result SGPR block count. 
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadata();

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
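      // Hedged usage sketch (editor's addition): the symbols defined below can
      // be referenced from the assembly being parsed, e.g.
      //   .if .option.machine_version_major >= 9
      //     <GFX9-only code>
      //   .endif
      // Only the symbol names come from this file; the surrounding directives
      // are an assumed example.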
931 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 932 MCContext &Ctx = getContext(); 933 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 934 MCSymbol *Sym = 935 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 936 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 937 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 938 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 939 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 940 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 941 } else { 942 MCSymbol *Sym = 943 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 944 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 945 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 946 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 947 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 948 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 949 } 950 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 951 initializeGprCountSymbol(IS_VGPR); 952 initializeGprCountSymbol(IS_SGPR); 953 } else 954 KernelScope.initialize(getContext()); 955 } 956 } 957 958 bool hasXNACK() const { 959 return AMDGPU::hasXNACK(getSTI()); 960 } 961 962 bool hasMIMG_R128() const { 963 return AMDGPU::hasMIMG_R128(getSTI()); 964 } 965 966 bool hasPackedD16() const { 967 return AMDGPU::hasPackedD16(getSTI()); 968 } 969 970 bool isSI() const { 971 return AMDGPU::isSI(getSTI()); 972 } 973 974 bool isCI() const { 975 return AMDGPU::isCI(getSTI()); 976 } 977 978 bool isVI() const { 979 return AMDGPU::isVI(getSTI()); 980 } 981 982 bool isGFX9() const { 983 return AMDGPU::isGFX9(getSTI()); 984 } 985 986 bool hasInv2PiInlineImm() const { 987 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 988 } 989 990 bool hasFlatOffsets() const { 991 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 992 } 993 994 bool hasSGPR102_SGPR103() const { 995 return !isVI(); 996 } 997 998 bool hasIntClamp() const { 999 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1000 } 1001 1002 AMDGPUTargetStreamer &getTargetStreamer() { 1003 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1004 return static_cast<AMDGPUTargetStreamer &>(TS); 1005 } 1006 1007 const MCRegisterInfo *getMRI() const { 1008 // We need this const_cast because for some reason getContext() is not const 1009 // in MCAsmParser. 
1010 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1011 } 1012 1013 const MCInstrInfo *getMII() const { 1014 return &MII; 1015 } 1016 1017 const FeatureBitset &getFeatureBits() const { 1018 return getSTI().getFeatureBits(); 1019 } 1020 1021 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1022 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1023 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1024 1025 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1026 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1027 bool isForcedDPP() const { return ForcedDPP; } 1028 bool isForcedSDWA() const { return ForcedSDWA; } 1029 ArrayRef<unsigned> getMatchedVariants() const; 1030 1031 std::unique_ptr<AMDGPUOperand> parseRegister(); 1032 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1033 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1034 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1035 unsigned Kind) override; 1036 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1037 OperandVector &Operands, MCStreamer &Out, 1038 uint64_t &ErrorInfo, 1039 bool MatchingInlineAsm) override; 1040 bool ParseDirective(AsmToken DirectiveID) override; 1041 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1042 StringRef parseMnemonicSuffix(StringRef Name); 1043 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1044 SMLoc NameLoc, OperandVector &Operands) override; 1045 //bool ProcessInstruction(MCInst &Inst); 1046 1047 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1048 1049 OperandMatchResultTy 1050 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1051 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1052 bool (*ConvertResult)(int64_t &) = nullptr); 1053 1054 OperandMatchResultTy parseOperandArrayWithPrefix( 1055 const char *Prefix, 1056 OperandVector &Operands, 1057 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1058 bool (*ConvertResult)(int64_t&) = nullptr); 1059 1060 OperandMatchResultTy 1061 parseNamedBit(const char *Name, OperandVector &Operands, 1062 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1063 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1064 StringRef &Value); 1065 1066 bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false); 1067 OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false); 1068 OperandMatchResultTy parseReg(OperandVector &Operands); 1069 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false); 1070 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1071 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1072 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1073 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1074 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1075 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1076 1077 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1078 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1079 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1080 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1081 1082 bool parseCnt(int64_t 
&IntVal); 1083 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1084 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1085 1086 private: 1087 struct OperandInfoTy { 1088 int64_t Id; 1089 bool IsSymbolic = false; 1090 1091 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1092 }; 1093 1094 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1095 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1096 1097 void errorExpTgt(); 1098 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1099 1100 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1101 bool validateSOPLiteral(const MCInst &Inst) const; 1102 bool validateConstantBusLimitations(const MCInst &Inst); 1103 bool validateEarlyClobberLimitations(const MCInst &Inst); 1104 bool validateIntClampSupported(const MCInst &Inst); 1105 bool validateMIMGAtomicDMask(const MCInst &Inst); 1106 bool validateMIMGGatherDMask(const MCInst &Inst); 1107 bool validateMIMGDataSize(const MCInst &Inst); 1108 bool validateMIMGD16(const MCInst &Inst); 1109 bool validateLdsDirect(const MCInst &Inst); 1110 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1111 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1112 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1113 1114 bool trySkipId(const StringRef Id); 1115 bool trySkipToken(const AsmToken::TokenKind Kind); 1116 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1117 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1118 bool parseExpr(int64_t &Imm); 1119 1120 public: 1121 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1122 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1123 1124 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1125 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1126 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1127 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1128 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1129 1130 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1131 const unsigned MinVal, 1132 const unsigned MaxVal, 1133 const StringRef ErrMsg); 1134 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1135 bool parseSwizzleOffset(int64_t &Imm); 1136 bool parseSwizzleMacro(int64_t &Imm); 1137 bool parseSwizzleQuadPerm(int64_t &Imm); 1138 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1139 bool parseSwizzleBroadcast(int64_t &Imm); 1140 bool parseSwizzleSwap(int64_t &Imm); 1141 bool parseSwizzleReverse(int64_t &Imm); 1142 1143 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1144 int64_t parseGPRIdxMacro(); 1145 1146 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1147 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1148 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1149 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1150 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1151 1152 AMDGPUOperand::Ptr defaultGLC() const; 1153 AMDGPUOperand::Ptr defaultSLC() const; 1154 1155 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1156 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1157 
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1158 AMDGPUOperand::Ptr defaultOffsetU12() const; 1159 AMDGPUOperand::Ptr defaultOffsetS13() const; 1160 1161 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1162 1163 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1164 OptionalImmIndexMap &OptionalIdx); 1165 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1166 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1167 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1168 1169 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1170 1171 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1172 bool IsAtomic = false); 1173 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1174 1175 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1176 AMDGPUOperand::Ptr defaultRowMask() const; 1177 AMDGPUOperand::Ptr defaultBankMask() const; 1178 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1179 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1180 1181 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1182 AMDGPUOperand::ImmTy Type); 1183 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1184 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1185 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1186 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1187 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1188 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1189 uint64_t BasicInstType, bool skipVcc = false); 1190 }; 1191 1192 struct OptionalOperand { 1193 const char *Name; 1194 AMDGPUOperand::ImmTy Type; 1195 bool IsBit; 1196 bool (*ConvertResult)(int64_t&); 1197 }; 1198 1199 } // end anonymous namespace 1200 1201 // May be called with integer type with equivalent bitwidth. 
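// For example (editor's addition, relying only on the mapping below):
//   getFltSemantics(MVT::i32) == &APFloat::IEEEsingle()   // 4 bytes
//   getFltSemantics(MVT::f16) == &APFloat::IEEEhalf()     // 2 bytes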
1202 static const fltSemantics *getFltSemantics(unsigned Size) { 1203 switch (Size) { 1204 case 4: 1205 return &APFloat::IEEEsingle(); 1206 case 8: 1207 return &APFloat::IEEEdouble(); 1208 case 2: 1209 return &APFloat::IEEEhalf(); 1210 default: 1211 llvm_unreachable("unsupported fp type"); 1212 } 1213 } 1214 1215 static const fltSemantics *getFltSemantics(MVT VT) { 1216 return getFltSemantics(VT.getSizeInBits() / 8); 1217 } 1218 1219 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1220 switch (OperandType) { 1221 case AMDGPU::OPERAND_REG_IMM_INT32: 1222 case AMDGPU::OPERAND_REG_IMM_FP32: 1223 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1224 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1225 return &APFloat::IEEEsingle(); 1226 case AMDGPU::OPERAND_REG_IMM_INT64: 1227 case AMDGPU::OPERAND_REG_IMM_FP64: 1228 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1229 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1230 return &APFloat::IEEEdouble(); 1231 case AMDGPU::OPERAND_REG_IMM_INT16: 1232 case AMDGPU::OPERAND_REG_IMM_FP16: 1233 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1234 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1235 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1236 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1237 return &APFloat::IEEEhalf(); 1238 default: 1239 llvm_unreachable("unsupported fp type"); 1240 } 1241 } 1242 1243 //===----------------------------------------------------------------------===// 1244 // Operand 1245 //===----------------------------------------------------------------------===// 1246 1247 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1248 bool Lost; 1249 1250 // Convert literal to single precision 1251 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1252 APFloat::rmNearestTiesToEven, 1253 &Lost); 1254 // We allow precision lost but not overflow or underflow 1255 if (Status != APFloat::opOK && 1256 Lost && 1257 ((Status & APFloat::opOverflow) != 0 || 1258 (Status & APFloat::opUnderflow) != 0)) { 1259 return false; 1260 } 1261 1262 return true; 1263 } 1264 1265 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1266 if (!isImmTy(ImmTyNone)) { 1267 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1268 return false; 1269 } 1270 // TODO: We should avoid using host float here. It would be better to 1271 // check the float bit values which is what a few other places do. 1272 // We've had bot failures before due to weird NaN support on mips hosts. 1273 1274 APInt Literal(64, Imm.Val); 1275 1276 if (Imm.IsFPImm) { // We got fp literal token 1277 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1278 return AMDGPU::isInlinableLiteral64(Imm.Val, 1279 AsmParser->hasInv2PiInlineImm()); 1280 } 1281 1282 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1283 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1284 return false; 1285 1286 if (type.getScalarSizeInBits() == 16) { 1287 return AMDGPU::isInlinableLiteral16( 1288 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1289 AsmParser->hasInv2PiInlineImm()); 1290 } 1291 1292 // Check if single precision literal is inlinable 1293 return AMDGPU::isInlinableLiteral32( 1294 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1295 AsmParser->hasInv2PiInlineImm()); 1296 } 1297 1298 // We got int literal token. 
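  // Worked example (editor's addition): for a 32-bit operand, an integer
  // token such as 64 is accepted by isInlinableLiteral32 below (the GCN
  // inline-constant range covers small integers), while 65 is not and must
  // instead be emitted as a 32-bit literal.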
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The encoding keeps only the high 32 bits of the literal (the low 32 bits
    // become zero), but we accept such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}
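// A small worked example of the FP-modifier folding in applyInputFPModifiers
// above (editor's addition): for a 4-byte value the sign mask is 0x80000000,
// so with Abs set the call maps 0xBF800000 (-1.0f) to 0x3F800000 (1.0f), and
// with Neg set it simply flips that sign bit.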
1413 1414 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1415 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1416 auto OpNum = Inst.getNumOperands(); 1417 // Check that this operand accepts literals 1418 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1419 1420 if (ApplyModifiers) { 1421 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1422 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1423 Val = applyInputFPModifiers(Val, Size); 1424 } 1425 1426 APInt Literal(64, Val); 1427 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1428 1429 if (Imm.IsFPImm) { // We got fp literal token 1430 switch (OpTy) { 1431 case AMDGPU::OPERAND_REG_IMM_INT64: 1432 case AMDGPU::OPERAND_REG_IMM_FP64: 1433 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1434 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1435 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1436 AsmParser->hasInv2PiInlineImm())) { 1437 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1438 return; 1439 } 1440 1441 // Non-inlineable 1442 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1443 // For fp operands we check if low 32 bits are zeros 1444 if (Literal.getLoBits(32) != 0) { 1445 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1446 "Can't encode literal as exact 64-bit floating-point operand. " 1447 "Low 32-bits will be set to zero"); 1448 } 1449 1450 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1451 return; 1452 } 1453 1454 // We don't allow fp literals in 64-bit integer instructions. It is 1455 // unclear how we should encode them. This case should be checked earlier 1456 // in predicate methods (isLiteralImm()) 1457 llvm_unreachable("fp literal in 64-bit integer instruction."); 1458 1459 case AMDGPU::OPERAND_REG_IMM_INT32: 1460 case AMDGPU::OPERAND_REG_IMM_FP32: 1461 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1462 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1463 case AMDGPU::OPERAND_REG_IMM_INT16: 1464 case AMDGPU::OPERAND_REG_IMM_FP16: 1465 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1466 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1467 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1468 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1469 bool lost; 1470 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1471 // Convert literal to single precision 1472 FPLiteral.convert(*getOpFltSemantics(OpTy), 1473 APFloat::rmNearestTiesToEven, &lost); 1474 // We allow precision lost but not overflow or underflow. This should be 1475 // checked earlier in isLiteralImm() 1476 1477 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1478 if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 1479 OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 1480 ImmVal |= (ImmVal << 16); 1481 } 1482 1483 Inst.addOperand(MCOperand::createImm(ImmVal)); 1484 return; 1485 } 1486 default: 1487 llvm_unreachable("invalid operand size"); 1488 } 1489 1490 return; 1491 } 1492 1493 // We got int literal token. 1494 // Only sign extend inline immediates. 
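  // Examples (editor's addition): for a 16-bit operand an integer token of -1
  // passes the isInt<16>/isInlinableLiteral16 check below and is emitted as an
  // inline constant, whereas 0x1234 falls through and is emitted as the
  // literal value (Val & 0xffff).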
1495 // FIXME: No errors on truncation 1496 switch (OpTy) { 1497 case AMDGPU::OPERAND_REG_IMM_INT32: 1498 case AMDGPU::OPERAND_REG_IMM_FP32: 1499 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1500 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1501 if (isInt<32>(Val) && 1502 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1503 AsmParser->hasInv2PiInlineImm())) { 1504 Inst.addOperand(MCOperand::createImm(Val)); 1505 return; 1506 } 1507 1508 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1509 return; 1510 1511 case AMDGPU::OPERAND_REG_IMM_INT64: 1512 case AMDGPU::OPERAND_REG_IMM_FP64: 1513 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1514 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1515 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1516 Inst.addOperand(MCOperand::createImm(Val)); 1517 return; 1518 } 1519 1520 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1521 return; 1522 1523 case AMDGPU::OPERAND_REG_IMM_INT16: 1524 case AMDGPU::OPERAND_REG_IMM_FP16: 1525 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1526 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1527 if (isInt<16>(Val) && 1528 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1529 AsmParser->hasInv2PiInlineImm())) { 1530 Inst.addOperand(MCOperand::createImm(Val)); 1531 return; 1532 } 1533 1534 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1535 return; 1536 1537 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1538 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1539 auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue()); 1540 assert(AMDGPU::isInlinableLiteral16(LiteralVal, 1541 AsmParser->hasInv2PiInlineImm())); 1542 1543 uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 | 1544 static_cast<uint32_t>(LiteralVal); 1545 Inst.addOperand(MCOperand::createImm(ImmVal)); 1546 return; 1547 } 1548 default: 1549 llvm_unreachable("invalid operand size"); 1550 } 1551 } 1552 1553 template <unsigned Bitwidth> 1554 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1555 APInt Literal(64, Imm.Val); 1556 1557 if (!Imm.IsFPImm) { 1558 // We got int literal token. 
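    // Worked example (editor's addition): with Bitwidth == 16, an integer
    // token keeps only its low 16 bits here (0x12345 becomes 0x2345), while an
    // FP token such as 1.0 takes the conversion path below and is emitted as
    // the IEEE half bit pattern 0x3C00.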
1559 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1560 return; 1561 } 1562 1563 bool Lost; 1564 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1565 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1566 APFloat::rmNearestTiesToEven, &Lost); 1567 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1568 } 1569 1570 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1571 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1572 } 1573 1574 //===----------------------------------------------------------------------===// 1575 // AsmParser 1576 //===----------------------------------------------------------------------===// 1577 1578 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1579 if (Is == IS_VGPR) { 1580 switch (RegWidth) { 1581 default: return -1; 1582 case 1: return AMDGPU::VGPR_32RegClassID; 1583 case 2: return AMDGPU::VReg_64RegClassID; 1584 case 3: return AMDGPU::VReg_96RegClassID; 1585 case 4: return AMDGPU::VReg_128RegClassID; 1586 case 8: return AMDGPU::VReg_256RegClassID; 1587 case 16: return AMDGPU::VReg_512RegClassID; 1588 } 1589 } else if (Is == IS_TTMP) { 1590 switch (RegWidth) { 1591 default: return -1; 1592 case 1: return AMDGPU::TTMP_32RegClassID; 1593 case 2: return AMDGPU::TTMP_64RegClassID; 1594 case 4: return AMDGPU::TTMP_128RegClassID; 1595 case 8: return AMDGPU::TTMP_256RegClassID; 1596 case 16: return AMDGPU::TTMP_512RegClassID; 1597 } 1598 } else if (Is == IS_SGPR) { 1599 switch (RegWidth) { 1600 default: return -1; 1601 case 1: return AMDGPU::SGPR_32RegClassID; 1602 case 2: return AMDGPU::SGPR_64RegClassID; 1603 case 4: return AMDGPU::SGPR_128RegClassID; 1604 case 8: return AMDGPU::SGPR_256RegClassID; 1605 case 16: return AMDGPU::SGPR_512RegClassID; 1606 } 1607 } 1608 return -1; 1609 } 1610 1611 static unsigned getSpecialRegForName(StringRef RegName) { 1612 return StringSwitch<unsigned>(RegName) 1613 .Case("exec", AMDGPU::EXEC) 1614 .Case("vcc", AMDGPU::VCC) 1615 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1616 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1617 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1618 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1619 .Case("m0", AMDGPU::M0) 1620 .Case("scc", AMDGPU::SCC) 1621 .Case("tba", AMDGPU::TBA) 1622 .Case("tma", AMDGPU::TMA) 1623 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1624 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1625 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1626 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1627 .Case("vcc_lo", AMDGPU::VCC_LO) 1628 .Case("vcc_hi", AMDGPU::VCC_HI) 1629 .Case("exec_lo", AMDGPU::EXEC_LO) 1630 .Case("exec_hi", AMDGPU::EXEC_HI) 1631 .Case("tma_lo", AMDGPU::TMA_LO) 1632 .Case("tma_hi", AMDGPU::TMA_HI) 1633 .Case("tba_lo", AMDGPU::TBA_LO) 1634 .Case("tba_hi", AMDGPU::TBA_HI) 1635 .Default(0); 1636 } 1637 1638 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1639 SMLoc &EndLoc) { 1640 auto R = parseRegister(); 1641 if (!R) return true; 1642 assert(R->isReg()); 1643 RegNo = R->getReg(); 1644 StartLoc = R->getStartLoc(); 1645 EndLoc = R->getEndLoc(); 1646 return false; 1647 } 1648 1649 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1650 RegisterKind RegKind, unsigned Reg1, 1651 unsigned RegNum) { 1652 switch (RegKind) { 1653 case IS_SPECIAL: 1654 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1655 Reg = AMDGPU::EXEC; 1656 RegWidth = 2; 1657 return true; 1658 } 1659 if (Reg == 
AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1660 Reg = AMDGPU::FLAT_SCR; 1661 RegWidth = 2; 1662 return true; 1663 } 1664 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1665 Reg = AMDGPU::XNACK_MASK; 1666 RegWidth = 2; 1667 return true; 1668 } 1669 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1670 Reg = AMDGPU::VCC; 1671 RegWidth = 2; 1672 return true; 1673 } 1674 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1675 Reg = AMDGPU::TBA; 1676 RegWidth = 2; 1677 return true; 1678 } 1679 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1680 Reg = AMDGPU::TMA; 1681 RegWidth = 2; 1682 return true; 1683 } 1684 return false; 1685 case IS_VGPR: 1686 case IS_SGPR: 1687 case IS_TTMP: 1688 if (Reg1 != Reg + RegWidth) { 1689 return false; 1690 } 1691 RegWidth++; 1692 return true; 1693 default: 1694 llvm_unreachable("unexpected register kind"); 1695 } 1696 } 1697 1698 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1699 unsigned &RegNum, unsigned &RegWidth, 1700 unsigned *DwordRegIndex) { 1701 if (DwordRegIndex) { *DwordRegIndex = 0; } 1702 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1703 if (getLexer().is(AsmToken::Identifier)) { 1704 StringRef RegName = Parser.getTok().getString(); 1705 if ((Reg = getSpecialRegForName(RegName))) { 1706 Parser.Lex(); 1707 RegKind = IS_SPECIAL; 1708 } else { 1709 unsigned RegNumIndex = 0; 1710 if (RegName[0] == 'v') { 1711 RegNumIndex = 1; 1712 RegKind = IS_VGPR; 1713 } else if (RegName[0] == 's') { 1714 RegNumIndex = 1; 1715 RegKind = IS_SGPR; 1716 } else if (RegName.startswith("ttmp")) { 1717 RegNumIndex = strlen("ttmp"); 1718 RegKind = IS_TTMP; 1719 } else { 1720 return false; 1721 } 1722 if (RegName.size() > RegNumIndex) { 1723 // Single 32-bit register: vXX. 1724 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1725 return false; 1726 Parser.Lex(); 1727 RegWidth = 1; 1728 } else { 1729 // Range of registers: v[XX:YY]. ":YY" is optional. 
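        // Examples (editor's addition): "v[8:11]" yields RegNum = 8 and
        // RegWidth = 4 (a 4-dword VGPR tuple); "v[8]" is also accepted and is
        // equivalent to "v8" (RegWidth = 1).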
1730 Parser.Lex(); 1731 int64_t RegLo, RegHi; 1732 if (getLexer().isNot(AsmToken::LBrac)) 1733 return false; 1734 Parser.Lex(); 1735 1736 if (getParser().parseAbsoluteExpression(RegLo)) 1737 return false; 1738 1739 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1740 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1741 return false; 1742 Parser.Lex(); 1743 1744 if (isRBrace) { 1745 RegHi = RegLo; 1746 } else { 1747 if (getParser().parseAbsoluteExpression(RegHi)) 1748 return false; 1749 1750 if (getLexer().isNot(AsmToken::RBrac)) 1751 return false; 1752 Parser.Lex(); 1753 } 1754 RegNum = (unsigned) RegLo; 1755 RegWidth = (RegHi - RegLo) + 1; 1756 } 1757 } 1758 } else if (getLexer().is(AsmToken::LBrac)) { 1759 // List of consecutive registers: [s0,s1,s2,s3] 1760 Parser.Lex(); 1761 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1762 return false; 1763 if (RegWidth != 1) 1764 return false; 1765 RegisterKind RegKind1; 1766 unsigned Reg1, RegNum1, RegWidth1; 1767 do { 1768 if (getLexer().is(AsmToken::Comma)) { 1769 Parser.Lex(); 1770 } else if (getLexer().is(AsmToken::RBrac)) { 1771 Parser.Lex(); 1772 break; 1773 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1774 if (RegWidth1 != 1) { 1775 return false; 1776 } 1777 if (RegKind1 != RegKind) { 1778 return false; 1779 } 1780 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1781 return false; 1782 } 1783 } else { 1784 return false; 1785 } 1786 } while (true); 1787 } else { 1788 return false; 1789 } 1790 switch (RegKind) { 1791 case IS_SPECIAL: 1792 RegNum = 0; 1793 RegWidth = 1; 1794 break; 1795 case IS_VGPR: 1796 case IS_SGPR: 1797 case IS_TTMP: 1798 { 1799 unsigned Size = 1; 1800 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1801 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
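// For example, s[1:2] is rejected (a 64-bit pair must start at an even SGPR)
// and s[2:5] is rejected (a 128-bit tuple must be 4-dword aligned), while
// s[4:11] is accepted because the required alignment is capped at 4 dwords.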
1802 Size = std::min(RegWidth, 4u); 1803 } 1804 if (RegNum % Size != 0) 1805 return false; 1806 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1807 RegNum = RegNum / Size; 1808 int RCID = getRegClass(RegKind, RegWidth); 1809 if (RCID == -1) 1810 return false; 1811 const MCRegisterClass RC = TRI->getRegClass(RCID); 1812 if (RegNum >= RC.getNumRegs()) 1813 return false; 1814 Reg = RC.getRegister(RegNum); 1815 break; 1816 } 1817 1818 default: 1819 llvm_unreachable("unexpected register kind"); 1820 } 1821 1822 if (!subtargetHasRegister(*TRI, Reg)) 1823 return false; 1824 return true; 1825 } 1826 1827 Optional<StringRef> 1828 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1829 switch (RegKind) { 1830 case IS_VGPR: 1831 return StringRef(".amdgcn.next_free_vgpr"); 1832 case IS_SGPR: 1833 return StringRef(".amdgcn.next_free_sgpr"); 1834 default: 1835 return None; 1836 } 1837 } 1838 1839 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1840 auto SymbolName = getGprCountSymbolName(RegKind); 1841 assert(SymbolName && "initializing invalid register kind"); 1842 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1843 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1844 } 1845 1846 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1847 unsigned DwordRegIndex, 1848 unsigned RegWidth) { 1849 // Symbols are only defined for GCN targets 1850 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1851 return true; 1852 1853 auto SymbolName = getGprCountSymbolName(RegKind); 1854 if (!SymbolName) 1855 return true; 1856 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1857 1858 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1859 int64_t OldCount; 1860 1861 if (!Sym->isVariable()) 1862 return !Error(getParser().getTok().getLoc(), 1863 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1864 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1865 return !Error( 1866 getParser().getTok().getLoc(), 1867 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1868 1869 if (OldCount <= NewMax) 1870 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1871 1872 return true; 1873 } 1874 1875 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1876 const auto &Tok = Parser.getTok(); 1877 SMLoc StartLoc = Tok.getLoc(); 1878 SMLoc EndLoc = Tok.getEndLoc(); 1879 RegisterKind RegKind; 1880 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1881 1882 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1883 return nullptr; 1884 } 1885 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1886 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1887 return nullptr; 1888 } else 1889 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 1890 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 1891 } 1892 1893 bool 1894 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { 1895 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && 1896 (getLexer().getKind() == AsmToken::Integer || 1897 getLexer().getKind() == AsmToken::Real)) { 1898 // This is a workaround for handling operands like these: 1899 // |1.0| 1900 // |-1| 1901 // This syntax is not compatible with syntax of standard 1902 // MC expressions (due to the trailing '|'). 
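// Parsing only a primary expression below leaves the trailing '|' in the
// token stream so the caller can still consume it as the closing abs bar.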
1903 1904 SMLoc EndLoc; 1905 const MCExpr *Expr; 1906 1907 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 1908 return true; 1909 } 1910 1911 return !Expr->evaluateAsAbsolute(Val); 1912 } 1913 1914 return getParser().parseAbsoluteExpression(Val); 1915 } 1916 1917 OperandMatchResultTy 1918 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { 1919 // TODO: add syntactic sugar for 1/(2*PI) 1920 bool Minus = false; 1921 if (getLexer().getKind() == AsmToken::Minus) { 1922 const AsmToken NextToken = getLexer().peekTok(); 1923 if (!NextToken.is(AsmToken::Integer) && 1924 !NextToken.is(AsmToken::Real)) { 1925 return MatchOperand_NoMatch; 1926 } 1927 Minus = true; 1928 Parser.Lex(); 1929 } 1930 1931 SMLoc S = Parser.getTok().getLoc(); 1932 switch(getLexer().getKind()) { 1933 case AsmToken::Integer: { 1934 int64_t IntVal; 1935 if (parseAbsoluteExpr(IntVal, AbsMod)) 1936 return MatchOperand_ParseFail; 1937 if (Minus) 1938 IntVal *= -1; 1939 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 1940 return MatchOperand_Success; 1941 } 1942 case AsmToken::Real: { 1943 int64_t IntVal; 1944 if (parseAbsoluteExpr(IntVal, AbsMod)) 1945 return MatchOperand_ParseFail; 1946 1947 APFloat F(BitsToDouble(IntVal)); 1948 if (Minus) 1949 F.changeSign(); 1950 Operands.push_back( 1951 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, 1952 AMDGPUOperand::ImmTyNone, true)); 1953 return MatchOperand_Success; 1954 } 1955 default: 1956 return MatchOperand_NoMatch; 1957 } 1958 } 1959 1960 OperandMatchResultTy 1961 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 1962 if (auto R = parseRegister()) { 1963 assert(R->isReg()); 1964 R->Reg.IsForcedVOP3 = isForcedVOP3(); 1965 Operands.push_back(std::move(R)); 1966 return MatchOperand_Success; 1967 } 1968 return MatchOperand_NoMatch; 1969 } 1970 1971 OperandMatchResultTy 1972 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 1973 auto res = parseImm(Operands, AbsMod); 1974 if (res != MatchOperand_NoMatch) { 1975 return res; 1976 } 1977 1978 return parseReg(Operands); 1979 } 1980 1981 OperandMatchResultTy 1982 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 1983 bool AllowImm) { 1984 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 1985 1986 if (getLexer().getKind()== AsmToken::Minus) { 1987 const AsmToken NextToken = getLexer().peekTok(); 1988 1989 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 1990 if (NextToken.is(AsmToken::Minus)) { 1991 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 1992 return MatchOperand_ParseFail; 1993 } 1994 1995 // '-' followed by an integer literal N should be interpreted as integer 1996 // negation rather than a floating-point NEG modifier applied to N. 
1997 // Besides being counter-intuitive, such use of a floating-point NEG modifier
1998 // results in a different meaning of integer literals used with VOP1/2/C
1999 // and VOP3, for example:
2000 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2001 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2002 // Negative fp literals should be handled likewise for uniformity
2003 if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
2004 Parser.Lex();
2005 Negate = true;
2006 }
2007 }
2008 
2009 if (getLexer().getKind() == AsmToken::Identifier &&
2010 Parser.getTok().getString() == "neg") {
2011 if (Negate) {
2012 Error(Parser.getTok().getLoc(), "expected register or immediate");
2013 return MatchOperand_ParseFail;
2014 }
2015 Parser.Lex();
2016 Negate2 = true;
2017 if (getLexer().isNot(AsmToken::LParen)) {
2018 Error(Parser.getTok().getLoc(), "expected left paren after neg");
2019 return MatchOperand_ParseFail;
2020 }
2021 Parser.Lex();
2022 }
2023 
2024 if (getLexer().getKind() == AsmToken::Identifier &&
2025 Parser.getTok().getString() == "abs") {
2026 Parser.Lex();
2027 Abs2 = true;
2028 if (getLexer().isNot(AsmToken::LParen)) {
2029 Error(Parser.getTok().getLoc(), "expected left paren after abs");
2030 return MatchOperand_ParseFail;
2031 }
2032 Parser.Lex();
2033 }
2034 
2035 if (getLexer().getKind() == AsmToken::Pipe) {
2036 if (Abs2) {
2037 Error(Parser.getTok().getLoc(), "expected register or immediate");
2038 return MatchOperand_ParseFail;
2039 }
2040 Parser.Lex();
2041 Abs = true;
2042 }
2043 
2044 OperandMatchResultTy Res;
2045 if (AllowImm) {
2046 Res = parseRegOrImm(Operands, Abs);
2047 } else {
2048 Res = parseReg(Operands);
2049 }
2050 if (Res != MatchOperand_Success) {
2051 return Res;
2052 }
2053 
2054 AMDGPUOperand::Modifiers Mods;
2055 if (Abs) {
2056 if (getLexer().getKind() != AsmToken::Pipe) {
2057 Error(Parser.getTok().getLoc(), "expected vertical bar");
2058 return MatchOperand_ParseFail;
2059 }
2060 Parser.Lex();
2061 Mods.Abs = true;
2062 }
2063 if (Abs2) {
2064 if (getLexer().isNot(AsmToken::RParen)) {
2065 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2066 return MatchOperand_ParseFail;
2067 }
2068 Parser.Lex();
2069 Mods.Abs = true;
2070 }
2071 
2072 if (Negate) {
2073 Mods.Neg = true;
2074 } else if (Negate2) {
2075 if (getLexer().isNot(AsmToken::RParen)) {
2076 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2077 return MatchOperand_ParseFail;
2078 }
2079 Parser.Lex();
2080 Mods.Neg = true;
2081 }
2082 
2083 if (Mods.hasFPModifiers()) {
2084 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2085 Op.setModifiers(Mods);
2086 }
2087 return MatchOperand_Success;
2088 }
2089 
2090 OperandMatchResultTy
2091 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2092 bool AllowImm) {
2093 bool Sext = false;
2094 
2095 if (getLexer().getKind() == AsmToken::Identifier &&
2096 Parser.getTok().getString() == "sext") {
2097 Parser.Lex();
2098 Sext = true;
2099 if (getLexer().isNot(AsmToken::LParen)) {
2100 Error(Parser.getTok().getLoc(), "expected left paren after sext");
2101 return MatchOperand_ParseFail;
2102 }
2103 Parser.Lex();
2104 }
2105 
2106 OperandMatchResultTy Res;
2107 if (AllowImm) {
2108 Res = parseRegOrImm(Operands);
2109 } else {
2110 Res = parseReg(Operands);
2111 }
2112 if (Res != MatchOperand_Success) {
2113 return Res;
2114 }
2115 
2116 AMDGPUOperand::Modifiers Mods;
2117 if (Sext) {
2118 if (getLexer().isNot(AsmToken::RParen)) {
2119 Error(Parser.getTok().getLoc(), "expected closing 
parentheses"); 2120 return MatchOperand_ParseFail; 2121 } 2122 Parser.Lex(); 2123 Mods.Sext = true; 2124 } 2125 2126 if (Mods.hasIntModifiers()) { 2127 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2128 Op.setModifiers(Mods); 2129 } 2130 2131 return MatchOperand_Success; 2132 } 2133 2134 OperandMatchResultTy 2135 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2136 return parseRegOrImmWithFPInputMods(Operands, false); 2137 } 2138 2139 OperandMatchResultTy 2140 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2141 return parseRegOrImmWithIntInputMods(Operands, false); 2142 } 2143 2144 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2145 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2146 if (Reg) { 2147 Operands.push_back(std::move(Reg)); 2148 return MatchOperand_Success; 2149 } 2150 2151 const AsmToken &Tok = Parser.getTok(); 2152 if (Tok.getString() == "off") { 2153 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(), 2154 AMDGPUOperand::ImmTyOff, false)); 2155 Parser.Lex(); 2156 return MatchOperand_Success; 2157 } 2158 2159 return MatchOperand_NoMatch; 2160 } 2161 2162 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2163 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2164 2165 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2166 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2167 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2168 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2169 return Match_InvalidOperand; 2170 2171 if ((TSFlags & SIInstrFlags::VOP3) && 2172 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2173 getForcedEncodingSize() != 64) 2174 return Match_PreferE32; 2175 2176 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2177 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2178 // v_mac_f32/16 allow only dst_sel == DWORD; 2179 auto OpNum = 2180 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2181 const auto &Op = Inst.getOperand(OpNum); 2182 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2183 return Match_InvalidOperand; 2184 } 2185 } 2186 2187 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2188 // FIXME: Produces error without correct column reported. 
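// For example, an instruction such as "flat_load_dword v0, v[1:2] offset:8"
// should be rejected here when the subtarget does not support FLAT
// instruction offsets.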
2189 auto OpNum = 2190 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2191 const auto &Op = Inst.getOperand(OpNum); 2192 if (Op.getImm() != 0) 2193 return Match_InvalidOperand; 2194 } 2195 2196 return Match_Success; 2197 } 2198 2199 // What asm variants we should check 2200 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2201 if (getForcedEncodingSize() == 32) { 2202 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2203 return makeArrayRef(Variants); 2204 } 2205 2206 if (isForcedVOP3()) { 2207 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2208 return makeArrayRef(Variants); 2209 } 2210 2211 if (isForcedSDWA()) { 2212 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2213 AMDGPUAsmVariants::SDWA9}; 2214 return makeArrayRef(Variants); 2215 } 2216 2217 if (isForcedDPP()) { 2218 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2219 return makeArrayRef(Variants); 2220 } 2221 2222 static const unsigned Variants[] = { 2223 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2224 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2225 }; 2226 2227 return makeArrayRef(Variants); 2228 } 2229 2230 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2231 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2232 const unsigned Num = Desc.getNumImplicitUses(); 2233 for (unsigned i = 0; i < Num; ++i) { 2234 unsigned Reg = Desc.ImplicitUses[i]; 2235 switch (Reg) { 2236 case AMDGPU::FLAT_SCR: 2237 case AMDGPU::VCC: 2238 case AMDGPU::M0: 2239 return Reg; 2240 default: 2241 break; 2242 } 2243 } 2244 return AMDGPU::NoRegister; 2245 } 2246 2247 // NB: This code is correct only when used to check constant 2248 // bus limitations because GFX7 support no f16 inline constants. 2249 // Note that there are no cases when a GFX7 opcode violates 2250 // constant bus limitations due to the use of an f16 constant. 
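// As a reminder, the inline constants are the integers -16..64 and the
// floats 0.0, +-0.5, +-1.0, +-2.0, +-4.0 (plus 1/(2*pi) on subtargets with
// that feature); any other immediate counts toward the constant bus limit
// checked below.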
2251 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2252 unsigned OpIdx) const { 2253 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2254 2255 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2256 return false; 2257 } 2258 2259 const MCOperand &MO = Inst.getOperand(OpIdx); 2260 2261 int64_t Val = MO.getImm(); 2262 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2263 2264 switch (OpSize) { // expected operand size 2265 case 8: 2266 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2267 case 4: 2268 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2269 case 2: { 2270 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2271 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2272 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2273 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2274 } else { 2275 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2276 } 2277 } 2278 default: 2279 llvm_unreachable("invalid operand size"); 2280 } 2281 } 2282 2283 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2284 const MCOperand &MO = Inst.getOperand(OpIdx); 2285 if (MO.isImm()) { 2286 return !isInlineConstant(Inst, OpIdx); 2287 } 2288 return !MO.isReg() || 2289 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2290 } 2291 2292 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2293 const unsigned Opcode = Inst.getOpcode(); 2294 const MCInstrDesc &Desc = MII.get(Opcode); 2295 unsigned ConstantBusUseCount = 0; 2296 2297 if (Desc.TSFlags & 2298 (SIInstrFlags::VOPC | 2299 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2300 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2301 SIInstrFlags::SDWA)) { 2302 // Check special imm operands (used by madmk, etc) 2303 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2304 ++ConstantBusUseCount; 2305 } 2306 2307 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2308 if (SGPRUsed != AMDGPU::NoRegister) { 2309 ++ConstantBusUseCount; 2310 } 2311 2312 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2313 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2314 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2315 2316 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2317 2318 for (int OpIdx : OpIndices) { 2319 if (OpIdx == -1) break; 2320 2321 const MCOperand &MO = Inst.getOperand(OpIdx); 2322 if (usesConstantBus(Inst, OpIdx)) { 2323 if (MO.isReg()) { 2324 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2325 // Pairs of registers with a partial intersections like these 2326 // s0, s[0:1] 2327 // flat_scratch_lo, flat_scratch 2328 // flat_scratch_lo, flat_scratch_hi 2329 // are theoretically valid but they are disabled anyway. 
2330 // Note that this code mimics SIInstrInfo::verifyInstruction 2331 if (Reg != SGPRUsed) { 2332 ++ConstantBusUseCount; 2333 } 2334 SGPRUsed = Reg; 2335 } else { // Expression or a literal 2336 ++ConstantBusUseCount; 2337 } 2338 } 2339 } 2340 } 2341 2342 return ConstantBusUseCount <= 1; 2343 } 2344 2345 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2346 const unsigned Opcode = Inst.getOpcode(); 2347 const MCInstrDesc &Desc = MII.get(Opcode); 2348 2349 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2350 if (DstIdx == -1 || 2351 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2352 return true; 2353 } 2354 2355 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2356 2357 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2358 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2359 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2360 2361 assert(DstIdx != -1); 2362 const MCOperand &Dst = Inst.getOperand(DstIdx); 2363 assert(Dst.isReg()); 2364 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2365 2366 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2367 2368 for (int SrcIdx : SrcIndices) { 2369 if (SrcIdx == -1) break; 2370 const MCOperand &Src = Inst.getOperand(SrcIdx); 2371 if (Src.isReg()) { 2372 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2373 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2374 return false; 2375 } 2376 } 2377 } 2378 2379 return true; 2380 } 2381 2382 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2383 2384 const unsigned Opc = Inst.getOpcode(); 2385 const MCInstrDesc &Desc = MII.get(Opc); 2386 2387 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2388 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2389 assert(ClampIdx != -1); 2390 return Inst.getOperand(ClampIdx).getImm() == 0; 2391 } 2392 2393 return true; 2394 } 2395 2396 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2397 2398 const unsigned Opc = Inst.getOpcode(); 2399 const MCInstrDesc &Desc = MII.get(Opc); 2400 2401 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2402 return true; 2403 2404 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2405 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2406 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2407 2408 assert(VDataIdx != -1); 2409 assert(DMaskIdx != -1); 2410 assert(TFEIdx != -1); 2411 2412 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2413 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2414 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2415 if (DMask == 0) 2416 DMask = 1; 2417 2418 unsigned DataSize = 2419 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2420 if (hasPackedD16()) { 2421 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2422 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2423 DataSize = (DataSize + 1) / 2; 2424 } 2425 2426 return (VDataSize / 4) == DataSize + TFESize; 2427 } 2428 2429 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2430 2431 const unsigned Opc = Inst.getOpcode(); 2432 const MCInstrDesc &Desc = MII.get(Opc); 2433 2434 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2435 return true; 2436 if (!Desc.mayLoad() || !Desc.mayStore()) 2437 return true; // Not atomic 2438 2439 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2440 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2441 2442 // This is an incomplete check because image_atomic_cmpswap 2443 // may only use 0x3 and 0xf while other atomic operations 2444 // may use 0x1 and 0x3. However these limitations are 2445 // verified when we check that dmask matches dst size. 2446 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2447 } 2448 2449 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2450 2451 const unsigned Opc = Inst.getOpcode(); 2452 const MCInstrDesc &Desc = MII.get(Opc); 2453 2454 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2455 return true; 2456 2457 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2458 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2459 2460 // GATHER4 instructions use dmask in a different fashion compared to 2461 // other MIMG instructions. The only useful DMASK values are 2462 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2463 // (red,red,red,red) etc.) The ISA document doesn't mention 2464 // this. 2465 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2466 } 2467 2468 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2469 2470 const unsigned Opc = Inst.getOpcode(); 2471 const MCInstrDesc &Desc = MII.get(Opc); 2472 2473 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2474 return true; 2475 2476 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2477 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2478 if (isCI() || isSI()) 2479 return false; 2480 } 2481 2482 return true; 2483 } 2484 2485 static bool IsRevOpcode(const unsigned Opcode) 2486 { 2487 switch (Opcode) { 2488 case AMDGPU::V_SUBREV_F32_e32: 2489 case AMDGPU::V_SUBREV_F32_e64: 2490 case AMDGPU::V_SUBREV_F32_e32_si: 2491 case AMDGPU::V_SUBREV_F32_e32_vi: 2492 case AMDGPU::V_SUBREV_F32_e64_si: 2493 case AMDGPU::V_SUBREV_F32_e64_vi: 2494 case AMDGPU::V_SUBREV_I32_e32: 2495 case AMDGPU::V_SUBREV_I32_e64: 2496 case AMDGPU::V_SUBREV_I32_e32_si: 2497 case AMDGPU::V_SUBREV_I32_e64_si: 2498 case AMDGPU::V_SUBBREV_U32_e32: 2499 case AMDGPU::V_SUBBREV_U32_e64: 2500 case AMDGPU::V_SUBBREV_U32_e32_si: 2501 case AMDGPU::V_SUBBREV_U32_e32_vi: 2502 case AMDGPU::V_SUBBREV_U32_e64_si: 2503 case AMDGPU::V_SUBBREV_U32_e64_vi: 2504 case AMDGPU::V_SUBREV_U32_e32: 2505 case AMDGPU::V_SUBREV_U32_e64: 2506 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2507 case AMDGPU::V_SUBREV_U32_e32_vi: 2508 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2509 case AMDGPU::V_SUBREV_U32_e64_vi: 2510 case AMDGPU::V_SUBREV_F16_e32: 2511 case AMDGPU::V_SUBREV_F16_e64: 2512 case AMDGPU::V_SUBREV_F16_e32_vi: 2513 case AMDGPU::V_SUBREV_F16_e64_vi: 2514 case AMDGPU::V_SUBREV_U16_e32: 2515 case AMDGPU::V_SUBREV_U16_e64: 2516 case AMDGPU::V_SUBREV_U16_e32_vi: 2517 case AMDGPU::V_SUBREV_U16_e64_vi: 2518 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2519 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2520 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2521 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2522 case AMDGPU::V_LSHLREV_B32_e32_si: 2523 case AMDGPU::V_LSHLREV_B32_e64_si: 2524 case AMDGPU::V_LSHLREV_B16_e32_vi: 2525 case AMDGPU::V_LSHLREV_B16_e64_vi: 2526 case AMDGPU::V_LSHLREV_B32_e32_vi: 2527 case AMDGPU::V_LSHLREV_B32_e64_vi: 2528 case AMDGPU::V_LSHLREV_B64_vi: 2529 case AMDGPU::V_LSHRREV_B32_e32_si: 2530 case AMDGPU::V_LSHRREV_B32_e64_si: 2531 case AMDGPU::V_LSHRREV_B16_e32_vi: 2532 case AMDGPU::V_LSHRREV_B16_e64_vi: 2533 case AMDGPU::V_LSHRREV_B32_e32_vi: 2534 case AMDGPU::V_LSHRREV_B32_e64_vi: 2535 case AMDGPU::V_LSHRREV_B64_vi: 2536 case AMDGPU::V_ASHRREV_I32_e64_si: 2537 case AMDGPU::V_ASHRREV_I32_e32_si: 2538 case AMDGPU::V_ASHRREV_I16_e32_vi: 2539 case AMDGPU::V_ASHRREV_I16_e64_vi: 2540 case AMDGPU::V_ASHRREV_I32_e32_vi: 2541 case AMDGPU::V_ASHRREV_I32_e64_vi: 2542 case AMDGPU::V_ASHRREV_I64_vi: 2543 case AMDGPU::V_PK_LSHLREV_B16_vi: 2544 case AMDGPU::V_PK_LSHRREV_B16_vi: 2545 case AMDGPU::V_PK_ASHRREV_I16_vi: 2546 return true; 2547 default: 2548 return false; 2549 } 2550 } 2551 2552 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2553 2554 using namespace SIInstrFlags; 2555 const unsigned Opcode = Inst.getOpcode(); 2556 const MCInstrDesc &Desc = MII.get(Opcode); 2557 2558 // lds_direct register is defined so that it can be used 2559 // with 9-bit operands only. Ignore encodings which do not accept these. 2560 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2561 return true; 2562 2563 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2564 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2565 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2566 2567 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2568 2569 // lds_direct cannot be specified as either src1 or src2. 2570 for (int SrcIdx : SrcIndices) { 2571 if (SrcIdx == -1) break; 2572 const MCOperand &Src = Inst.getOperand(SrcIdx); 2573 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2574 return false; 2575 } 2576 } 2577 2578 if (Src0Idx == -1) 2579 return true; 2580 2581 const MCOperand &Src = Inst.getOperand(Src0Idx); 2582 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2583 return true; 2584 2585 // lds_direct is specified as src0. Check additional limitations. 
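// Specifically, lds_direct as src0 is rejected for SDWA encodings and for
// the *rev opcodes listed in IsRevOpcode above.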
2586 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2587 } 2588 2589 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2590 unsigned Opcode = Inst.getOpcode(); 2591 const MCInstrDesc &Desc = MII.get(Opcode); 2592 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2593 return true; 2594 2595 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2596 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2597 2598 const int OpIndices[] = { Src0Idx, Src1Idx }; 2599 2600 unsigned NumLiterals = 0; 2601 uint32_t LiteralValue; 2602 2603 for (int OpIdx : OpIndices) { 2604 if (OpIdx == -1) break; 2605 2606 const MCOperand &MO = Inst.getOperand(OpIdx); 2607 if (MO.isImm() && 2608 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2609 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2610 !isInlineConstant(Inst, OpIdx)) { 2611 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2612 if (NumLiterals == 0 || LiteralValue != Value) { 2613 LiteralValue = Value; 2614 ++NumLiterals; 2615 } 2616 } 2617 } 2618 2619 return NumLiterals <= 1; 2620 } 2621 2622 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2623 const SMLoc &IDLoc) { 2624 if (!validateLdsDirect(Inst)) { 2625 Error(IDLoc, 2626 "invalid use of lds_direct"); 2627 return false; 2628 } 2629 if (!validateSOPLiteral(Inst)) { 2630 Error(IDLoc, 2631 "only one literal operand is allowed"); 2632 return false; 2633 } 2634 if (!validateConstantBusLimitations(Inst)) { 2635 Error(IDLoc, 2636 "invalid operand (violates constant bus restrictions)"); 2637 return false; 2638 } 2639 if (!validateEarlyClobberLimitations(Inst)) { 2640 Error(IDLoc, 2641 "destination must be different than all sources"); 2642 return false; 2643 } 2644 if (!validateIntClampSupported(Inst)) { 2645 Error(IDLoc, 2646 "integer clamping is not supported on this GPU"); 2647 return false; 2648 } 2649 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2650 if (!validateMIMGD16(Inst)) { 2651 Error(IDLoc, 2652 "d16 modifier is not supported on this GPU"); 2653 return false; 2654 } 2655 if (!validateMIMGDataSize(Inst)) { 2656 Error(IDLoc, 2657 "image data size does not match dmask and tfe"); 2658 return false; 2659 } 2660 if (!validateMIMGAtomicDMask(Inst)) { 2661 Error(IDLoc, 2662 "invalid atomic image dmask"); 2663 return false; 2664 } 2665 if (!validateMIMGGatherDMask(Inst)) { 2666 Error(IDLoc, 2667 "invalid image_gather dmask: only one bit must be set"); 2668 return false; 2669 } 2670 2671 return true; 2672 } 2673 2674 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 2675 const FeatureBitset &FBS, 2676 unsigned VariantID = 0); 2677 2678 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2679 OperandVector &Operands, 2680 MCStreamer &Out, 2681 uint64_t &ErrorInfo, 2682 bool MatchingInlineAsm) { 2683 MCInst Inst; 2684 unsigned Result = Match_Success; 2685 for (auto Variant : getMatchedVariants()) { 2686 uint64_t EI; 2687 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2688 Variant); 2689 // We order match statuses from least to most specific. 
We use most specific 2690 // status as resulting 2691 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2692 if ((R == Match_Success) || 2693 (R == Match_PreferE32) || 2694 (R == Match_MissingFeature && Result != Match_PreferE32) || 2695 (R == Match_InvalidOperand && Result != Match_MissingFeature 2696 && Result != Match_PreferE32) || 2697 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2698 && Result != Match_MissingFeature 2699 && Result != Match_PreferE32)) { 2700 Result = R; 2701 ErrorInfo = EI; 2702 } 2703 if (R == Match_Success) 2704 break; 2705 } 2706 2707 switch (Result) { 2708 default: break; 2709 case Match_Success: 2710 if (!validateInstruction(Inst, IDLoc)) { 2711 return true; 2712 } 2713 Inst.setLoc(IDLoc); 2714 Out.EmitInstruction(Inst, getSTI()); 2715 return false; 2716 2717 case Match_MissingFeature: 2718 return Error(IDLoc, "instruction not supported on this GPU"); 2719 2720 case Match_MnemonicFail: { 2721 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2722 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2723 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2724 return Error(IDLoc, "invalid instruction" + Suggestion, 2725 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2726 } 2727 2728 case Match_InvalidOperand: { 2729 SMLoc ErrorLoc = IDLoc; 2730 if (ErrorInfo != ~0ULL) { 2731 if (ErrorInfo >= Operands.size()) { 2732 return Error(IDLoc, "too few operands for instruction"); 2733 } 2734 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2735 if (ErrorLoc == SMLoc()) 2736 ErrorLoc = IDLoc; 2737 } 2738 return Error(ErrorLoc, "invalid operand for instruction"); 2739 } 2740 2741 case Match_PreferE32: 2742 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2743 "should be encoded as e32"); 2744 } 2745 llvm_unreachable("Implement any new match types added!"); 2746 } 2747 2748 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2749 int64_t Tmp = -1; 2750 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2751 return true; 2752 } 2753 if (getParser().parseAbsoluteExpression(Tmp)) { 2754 return true; 2755 } 2756 Ret = static_cast<uint32_t>(Tmp); 2757 return false; 2758 } 2759 2760 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2761 uint32_t &Minor) { 2762 if (ParseAsAbsoluteExpression(Major)) 2763 return TokError("invalid major version"); 2764 2765 if (getLexer().isNot(AsmToken::Comma)) 2766 return TokError("minor version number required, comma expected"); 2767 Lex(); 2768 2769 if (ParseAsAbsoluteExpression(Minor)) 2770 return TokError("invalid minor version"); 2771 2772 return false; 2773 } 2774 2775 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2776 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2777 return TokError("directive only supported for amdgcn architecture"); 2778 2779 std::string Target; 2780 2781 SMLoc TargetStart = getTok().getLoc(); 2782 if (getParser().parseEscapedString(Target)) 2783 return true; 2784 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2785 2786 std::string ExpectedTarget; 2787 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2788 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2789 2790 if (Target != ExpectedTargetOS.str()) 2791 return getParser().Error(TargetRange.Start, "target must match options", 2792 TargetRange); 2793 2794 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2795 return false; 2796 } 2797 2798 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2799 return getParser().Error(Range.Start, "value out of range", Range); 2800 } 2801 2802 bool AMDGPUAsmParser::calculateGPRBlocks( 2803 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2804 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2805 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2806 unsigned &SGPRBlocks) { 2807 // TODO(scott.linder): These calculations are duplicated from 2808 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2809 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2810 2811 unsigned NumVGPRs = NextFreeVGPR; 2812 unsigned NumSGPRs = NextFreeSGPR; 2813 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); 2814 2815 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2816 NumSGPRs > MaxAddressableNumSGPRs) 2817 return OutOfRangeError(SGPRRange); 2818 2819 NumSGPRs += 2820 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2821 2822 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2823 NumSGPRs > MaxAddressableNumSGPRs) 2824 return OutOfRangeError(SGPRRange); 2825 2826 if (Features.test(FeatureSGPRInitBug)) 2827 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2828 2829 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2830 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2831 2832 return false; 2833 } 2834 2835 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2836 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2837 return TokError("directive only supported for amdgcn architecture"); 2838 2839 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2840 return TokError("directive only supported for amdhsa OS"); 2841 2842 StringRef KernelName; 2843 if (getParser().parseIdentifier(KernelName)) 2844 return true; 2845 2846 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2847 2848 StringSet<> Seen; 2849 2850 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2851 2852 SMRange VGPRRange; 2853 uint64_t NextFreeVGPR = 0; 2854 SMRange SGPRRange; 2855 uint64_t NextFreeSGPR = 0; 2856 unsigned UserSGPRCount = 0; 2857 bool ReserveVCC = true; 2858 bool ReserveFlatScr = true; 2859 bool ReserveXNACK = hasXNACK(); 2860 2861 while (true) { 2862 while (getLexer().is(AsmToken::EndOfStatement)) 2863 Lex(); 2864 2865 if (getLexer().isNot(AsmToken::Identifier)) 2866 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2867 2868 StringRef ID = getTok().getIdentifier(); 2869 SMRange IDRange = getTok().getLocRange(); 2870 Lex(); 2871 2872 if (ID == ".end_amdhsa_kernel") 2873 break; 2874 2875 if (Seen.find(ID) != Seen.end()) 2876 return TokError(".amdhsa_ directives cannot be repeated"); 2877 Seen.insert(ID); 2878 2879 SMLoc ValStart = getTok().getLoc(); 2880 int64_t IVal; 2881 if (getParser().parseAbsoluteExpression(IVal)) 2882 return true; 2883 SMLoc ValEnd = getTok().getLoc(); 2884 SMRange ValRange = SMRange(ValStart, ValEnd); 2885 2886 if (IVal < 0) 2887 return OutOfRangeError(ValRange); 2888 2889 uint64_t Val = IVal; 2890 2891 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2892 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2893 return OutOfRangeError(RANGE); \ 2894 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2895 2896 if (ID == ".amdhsa_group_segment_fixed_size") { 2897 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2898 return OutOfRangeError(ValRange); 2899 KD.group_segment_fixed_size = Val; 2900 } else if (ID == 
".amdhsa_private_segment_fixed_size") { 2901 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2902 return OutOfRangeError(ValRange); 2903 KD.private_segment_fixed_size = Val; 2904 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2905 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2906 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2907 Val, ValRange); 2908 UserSGPRCount++; 2909 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2910 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2911 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2912 ValRange); 2913 UserSGPRCount++; 2914 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2915 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2916 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2917 ValRange); 2918 UserSGPRCount++; 2919 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 2920 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2921 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2922 Val, ValRange); 2923 UserSGPRCount++; 2924 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2925 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2926 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2927 ValRange); 2928 UserSGPRCount++; 2929 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2930 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2931 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2932 ValRange); 2933 UserSGPRCount++; 2934 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2935 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2936 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2937 Val, ValRange); 2938 UserSGPRCount++; 2939 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2940 PARSE_BITS_ENTRY( 2941 KD.compute_pgm_rsrc2, 2942 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2943 ValRange); 2944 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2945 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2946 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 2947 ValRange); 2948 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 2949 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2950 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 2951 ValRange); 2952 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 2953 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2954 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 2955 ValRange); 2956 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 2957 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2958 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 2959 ValRange); 2960 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 2961 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2962 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 2963 ValRange); 2964 } else if (ID == ".amdhsa_next_free_vgpr") { 2965 VGPRRange = ValRange; 2966 NextFreeVGPR = Val; 2967 } else if (ID == ".amdhsa_next_free_sgpr") { 2968 SGPRRange = ValRange; 2969 NextFreeSGPR = Val; 2970 } else if (ID == ".amdhsa_reserve_vcc") { 2971 if (!isUInt<1>(Val)) 2972 return OutOfRangeError(ValRange); 2973 ReserveVCC = Val; 2974 } else if (ID == ".amdhsa_reserve_flat_scratch") { 2975 if (IVersion.Major < 7) 2976 return getParser().Error(IDRange.Start, "directive requires gfx7+", 2977 IDRange); 2978 if (!isUInt<1>(Val)) 2979 return OutOfRangeError(ValRange); 2980 ReserveFlatScr = Val; 2981 } else if (ID == ".amdhsa_reserve_xnack_mask") { 2982 if (IVersion.Major < 8) 2983 return getParser().Error(IDRange.Start, "directive requires gfx8+", 2984 IDRange); 2985 if 
(!isUInt<1>(Val)) 2986 return OutOfRangeError(ValRange); 2987 ReserveXNACK = Val; 2988 } else if (ID == ".amdhsa_float_round_mode_32") { 2989 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2990 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 2991 } else if (ID == ".amdhsa_float_round_mode_16_64") { 2992 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2993 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 2994 } else if (ID == ".amdhsa_float_denorm_mode_32") { 2995 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2996 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 2997 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 2998 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2999 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3000 ValRange); 3001 } else if (ID == ".amdhsa_dx10_clamp") { 3002 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3003 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3004 } else if (ID == ".amdhsa_ieee_mode") { 3005 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3006 Val, ValRange); 3007 } else if (ID == ".amdhsa_fp16_overflow") { 3008 if (IVersion.Major < 9) 3009 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3010 IDRange); 3011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3012 ValRange); 3013 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3014 PARSE_BITS_ENTRY( 3015 KD.compute_pgm_rsrc2, 3016 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3017 ValRange); 3018 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3020 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3021 Val, ValRange); 3022 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3023 PARSE_BITS_ENTRY( 3024 KD.compute_pgm_rsrc2, 3025 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3026 ValRange); 3027 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3028 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3029 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3030 Val, ValRange); 3031 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3032 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3033 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3034 Val, ValRange); 3035 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3036 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3037 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3038 Val, ValRange); 3039 } else if (ID == ".amdhsa_exception_int_div_zero") { 3040 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3041 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3042 Val, ValRange); 3043 } else { 3044 return getParser().Error(IDRange.Start, 3045 "unknown .amdhsa_kernel directive", IDRange); 3046 } 3047 3048 #undef PARSE_BITS_ENTRY 3049 } 3050 3051 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3052 return TokError(".amdhsa_next_free_vgpr directive is required"); 3053 3054 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3055 return TokError(".amdhsa_next_free_sgpr directive is required"); 3056 3057 unsigned VGPRBlocks; 3058 unsigned SGPRBlocks; 3059 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3060 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3061 SGPRRange, VGPRBlocks, SGPRBlocks)) 3062 return true; 3063 3064 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3065 VGPRBlocks)) 3066 return OutOfRangeError(VGPRRange); 3067 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3068 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, 
VGPRBlocks); 3069 3070 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3071 SGPRBlocks)) 3072 return OutOfRangeError(SGPRRange); 3073 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3074 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3075 SGPRBlocks); 3076 3077 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3078 return TokError("too many user SGPRs enabled"); 3079 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3080 UserSGPRCount); 3081 3082 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3083 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3084 ReserveFlatScr, ReserveXNACK); 3085 return false; 3086 } 3087 3088 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3089 uint32_t Major; 3090 uint32_t Minor; 3091 3092 if (ParseDirectiveMajorMinor(Major, Minor)) 3093 return true; 3094 3095 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3096 return false; 3097 } 3098 3099 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3100 uint32_t Major; 3101 uint32_t Minor; 3102 uint32_t Stepping; 3103 StringRef VendorName; 3104 StringRef ArchName; 3105 3106 // If this directive has no arguments, then use the ISA version for the 3107 // targeted GPU. 3108 if (getLexer().is(AsmToken::EndOfStatement)) { 3109 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3110 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3111 ISA.Stepping, 3112 "AMD", "AMDGPU"); 3113 return false; 3114 } 3115 3116 if (ParseDirectiveMajorMinor(Major, Minor)) 3117 return true; 3118 3119 if (getLexer().isNot(AsmToken::Comma)) 3120 return TokError("stepping version number required, comma expected"); 3121 Lex(); 3122 3123 if (ParseAsAbsoluteExpression(Stepping)) 3124 return TokError("invalid stepping version"); 3125 3126 if (getLexer().isNot(AsmToken::Comma)) 3127 return TokError("vendor name required, comma expected"); 3128 Lex(); 3129 3130 if (getLexer().isNot(AsmToken::String)) 3131 return TokError("invalid vendor name"); 3132 3133 VendorName = getLexer().getTok().getStringContents(); 3134 Lex(); 3135 3136 if (getLexer().isNot(AsmToken::Comma)) 3137 return TokError("arch name required, comma expected"); 3138 Lex(); 3139 3140 if (getLexer().isNot(AsmToken::String)) 3141 return TokError("invalid arch name"); 3142 3143 ArchName = getLexer().getTok().getStringContents(); 3144 Lex(); 3145 3146 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3147 VendorName, ArchName); 3148 return false; 3149 } 3150 3151 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3152 amd_kernel_code_t &Header) { 3153 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3154 // assembly for backwards compatibility. 3155 if (ID == "max_scratch_backing_memory_byte_size") { 3156 Parser.eatToEndOfStatement(); 3157 return false; 3158 } 3159 3160 SmallString<40> ErrStr; 3161 raw_svector_ostream Err(ErrStr); 3162 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3163 return TokError(Err.str()); 3164 } 3165 Lex(); 3166 return false; 3167 } 3168 3169 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3170 amd_kernel_code_t Header; 3171 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3172 3173 while (true) { 3174 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3175 // will set the current token to EndOfStatement. 
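// The directive body is a sequence of "<field> = <expression>" records, one
// per statement, terminated by .end_amd_kernel_code_t; each record is
// handled by ParseAMDKernelCodeTValue above.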
3176 while(getLexer().is(AsmToken::EndOfStatement)) 3177 Lex(); 3178 3179 if (getLexer().isNot(AsmToken::Identifier)) 3180 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3181 3182 StringRef ID = getLexer().getTok().getIdentifier(); 3183 Lex(); 3184 3185 if (ID == ".end_amd_kernel_code_t") 3186 break; 3187 3188 if (ParseAMDKernelCodeTValue(ID, Header)) 3189 return true; 3190 } 3191 3192 getTargetStreamer().EmitAMDKernelCodeT(Header); 3193 3194 return false; 3195 } 3196 3197 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3198 if (getLexer().isNot(AsmToken::Identifier)) 3199 return TokError("expected symbol name"); 3200 3201 StringRef KernelName = Parser.getTok().getString(); 3202 3203 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3204 ELF::STT_AMDGPU_HSA_KERNEL); 3205 Lex(); 3206 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3207 KernelScope.initialize(getContext()); 3208 return false; 3209 } 3210 3211 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3212 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3213 return Error(getParser().getTok().getLoc(), 3214 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3215 "architectures"); 3216 } 3217 3218 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3219 3220 std::string ISAVersionStringFromSTI; 3221 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3222 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3223 3224 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3225 return Error(getParser().getTok().getLoc(), 3226 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3227 "arguments specified through the command line"); 3228 } 3229 3230 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3231 Lex(); 3232 3233 return false; 3234 } 3235 3236 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3237 const char *AssemblerDirectiveBegin; 3238 const char *AssemblerDirectiveEnd; 3239 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3240 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3241 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3242 HSAMD::V3::AssemblerDirectiveEnd) 3243 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3244 HSAMD::AssemblerDirectiveEnd); 3245 3246 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3247 return Error(getParser().getTok().getLoc(), 3248 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3249 "not available on non-amdhsa OSes")).str()); 3250 } 3251 3252 std::string HSAMetadataString; 3253 raw_string_ostream YamlStream(HSAMetadataString); 3254 3255 getLexer().setSkipSpace(false); 3256 3257 bool FoundEnd = false; 3258 while (!getLexer().is(AsmToken::Eof)) { 3259 while (getLexer().is(AsmToken::Space)) { 3260 YamlStream << getLexer().getTok().getString(); 3261 Lex(); 3262 } 3263 3264 if (getLexer().is(AsmToken::Identifier)) { 3265 StringRef ID = getLexer().getTok().getIdentifier(); 3266 if (ID == AssemblerDirectiveEnd) { 3267 Lex(); 3268 FoundEnd = true; 3269 break; 3270 } 3271 } 3272 3273 YamlStream << Parser.parseStringToEndOfStatement() 3274 << getContext().getAsmInfo()->getSeparatorString(); 3275 3276 Parser.eatToEndOfStatement(); 3277 } 3278 3279 getLexer().setSkipSpace(true); 3280 3281 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3282 return TokError(Twine("expected directive ") + 3283 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); 3284 } 3285 3286 YamlStream.flush(); 3287 3288 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3289 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3290 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3291 } else { 3292 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3293 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3294 } 3295 3296 return false; 3297 } 3298 3299 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3300 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3301 return Error(getParser().getTok().getLoc(), 3302 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3303 "not available on non-amdpal OSes")).str()); 3304 } 3305 3306 PALMD::Metadata PALMetadata; 3307 for (;;) { 3308 uint32_t Value; 3309 if (ParseAsAbsoluteExpression(Value)) { 3310 return TokError(Twine("invalid value in ") + 3311 Twine(PALMD::AssemblerDirective)); 3312 } 3313 PALMetadata.push_back(Value); 3314 if (getLexer().isNot(AsmToken::Comma)) 3315 break; 3316 Lex(); 3317 } 3318 getTargetStreamer().EmitPALMetadata(PALMetadata); 3319 return false; 3320 } 3321 3322 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3323 StringRef IDVal = DirectiveID.getString(); 3324 3325 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3326 if (IDVal == ".amdgcn_target") 3327 return ParseDirectiveAMDGCNTarget(); 3328 3329 if (IDVal == ".amdhsa_kernel") 3330 return ParseDirectiveAMDHSAKernel(); 3331 3332 // TODO: Restructure/combine with PAL metadata directive. 
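// For code object v3 the metadata block is delimited by the V3 begin/end
// directives selected here; the same routine also handles the v2 directive
// dispatched from ParseDirective below.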
3333 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3334 return ParseDirectiveHSAMetadata(); 3335 } else { 3336 if (IDVal == ".hsa_code_object_version") 3337 return ParseDirectiveHSACodeObjectVersion(); 3338 3339 if (IDVal == ".hsa_code_object_isa") 3340 return ParseDirectiveHSACodeObjectISA(); 3341 3342 if (IDVal == ".amd_kernel_code_t") 3343 return ParseDirectiveAMDKernelCodeT(); 3344 3345 if (IDVal == ".amdgpu_hsa_kernel") 3346 return ParseDirectiveAMDGPUHsaKernel(); 3347 3348 if (IDVal == ".amd_amdgpu_isa") 3349 return ParseDirectiveISAVersion(); 3350 3351 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3352 return ParseDirectiveHSAMetadata(); 3353 } 3354 3355 if (IDVal == PALMD::AssemblerDirective) 3356 return ParseDirectivePALMetadata(); 3357 3358 return true; 3359 } 3360 3361 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3362 unsigned RegNo) const { 3363 3364 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3365 R.isValid(); ++R) { 3366 if (*R == RegNo) 3367 return isGFX9(); 3368 } 3369 3370 switch (RegNo) { 3371 case AMDGPU::TBA: 3372 case AMDGPU::TBA_LO: 3373 case AMDGPU::TBA_HI: 3374 case AMDGPU::TMA: 3375 case AMDGPU::TMA_LO: 3376 case AMDGPU::TMA_HI: 3377 return !isGFX9(); 3378 case AMDGPU::XNACK_MASK: 3379 case AMDGPU::XNACK_MASK_LO: 3380 case AMDGPU::XNACK_MASK_HI: 3381 return !isCI() && !isSI() && hasXNACK(); 3382 default: 3383 break; 3384 } 3385 3386 if (isCI()) 3387 return true; 3388 3389 if (isSI()) { 3390 // No flat_scr 3391 switch (RegNo) { 3392 case AMDGPU::FLAT_SCR: 3393 case AMDGPU::FLAT_SCR_LO: 3394 case AMDGPU::FLAT_SCR_HI: 3395 return false; 3396 default: 3397 return true; 3398 } 3399 } 3400 3401 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3402 // SI/CI have. 3403 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3404 R.isValid(); ++R) { 3405 if (*R == RegNo) 3406 return false; 3407 } 3408 3409 return true; 3410 } 3411 3412 OperandMatchResultTy 3413 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3414 // Try to parse with a custom parser 3415 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3416 3417 // If we successfully parsed the operand or if there as an error parsing, 3418 // we are done. 3419 // 3420 // If we are parsing after we reach EndOfStatement then this means we 3421 // are appending default values to the Operands list. This is only done 3422 // by custom parser, so we shouldn't continue on to the generic parsing. 3423 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3424 getLexer().is(AsmToken::EndOfStatement)) 3425 return ResTy; 3426 3427 ResTy = parseRegOrImm(Operands); 3428 3429 if (ResTy == MatchOperand_Success) 3430 return ResTy; 3431 3432 const auto &Tok = Parser.getTok(); 3433 SMLoc S = Tok.getLoc(); 3434 3435 const MCExpr *Expr = nullptr; 3436 if (!Parser.parseExpression(Expr)) { 3437 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3438 return MatchOperand_Success; 3439 } 3440 3441 // Possibly this is an instruction flag like 'gds'. 3442 if (Tok.getKind() == AsmToken::Identifier) { 3443 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3444 Parser.Lex(); 3445 return MatchOperand_Success; 3446 } 3447 3448 return MatchOperand_NoMatch; 3449 } 3450 3451 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3452 // Clear any forced encodings from the previous instruction. 
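// For example, "v_add_f32_e64" forces the 64-bit VOP3 encoding and
// "v_mov_b32_sdwa" forces the SDWA encoding; the recognized suffix is
// stripped before the mnemonic is matched.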
3453 setForcedEncodingSize(0); 3454 setForcedDPP(false); 3455 setForcedSDWA(false); 3456 3457 if (Name.endswith("_e64")) { 3458 setForcedEncodingSize(64); 3459 return Name.substr(0, Name.size() - 4); 3460 } else if (Name.endswith("_e32")) { 3461 setForcedEncodingSize(32); 3462 return Name.substr(0, Name.size() - 4); 3463 } else if (Name.endswith("_dpp")) { 3464 setForcedDPP(true); 3465 return Name.substr(0, Name.size() - 4); 3466 } else if (Name.endswith("_sdwa")) { 3467 setForcedSDWA(true); 3468 return Name.substr(0, Name.size() - 5); 3469 } 3470 return Name; 3471 } 3472 3473 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3474 StringRef Name, 3475 SMLoc NameLoc, OperandVector &Operands) { 3476 // Add the instruction mnemonic 3477 Name = parseMnemonicSuffix(Name); 3478 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3479 3480 while (!getLexer().is(AsmToken::EndOfStatement)) { 3481 OperandMatchResultTy Res = parseOperand(Operands, Name); 3482 3483 // Eat the comma or space if there is one. 3484 if (getLexer().is(AsmToken::Comma)) 3485 Parser.Lex(); 3486 3487 switch (Res) { 3488 case MatchOperand_Success: break; 3489 case MatchOperand_ParseFail: 3490 Error(getLexer().getLoc(), "failed parsing operand."); 3491 while (!getLexer().is(AsmToken::EndOfStatement)) { 3492 Parser.Lex(); 3493 } 3494 return true; 3495 case MatchOperand_NoMatch: 3496 Error(getLexer().getLoc(), "not a valid operand."); 3497 while (!getLexer().is(AsmToken::EndOfStatement)) { 3498 Parser.Lex(); 3499 } 3500 return true; 3501 } 3502 } 3503 3504 return false; 3505 } 3506 3507 //===----------------------------------------------------------------------===// 3508 // Utility functions 3509 //===----------------------------------------------------------------------===// 3510 3511 OperandMatchResultTy 3512 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3513 switch(getLexer().getKind()) { 3514 default: return MatchOperand_NoMatch; 3515 case AsmToken::Identifier: { 3516 StringRef Name = Parser.getTok().getString(); 3517 if (!Name.equals(Prefix)) { 3518 return MatchOperand_NoMatch; 3519 } 3520 3521 Parser.Lex(); 3522 if (getLexer().isNot(AsmToken::Colon)) 3523 return MatchOperand_ParseFail; 3524 3525 Parser.Lex(); 3526 3527 bool IsMinus = false; 3528 if (getLexer().getKind() == AsmToken::Minus) { 3529 Parser.Lex(); 3530 IsMinus = true; 3531 } 3532 3533 if (getLexer().isNot(AsmToken::Integer)) 3534 return MatchOperand_ParseFail; 3535 3536 if (getParser().parseAbsoluteExpression(Int)) 3537 return MatchOperand_ParseFail; 3538 3539 if (IsMinus) 3540 Int = -Int; 3541 break; 3542 } 3543 } 3544 return MatchOperand_Success; 3545 } 3546 3547 OperandMatchResultTy 3548 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3549 AMDGPUOperand::ImmTy ImmTy, 3550 bool (*ConvertResult)(int64_t&)) { 3551 SMLoc S = Parser.getTok().getLoc(); 3552 int64_t Value = 0; 3553 3554 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3555 if (Res != MatchOperand_Success) 3556 return Res; 3557 3558 if (ConvertResult && !ConvertResult(Value)) { 3559 return MatchOperand_ParseFail; 3560 } 3561 3562 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3563 return MatchOperand_Success; 3564 } 3565 3566 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3567 const char *Prefix, 3568 OperandVector &Operands, 3569 AMDGPUOperand::ImmTy ImmTy, 3570 bool (*ConvertResult)(int64_t&)) { 3571 StringRef Name = Parser.getTok().getString(); 3572 
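  // Parses prefixed arrays such as "op_sel:[0,0,1,1]"; each element must be 0 or 1,
  // and element I is packed into bit I of the resulting immediate.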
if (!Name.equals(Prefix)) 3573 return MatchOperand_NoMatch; 3574 3575 Parser.Lex(); 3576 if (getLexer().isNot(AsmToken::Colon)) 3577 return MatchOperand_ParseFail; 3578 3579 Parser.Lex(); 3580 if (getLexer().isNot(AsmToken::LBrac)) 3581 return MatchOperand_ParseFail; 3582 Parser.Lex(); 3583 3584 unsigned Val = 0; 3585 SMLoc S = Parser.getTok().getLoc(); 3586 3587 // FIXME: How to verify the number of elements matches the number of src 3588 // operands? 3589 for (int I = 0; I < 4; ++I) { 3590 if (I != 0) { 3591 if (getLexer().is(AsmToken::RBrac)) 3592 break; 3593 3594 if (getLexer().isNot(AsmToken::Comma)) 3595 return MatchOperand_ParseFail; 3596 Parser.Lex(); 3597 } 3598 3599 if (getLexer().isNot(AsmToken::Integer)) 3600 return MatchOperand_ParseFail; 3601 3602 int64_t Op; 3603 if (getParser().parseAbsoluteExpression(Op)) 3604 return MatchOperand_ParseFail; 3605 3606 if (Op != 0 && Op != 1) 3607 return MatchOperand_ParseFail; 3608 Val |= (Op << I); 3609 } 3610 3611 Parser.Lex(); 3612 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3613 return MatchOperand_Success; 3614 } 3615 3616 OperandMatchResultTy 3617 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3618 AMDGPUOperand::ImmTy ImmTy) { 3619 int64_t Bit = 0; 3620 SMLoc S = Parser.getTok().getLoc(); 3621 3622 // We are at the end of the statement, and this is a default argument, so 3623 // use a default value. 3624 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3625 switch(getLexer().getKind()) { 3626 case AsmToken::Identifier: { 3627 StringRef Tok = Parser.getTok().getString(); 3628 if (Tok == Name) { 3629 if (Tok == "r128" && isGFX9()) 3630 Error(S, "r128 modifier is not supported on this GPU"); 3631 if (Tok == "a16" && !isGFX9()) 3632 Error(S, "a16 modifier is not supported on this GPU"); 3633 Bit = 1; 3634 Parser.Lex(); 3635 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3636 Bit = 0; 3637 Parser.Lex(); 3638 } else { 3639 return MatchOperand_NoMatch; 3640 } 3641 break; 3642 } 3643 default: 3644 return MatchOperand_NoMatch; 3645 } 3646 } 3647 3648 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3649 return MatchOperand_Success; 3650 } 3651 3652 static void addOptionalImmOperand( 3653 MCInst& Inst, const OperandVector& Operands, 3654 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3655 AMDGPUOperand::ImmTy ImmT, 3656 int64_t Default = 0) { 3657 auto i = OptionalIdx.find(ImmT); 3658 if (i != OptionalIdx.end()) { 3659 unsigned Idx = i->second; 3660 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3661 } else { 3662 Inst.addOperand(MCOperand::createImm(Default)); 3663 } 3664 } 3665 3666 OperandMatchResultTy 3667 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3668 if (getLexer().isNot(AsmToken::Identifier)) { 3669 return MatchOperand_NoMatch; 3670 } 3671 StringRef Tok = Parser.getTok().getString(); 3672 if (Tok != Prefix) { 3673 return MatchOperand_NoMatch; 3674 } 3675 3676 Parser.Lex(); 3677 if (getLexer().isNot(AsmToken::Colon)) { 3678 return MatchOperand_ParseFail; 3679 } 3680 3681 Parser.Lex(); 3682 if (getLexer().isNot(AsmToken::Identifier)) { 3683 return MatchOperand_ParseFail; 3684 } 3685 3686 Value = Parser.getTok().getString(); 3687 return MatchOperand_Success; 3688 } 3689 3690 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3691 // values to live in a joint format operand in the MCInst encoding. 
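// For example, "dfmt:1, nfmt:7" (in either order; each may be omitted) yields
// Format = 1 | (7 << 4) = 0x71 below; dfmt must be < 16 and nfmt < 8.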
3692 OperandMatchResultTy 3693 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3694 SMLoc S = Parser.getTok().getLoc(); 3695 int64_t Dfmt = 0, Nfmt = 0; 3696 // dfmt and nfmt can appear in either order, and each is optional. 3697 bool GotDfmt = false, GotNfmt = false; 3698 while (!GotDfmt || !GotNfmt) { 3699 if (!GotDfmt) { 3700 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3701 if (Res != MatchOperand_NoMatch) { 3702 if (Res != MatchOperand_Success) 3703 return Res; 3704 if (Dfmt >= 16) { 3705 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3706 return MatchOperand_ParseFail; 3707 } 3708 GotDfmt = true; 3709 Parser.Lex(); 3710 continue; 3711 } 3712 } 3713 if (!GotNfmt) { 3714 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3715 if (Res != MatchOperand_NoMatch) { 3716 if (Res != MatchOperand_Success) 3717 return Res; 3718 if (Nfmt >= 8) { 3719 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3720 return MatchOperand_ParseFail; 3721 } 3722 GotNfmt = true; 3723 Parser.Lex(); 3724 continue; 3725 } 3726 } 3727 break; 3728 } 3729 if (!GotDfmt && !GotNfmt) 3730 return MatchOperand_NoMatch; 3731 auto Format = Dfmt | Nfmt << 4; 3732 Operands.push_back( 3733 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3734 return MatchOperand_Success; 3735 } 3736 3737 //===----------------------------------------------------------------------===// 3738 // ds 3739 //===----------------------------------------------------------------------===// 3740 3741 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3742 const OperandVector &Operands) { 3743 OptionalImmIndexMap OptionalIdx; 3744 3745 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3746 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3747 3748 // Add the register arguments 3749 if (Op.isReg()) { 3750 Op.addRegOperands(Inst, 1); 3751 continue; 3752 } 3753 3754 // Handle optional arguments 3755 OptionalIdx[Op.getImmTy()] = i; 3756 } 3757 3758 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3759 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3760 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3761 3762 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3763 } 3764 3765 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3766 bool IsGdsHardcoded) { 3767 OptionalImmIndexMap OptionalIdx; 3768 3769 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3770 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3771 3772 // Add the register arguments 3773 if (Op.isReg()) { 3774 Op.addRegOperands(Inst, 1); 3775 continue; 3776 } 3777 3778 if (Op.isToken() && Op.getToken() == "gds") { 3779 IsGdsHardcoded = true; 3780 continue; 3781 } 3782 3783 // Handle optional arguments 3784 OptionalIdx[Op.getImmTy()] = i; 3785 } 3786 3787 AMDGPUOperand::ImmTy OffsetType = 3788 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3789 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3790 AMDGPUOperand::ImmTyOffset; 3791 3792 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3793 3794 if (!IsGdsHardcoded) { 3795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3796 } 3797 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3798 } 3799 3800 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3801 OptionalImmIndexMap OptionalIdx; 3802 3803 unsigned OperandIdx[4]; 3804 unsigned EnMask = 0; 3805 int SrcIdx = 0; 3806 3807 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3808 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3809 3810 // Add the register arguments 3811 if (Op.isReg()) { 3812 assert(SrcIdx < 4); 3813 OperandIdx[SrcIdx] = Inst.size(); 3814 Op.addRegOperands(Inst, 1); 3815 ++SrcIdx; 3816 continue; 3817 } 3818 3819 if (Op.isOff()) { 3820 assert(SrcIdx < 4); 3821 OperandIdx[SrcIdx] = Inst.size(); 3822 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3823 ++SrcIdx; 3824 continue; 3825 } 3826 3827 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3828 Op.addImmOperands(Inst, 1); 3829 continue; 3830 } 3831 3832 if (Op.isToken() && Op.getToken() == "done") 3833 continue; 3834 3835 // Handle optional arguments 3836 OptionalIdx[Op.getImmTy()] = i; 3837 } 3838 3839 assert(SrcIdx == 4); 3840 3841 bool Compr = false; 3842 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3843 Compr = true; 3844 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3845 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3846 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3847 } 3848 3849 for (auto i = 0; i < SrcIdx; ++i) { 3850 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3851 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3852 } 3853 } 3854 3855 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3856 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3857 3858 Inst.addOperand(MCOperand::createImm(EnMask)); 3859 } 3860 3861 //===----------------------------------------------------------------------===// 3862 // s_waitcnt 3863 //===----------------------------------------------------------------------===// 3864 3865 static bool 3866 encodeCnt( 3867 const AMDGPU::IsaVersion ISA, 3868 int64_t &IntVal, 3869 int64_t CntVal, 3870 bool Saturate, 3871 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 3872 unsigned (*decode)(const IsaVersion &Version, unsigned)) 3873 { 3874 bool Failed = false; 3875 3876 IntVal = encode(ISA, IntVal, CntVal); 3877 if (CntVal != decode(ISA, IntVal)) { 3878 if (Saturate) { 3879 IntVal = encode(ISA, IntVal, -1); 3880 } else { 3881 Failed = true; 3882 } 3883 } 3884 return Failed; 3885 } 3886 3887 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3888 StringRef CntName = Parser.getTok().getString(); 3889 int64_t CntVal; 3890 3891 Parser.Lex(); 3892 if (getLexer().isNot(AsmToken::LParen)) 3893 return true; 3894 3895 Parser.Lex(); 3896 if (getLexer().isNot(AsmToken::Integer)) 3897 return true; 3898 3899 SMLoc ValLoc = Parser.getTok().getLoc(); 3900 if (getParser().parseAbsoluteExpression(CntVal)) 3901 return true; 3902 3903 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3904 3905 bool Failed = true; 3906 bool Sat = CntName.endswith("_sat"); 3907 3908 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 3909 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 3910 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 3911 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 3912 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 3913 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 3914 } 3915 3916 if (Failed) { 3917 Error(ValLoc, "too large value for " + CntName); 3918 return true; 3919 } 3920 3921 if (getLexer().isNot(AsmToken::RParen)) { 3922 return true; 3923 } 3924 3925 Parser.Lex(); 3926 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 3927 const AsmToken NextToken = getLexer().peekTok(); 3928 if (NextToken.is(AsmToken::Identifier)) { 3929 Parser.Lex(); 3930 } 3931 } 3932 3933 return false; 3934 } 3935 3936 OperandMatchResultTy 3937 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 3938 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3939 int64_t Waitcnt = getWaitcntBitMask(ISA); 3940 SMLoc S = Parser.getTok().getLoc(); 3941 3942 switch(getLexer().getKind()) { 3943 default: return MatchOperand_ParseFail; 3944 case AsmToken::Integer: 3945 // The operand can be an integer value. 
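    // e.g. "s_waitcnt 0". Named forms such as "s_waitcnt vmcnt(0) lgkmcnt(0)" are
    // handled by parseCnt via the Identifier case below; counters left unspecified
    // keep their maximum (no-wait) value from getWaitcntBitMask.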
3946 if (getParser().parseAbsoluteExpression(Waitcnt)) 3947 return MatchOperand_ParseFail; 3948 break; 3949 3950 case AsmToken::Identifier: 3951 do { 3952 if (parseCnt(Waitcnt)) 3953 return MatchOperand_ParseFail; 3954 } while(getLexer().isNot(AsmToken::EndOfStatement)); 3955 break; 3956 } 3957 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 3958 return MatchOperand_Success; 3959 } 3960 3961 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 3962 int64_t &Width) { 3963 using namespace llvm::AMDGPU::Hwreg; 3964 3965 if (Parser.getTok().getString() != "hwreg") 3966 return true; 3967 Parser.Lex(); 3968 3969 if (getLexer().isNot(AsmToken::LParen)) 3970 return true; 3971 Parser.Lex(); 3972 3973 if (getLexer().is(AsmToken::Identifier)) { 3974 HwReg.IsSymbolic = true; 3975 HwReg.Id = ID_UNKNOWN_; 3976 const StringRef tok = Parser.getTok().getString(); 3977 int Last = ID_SYMBOLIC_LAST_; 3978 if (isSI() || isCI() || isVI()) 3979 Last = ID_SYMBOLIC_FIRST_GFX9_; 3980 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 3981 if (tok == IdSymbolic[i]) { 3982 HwReg.Id = i; 3983 break; 3984 } 3985 } 3986 Parser.Lex(); 3987 } else { 3988 HwReg.IsSymbolic = false; 3989 if (getLexer().isNot(AsmToken::Integer)) 3990 return true; 3991 if (getParser().parseAbsoluteExpression(HwReg.Id)) 3992 return true; 3993 } 3994 3995 if (getLexer().is(AsmToken::RParen)) { 3996 Parser.Lex(); 3997 return false; 3998 } 3999 4000 // optional params 4001 if (getLexer().isNot(AsmToken::Comma)) 4002 return true; 4003 Parser.Lex(); 4004 4005 if (getLexer().isNot(AsmToken::Integer)) 4006 return true; 4007 if (getParser().parseAbsoluteExpression(Offset)) 4008 return true; 4009 4010 if (getLexer().isNot(AsmToken::Comma)) 4011 return true; 4012 Parser.Lex(); 4013 4014 if (getLexer().isNot(AsmToken::Integer)) 4015 return true; 4016 if (getParser().parseAbsoluteExpression(Width)) 4017 return true; 4018 4019 if (getLexer().isNot(AsmToken::RParen)) 4020 return true; 4021 Parser.Lex(); 4022 4023 return false; 4024 } 4025 4026 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4027 using namespace llvm::AMDGPU::Hwreg; 4028 4029 int64_t Imm16Val = 0; 4030 SMLoc S = Parser.getTok().getLoc(); 4031 4032 switch(getLexer().getKind()) { 4033 default: return MatchOperand_NoMatch; 4034 case AsmToken::Integer: 4035 // The operand can be an integer value. 4036 if (getParser().parseAbsoluteExpression(Imm16Val)) 4037 return MatchOperand_NoMatch; 4038 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4039 Error(S, "invalid immediate: only 16-bit values are legal"); 4040 // Do not return error code, but create an imm operand anyway and proceed 4041 // to the next operand, if any. That avoids unneccessary error messages. 
4042 } 4043 break; 4044 4045 case AsmToken::Identifier: { 4046 OperandInfoTy HwReg(ID_UNKNOWN_); 4047 int64_t Offset = OFFSET_DEFAULT_; 4048 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4049 if (parseHwregConstruct(HwReg, Offset, Width)) 4050 return MatchOperand_ParseFail; 4051 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4052 if (HwReg.IsSymbolic) 4053 Error(S, "invalid symbolic name of hardware register"); 4054 else 4055 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4056 } 4057 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4058 Error(S, "invalid bit offset: only 5-bit values are legal"); 4059 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4060 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4061 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4062 } 4063 break; 4064 } 4065 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4066 return MatchOperand_Success; 4067 } 4068 4069 bool AMDGPUOperand::isSWaitCnt() const { 4070 return isImm(); 4071 } 4072 4073 bool AMDGPUOperand::isHwreg() const { 4074 return isImmTy(ImmTyHwreg); 4075 } 4076 4077 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4078 using namespace llvm::AMDGPU::SendMsg; 4079 4080 if (Parser.getTok().getString() != "sendmsg") 4081 return true; 4082 Parser.Lex(); 4083 4084 if (getLexer().isNot(AsmToken::LParen)) 4085 return true; 4086 Parser.Lex(); 4087 4088 if (getLexer().is(AsmToken::Identifier)) { 4089 Msg.IsSymbolic = true; 4090 Msg.Id = ID_UNKNOWN_; 4091 const std::string tok = Parser.getTok().getString(); 4092 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4093 switch(i) { 4094 default: continue; // Omit gaps. 4095 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 4096 } 4097 if (tok == IdSymbolic[i]) { 4098 Msg.Id = i; 4099 break; 4100 } 4101 } 4102 Parser.Lex(); 4103 } else { 4104 Msg.IsSymbolic = false; 4105 if (getLexer().isNot(AsmToken::Integer)) 4106 return true; 4107 if (getParser().parseAbsoluteExpression(Msg.Id)) 4108 return true; 4109 if (getLexer().is(AsmToken::Integer)) 4110 if (getParser().parseAbsoluteExpression(Msg.Id)) 4111 Msg.Id = ID_UNKNOWN_; 4112 } 4113 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4114 return false; 4115 4116 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4117 if (getLexer().isNot(AsmToken::RParen)) 4118 return true; 4119 Parser.Lex(); 4120 return false; 4121 } 4122 4123 if (getLexer().isNot(AsmToken::Comma)) 4124 return true; 4125 Parser.Lex(); 4126 4127 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4128 Operation.Id = ID_UNKNOWN_; 4129 if (getLexer().is(AsmToken::Identifier)) { 4130 Operation.IsSymbolic = true; 4131 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4132 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4133 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4134 const StringRef Tok = Parser.getTok().getString(); 4135 for (int i = F; i < L; ++i) { 4136 if (Tok == S[i]) { 4137 Operation.Id = i; 4138 break; 4139 } 4140 } 4141 Parser.Lex(); 4142 } else { 4143 Operation.IsSymbolic = false; 4144 if (getLexer().isNot(AsmToken::Integer)) 4145 return true; 4146 if (getParser().parseAbsoluteExpression(Operation.Id)) 4147 return true; 4148 } 4149 4150 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4151 // Stream id is optional. 4152 if (getLexer().is(AsmToken::RParen)) { 4153 Parser.Lex(); 4154 return false; 4155 } 4156 4157 if (getLexer().isNot(AsmToken::Comma)) 4158 return true; 4159 Parser.Lex(); 4160 4161 if (getLexer().isNot(AsmToken::Integer)) 4162 return true; 4163 if (getParser().parseAbsoluteExpression(StreamId)) 4164 return true; 4165 } 4166 4167 if (getLexer().isNot(AsmToken::RParen)) 4168 return true; 4169 Parser.Lex(); 4170 return false; 4171 } 4172 4173 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4174 if (getLexer().getKind() != AsmToken::Identifier) 4175 return MatchOperand_NoMatch; 4176 4177 StringRef Str = Parser.getTok().getString(); 4178 int Slot = StringSwitch<int>(Str) 4179 .Case("p10", 0) 4180 .Case("p20", 1) 4181 .Case("p0", 2) 4182 .Default(-1); 4183 4184 SMLoc S = Parser.getTok().getLoc(); 4185 if (Slot == -1) 4186 return MatchOperand_ParseFail; 4187 4188 Parser.Lex(); 4189 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4190 AMDGPUOperand::ImmTyInterpSlot)); 4191 return MatchOperand_Success; 4192 } 4193 4194 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4195 if (getLexer().getKind() != AsmToken::Identifier) 4196 return MatchOperand_NoMatch; 4197 4198 StringRef Str = Parser.getTok().getString(); 4199 if (!Str.startswith("attr")) 4200 return MatchOperand_NoMatch; 4201 4202 StringRef Chan = Str.take_back(2); 4203 int AttrChan = StringSwitch<int>(Chan) 4204 .Case(".x", 0) 4205 .Case(".y", 1) 4206 .Case(".z", 2) 4207 .Case(".w", 3) 4208 .Default(-1); 4209 if (AttrChan == -1) 4210 return MatchOperand_ParseFail; 4211 4212 Str = Str.drop_back(2).drop_front(4); 4213 4214 uint8_t Attr; 4215 if (Str.getAsInteger(10, Attr)) 4216 return MatchOperand_ParseFail; 4217 4218 SMLoc S = Parser.getTok().getLoc(); 4219 Parser.Lex(); 4220 if (Attr > 63) { 4221 Error(S, "out of bounds attr"); 4222 return MatchOperand_Success; 4223 } 4224 4225 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4226 4227 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4228 AMDGPUOperand::ImmTyInterpAttr)); 4229 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4230 AMDGPUOperand::ImmTyAttrChan)); 4231 return MatchOperand_Success; 4232 } 4233 4234 void AMDGPUAsmParser::errorExpTgt() { 4235 Error(Parser.getTok().getLoc(), "invalid exp target"); 4236 } 4237 4238 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4239 uint8_t &Val) { 4240 if (Str == "null") { 4241 Val = 9; 4242 return MatchOperand_Success; 4243 } 4244 4245 if (Str.startswith("mrt")) { 4246 Str = Str.drop_front(3); 4247 if (Str == "z") { // == mrtz 4248 Val = 8; 4249 return MatchOperand_Success; 4250 } 4251 4252 if (Str.getAsInteger(10, Val)) 4253 return MatchOperand_ParseFail; 4254 4255 if (Val > 7) 4256 errorExpTgt(); 4257 4258 return MatchOperand_Success; 4259 } 4260 4261 if (Str.startswith("pos")) { 4262 Str = Str.drop_front(3); 4263 if (Str.getAsInteger(10, Val)) 4264 return MatchOperand_ParseFail; 4265 4266 if (Val > 
3) 4267 errorExpTgt(); 4268 4269 Val += 12; 4270 return MatchOperand_Success; 4271 } 4272 4273 if (Str.startswith("param")) { 4274 Str = Str.drop_front(5); 4275 if (Str.getAsInteger(10, Val)) 4276 return MatchOperand_ParseFail; 4277 4278 if (Val >= 32) 4279 errorExpTgt(); 4280 4281 Val += 32; 4282 return MatchOperand_Success; 4283 } 4284 4285 if (Str.startswith("invalid_target_")) { 4286 Str = Str.drop_front(15); 4287 if (Str.getAsInteger(10, Val)) 4288 return MatchOperand_ParseFail; 4289 4290 errorExpTgt(); 4291 return MatchOperand_Success; 4292 } 4293 4294 return MatchOperand_NoMatch; 4295 } 4296 4297 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4298 uint8_t Val; 4299 StringRef Str = Parser.getTok().getString(); 4300 4301 auto Res = parseExpTgtImpl(Str, Val); 4302 if (Res != MatchOperand_Success) 4303 return Res; 4304 4305 SMLoc S = Parser.getTok().getLoc(); 4306 Parser.Lex(); 4307 4308 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4309 AMDGPUOperand::ImmTyExpTgt)); 4310 return MatchOperand_Success; 4311 } 4312 4313 OperandMatchResultTy 4314 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4315 using namespace llvm::AMDGPU::SendMsg; 4316 4317 int64_t Imm16Val = 0; 4318 SMLoc S = Parser.getTok().getLoc(); 4319 4320 switch(getLexer().getKind()) { 4321 default: 4322 return MatchOperand_NoMatch; 4323 case AsmToken::Integer: 4324 // The operand can be an integer value. 4325 if (getParser().parseAbsoluteExpression(Imm16Val)) 4326 return MatchOperand_NoMatch; 4327 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4328 Error(S, "invalid immediate: only 16-bit values are legal"); 4329 // Do not return error code, but create an imm operand anyway and proceed 4330 // to the next operand, if any. That avoids unneccessary error messages. 4331 } 4332 break; 4333 case AsmToken::Identifier: { 4334 OperandInfoTy Msg(ID_UNKNOWN_); 4335 OperandInfoTy Operation(OP_UNKNOWN_); 4336 int64_t StreamId = STREAM_ID_DEFAULT_; 4337 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4338 return MatchOperand_ParseFail; 4339 do { 4340 // Validate and encode message ID. 4341 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4342 || Msg.Id == ID_SYSMSG)) { 4343 if (Msg.IsSymbolic) 4344 Error(S, "invalid/unsupported symbolic name of message"); 4345 else 4346 Error(S, "invalid/unsupported code of message"); 4347 break; 4348 } 4349 Imm16Val = (Msg.Id << ID_SHIFT_); 4350 // Validate and encode operation ID. 4351 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4352 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4353 if (Operation.IsSymbolic) 4354 Error(S, "invalid symbolic name of GS_OP"); 4355 else 4356 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4357 break; 4358 } 4359 if (Operation.Id == OP_GS_NOP 4360 && Msg.Id != ID_GS_DONE) { 4361 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4362 break; 4363 } 4364 Imm16Val |= (Operation.Id << OP_SHIFT_); 4365 } 4366 if (Msg.Id == ID_SYSMSG) { 4367 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4368 if (Operation.IsSymbolic) 4369 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4370 else 4371 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4372 break; 4373 } 4374 Imm16Val |= (Operation.Id << OP_SHIFT_); 4375 } 4376 // Validate and encode stream ID. 4377 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4378 if (! 
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4379 Error(S, "invalid stream id: only 2-bit values are legal"); 4380 break; 4381 } 4382 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4383 } 4384 } while (false); 4385 } 4386 break; 4387 } 4388 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4389 return MatchOperand_Success; 4390 } 4391 4392 bool AMDGPUOperand::isSendMsg() const { 4393 return isImmTy(ImmTySendMsg); 4394 } 4395 4396 //===----------------------------------------------------------------------===// 4397 // parser helpers 4398 //===----------------------------------------------------------------------===// 4399 4400 bool 4401 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4402 if (getLexer().getKind() == AsmToken::Identifier && 4403 Parser.getTok().getString() == Id) { 4404 Parser.Lex(); 4405 return true; 4406 } 4407 return false; 4408 } 4409 4410 bool 4411 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4412 if (getLexer().getKind() == Kind) { 4413 Parser.Lex(); 4414 return true; 4415 } 4416 return false; 4417 } 4418 4419 bool 4420 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4421 const StringRef ErrMsg) { 4422 if (!trySkipToken(Kind)) { 4423 Error(Parser.getTok().getLoc(), ErrMsg); 4424 return false; 4425 } 4426 return true; 4427 } 4428 4429 bool 4430 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4431 return !getParser().parseAbsoluteExpression(Imm); 4432 } 4433 4434 bool 4435 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4436 SMLoc S = Parser.getTok().getLoc(); 4437 if (getLexer().getKind() == AsmToken::String) { 4438 Val = Parser.getTok().getStringContents(); 4439 Parser.Lex(); 4440 return true; 4441 } else { 4442 Error(S, ErrMsg); 4443 return false; 4444 } 4445 } 4446 4447 //===----------------------------------------------------------------------===// 4448 // swizzle 4449 //===----------------------------------------------------------------------===// 4450 4451 LLVM_READNONE 4452 static unsigned 4453 encodeBitmaskPerm(const unsigned AndMask, 4454 const unsigned OrMask, 4455 const unsigned XorMask) { 4456 using namespace llvm::AMDGPU::Swizzle; 4457 4458 return BITMASK_PERM_ENC | 4459 (AndMask << BITMASK_AND_SHIFT) | 4460 (OrMask << BITMASK_OR_SHIFT) | 4461 (XorMask << BITMASK_XOR_SHIFT); 4462 } 4463 4464 bool 4465 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4466 const unsigned MinVal, 4467 const unsigned MaxVal, 4468 const StringRef ErrMsg) { 4469 for (unsigned i = 0; i < OpNum; ++i) { 4470 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4471 return false; 4472 } 4473 SMLoc ExprLoc = Parser.getTok().getLoc(); 4474 if (!parseExpr(Op[i])) { 4475 return false; 4476 } 4477 if (Op[i] < MinVal || Op[i] > MaxVal) { 4478 Error(ExprLoc, ErrMsg); 4479 return false; 4480 } 4481 } 4482 4483 return true; 4484 } 4485 4486 bool 4487 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4488 using namespace llvm::AMDGPU::Swizzle; 4489 4490 int64_t Lane[LANE_NUM]; 4491 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4492 "expected a 2-bit lane id")) { 4493 Imm = QUAD_PERM_ENC; 4494 for (unsigned I = 0; I < LANE_NUM; ++I) { 4495 Imm |= Lane[I] << (LANE_SHIFT * I); 4496 } 4497 return true; 4498 } 4499 return false; 4500 } 4501 4502 bool 4503 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4504 using namespace llvm::AMDGPU::Swizzle; 4505 4506 SMLoc S = Parser.getTok().getLoc(); 4507 int64_t GroupSize; 4508 int64_t LaneIdx; 4509 4510 if 
(!parseSwizzleOperands(1, &GroupSize, 4511 2, 32, 4512 "group size must be in the interval [2,32]")) { 4513 return false; 4514 } 4515 if (!isPowerOf2_64(GroupSize)) { 4516 Error(S, "group size must be a power of two"); 4517 return false; 4518 } 4519 if (parseSwizzleOperands(1, &LaneIdx, 4520 0, GroupSize - 1, 4521 "lane id must be in the interval [0,group size - 1]")) { 4522 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4523 return true; 4524 } 4525 return false; 4526 } 4527 4528 bool 4529 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4530 using namespace llvm::AMDGPU::Swizzle; 4531 4532 SMLoc S = Parser.getTok().getLoc(); 4533 int64_t GroupSize; 4534 4535 if (!parseSwizzleOperands(1, &GroupSize, 4536 2, 32, "group size must be in the interval [2,32]")) { 4537 return false; 4538 } 4539 if (!isPowerOf2_64(GroupSize)) { 4540 Error(S, "group size must be a power of two"); 4541 return false; 4542 } 4543 4544 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4545 return true; 4546 } 4547 4548 bool 4549 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4550 using namespace llvm::AMDGPU::Swizzle; 4551 4552 SMLoc S = Parser.getTok().getLoc(); 4553 int64_t GroupSize; 4554 4555 if (!parseSwizzleOperands(1, &GroupSize, 4556 1, 16, "group size must be in the interval [1,16]")) { 4557 return false; 4558 } 4559 if (!isPowerOf2_64(GroupSize)) { 4560 Error(S, "group size must be a power of two"); 4561 return false; 4562 } 4563 4564 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4565 return true; 4566 } 4567 4568 bool 4569 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4570 using namespace llvm::AMDGPU::Swizzle; 4571 4572 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4573 return false; 4574 } 4575 4576 StringRef Ctl; 4577 SMLoc StrLoc = Parser.getTok().getLoc(); 4578 if (!parseString(Ctl)) { 4579 return false; 4580 } 4581 if (Ctl.size() != BITMASK_WIDTH) { 4582 Error(StrLoc, "expected a 5-character mask"); 4583 return false; 4584 } 4585 4586 unsigned AndMask = 0; 4587 unsigned OrMask = 0; 4588 unsigned XorMask = 0; 4589 4590 for (size_t i = 0; i < Ctl.size(); ++i) { 4591 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4592 switch(Ctl[i]) { 4593 default: 4594 Error(StrLoc, "invalid mask"); 4595 return false; 4596 case '0': 4597 break; 4598 case '1': 4599 OrMask |= Mask; 4600 break; 4601 case 'p': 4602 AndMask |= Mask; 4603 break; 4604 case 'i': 4605 AndMask |= Mask; 4606 XorMask |= Mask; 4607 break; 4608 } 4609 } 4610 4611 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4612 return true; 4613 } 4614 4615 bool 4616 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4617 4618 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4619 4620 if (!parseExpr(Imm)) { 4621 return false; 4622 } 4623 if (!isUInt<16>(Imm)) { 4624 Error(OffsetLoc, "expected a 16-bit offset"); 4625 return false; 4626 } 4627 return true; 4628 } 4629 4630 bool 4631 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4632 using namespace llvm::AMDGPU::Swizzle; 4633 4634 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4635 4636 SMLoc ModeLoc = Parser.getTok().getLoc(); 4637 bool Ok = false; 4638 4639 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4640 Ok = parseSwizzleQuadPerm(Imm); 4641 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4642 Ok = parseSwizzleBitmaskPerm(Imm); 4643 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4644 Ok = parseSwizzleBroadcast(Imm); 4645 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4646 Ok = parseSwizzleSwap(Imm); 4647 } else if 
(trySkipId(IdSymbolic[ID_REVERSE])) { 4648 Ok = parseSwizzleReverse(Imm); 4649 } else { 4650 Error(ModeLoc, "expected a swizzle mode"); 4651 } 4652 4653 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4654 } 4655 4656 return false; 4657 } 4658 4659 OperandMatchResultTy 4660 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4661 SMLoc S = Parser.getTok().getLoc(); 4662 int64_t Imm = 0; 4663 4664 if (trySkipId("offset")) { 4665 4666 bool Ok = false; 4667 if (skipToken(AsmToken::Colon, "expected a colon")) { 4668 if (trySkipId("swizzle")) { 4669 Ok = parseSwizzleMacro(Imm); 4670 } else { 4671 Ok = parseSwizzleOffset(Imm); 4672 } 4673 } 4674 4675 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4676 4677 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4678 } else { 4679 // Swizzle "offset" operand is optional. 4680 // If it is omitted, try parsing other optional operands. 4681 return parseOptionalOpr(Operands); 4682 } 4683 } 4684 4685 bool 4686 AMDGPUOperand::isSwizzle() const { 4687 return isImmTy(ImmTySwizzle); 4688 } 4689 4690 //===----------------------------------------------------------------------===// 4691 // VGPR Index Mode 4692 //===----------------------------------------------------------------------===// 4693 4694 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 4695 4696 using namespace llvm::AMDGPU::VGPRIndexMode; 4697 4698 if (trySkipToken(AsmToken::RParen)) { 4699 return OFF; 4700 } 4701 4702 int64_t Imm = 0; 4703 4704 while (true) { 4705 unsigned Mode = 0; 4706 SMLoc S = Parser.getTok().getLoc(); 4707 4708 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 4709 if (trySkipId(IdSymbolic[ModeId])) { 4710 Mode = 1 << ModeId; 4711 break; 4712 } 4713 } 4714 4715 if (Mode == 0) { 4716 Error(S, (Imm == 0)? 4717 "expected a VGPR index mode or a closing parenthesis" : 4718 "expected a VGPR index mode"); 4719 break; 4720 } 4721 4722 if (Imm & Mode) { 4723 Error(S, "duplicate VGPR index mode"); 4724 break; 4725 } 4726 Imm |= Mode; 4727 4728 if (trySkipToken(AsmToken::RParen)) 4729 break; 4730 if (!skipToken(AsmToken::Comma, 4731 "expected a comma or a closing parenthesis")) 4732 break; 4733 } 4734 4735 return Imm; 4736 } 4737 4738 OperandMatchResultTy 4739 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 4740 4741 int64_t Imm = 0; 4742 SMLoc S = Parser.getTok().getLoc(); 4743 4744 if (getLexer().getKind() == AsmToken::Identifier && 4745 Parser.getTok().getString() == "gpr_idx" && 4746 getLexer().peekTok().is(AsmToken::LParen)) { 4747 4748 Parser.Lex(); 4749 Parser.Lex(); 4750 4751 // If parse failed, trigger an error but do not return error code 4752 // to avoid excessive error messages. 
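    // Illustrative syntax: "gpr_idx(SRC0,DST)" - each name sets one mode bit; the
    // accepted names are those in VGPRIndexMode::IdSymbolic.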
4753 Imm = parseGPRIdxMacro(); 4754 4755 } else { 4756 if (getParser().parseAbsoluteExpression(Imm)) 4757 return MatchOperand_NoMatch; 4758 if (Imm < 0 || !isUInt<4>(Imm)) { 4759 Error(S, "invalid immediate: only 4-bit values are legal"); 4760 } 4761 } 4762 4763 Operands.push_back( 4764 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 4765 return MatchOperand_Success; 4766 } 4767 4768 bool AMDGPUOperand::isGPRIdxMode() const { 4769 return isImmTy(ImmTyGprIdxMode); 4770 } 4771 4772 //===----------------------------------------------------------------------===// 4773 // sopp branch targets 4774 //===----------------------------------------------------------------------===// 4775 4776 OperandMatchResultTy 4777 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4778 SMLoc S = Parser.getTok().getLoc(); 4779 4780 switch (getLexer().getKind()) { 4781 default: return MatchOperand_ParseFail; 4782 case AsmToken::Integer: { 4783 int64_t Imm; 4784 if (getParser().parseAbsoluteExpression(Imm)) 4785 return MatchOperand_ParseFail; 4786 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4787 return MatchOperand_Success; 4788 } 4789 4790 case AsmToken::Identifier: 4791 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4792 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4793 Parser.getTok().getString()), getContext()), S)); 4794 Parser.Lex(); 4795 return MatchOperand_Success; 4796 } 4797 } 4798 4799 //===----------------------------------------------------------------------===// 4800 // mubuf 4801 //===----------------------------------------------------------------------===// 4802 4803 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4804 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4805 } 4806 4807 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4808 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4809 } 4810 4811 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4812 const OperandVector &Operands, 4813 bool IsAtomic, 4814 bool IsAtomicReturn, 4815 bool IsLds) { 4816 bool IsLdsOpcode = IsLds; 4817 bool HasLdsModifier = false; 4818 OptionalImmIndexMap OptionalIdx; 4819 assert(IsAtomicReturn ? IsAtomic : true); 4820 4821 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4822 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4823 4824 // Add the register arguments 4825 if (Op.isReg()) { 4826 Op.addRegOperands(Inst, 1); 4827 continue; 4828 } 4829 4830 // Handle the case where soffset is an immediate 4831 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4832 Op.addImmOperands(Inst, 1); 4833 continue; 4834 } 4835 4836 HasLdsModifier = Op.isLDS(); 4837 4838 // Handle tokens like 'offen' which are sometimes hard-coded into the 4839 // asm string. There are no MCInst operands for these. 4840 if (Op.isToken()) { 4841 continue; 4842 } 4843 assert(Op.isImm()); 4844 4845 // Handle optional arguments 4846 OptionalIdx[Op.getImmTy()] = i; 4847 } 4848 4849 // This is a workaround for an llvm quirk which may result in an 4850 // incorrect instruction selection. Lds and non-lds versions of 4851 // MUBUF instructions are identical except that lds versions 4852 // have mandatory 'lds' modifier. However this modifier follows 4853 // optional modifiers and llvm asm matcher regards this 'lds' 4854 // modifier as an optional one. As a result, an lds version 4855 // of opcode may be selected even if it has no 'lds' modifier. 
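  // getMUBUFNoLdsInst below maps such an opcode back to its non-lds twin, so e.g.
  // a plain "buffer_load_dword" written without the 'lds' modifier never encodes
  // the lds variant by accident.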
4856 if (IsLdsOpcode && !HasLdsModifier) { 4857 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4858 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4859 Inst.setOpcode(NoLdsOpcode); 4860 IsLdsOpcode = false; 4861 } 4862 } 4863 4864 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4865 if (IsAtomicReturn) { 4866 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4867 Inst.insert(I, *I); 4868 } 4869 4870 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4871 if (!IsAtomic) { // glc is hard-coded. 4872 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4873 } 4874 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4875 4876 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4877 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4878 } 4879 } 4880 4881 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4882 OptionalImmIndexMap OptionalIdx; 4883 4884 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4885 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4886 4887 // Add the register arguments 4888 if (Op.isReg()) { 4889 Op.addRegOperands(Inst, 1); 4890 continue; 4891 } 4892 4893 // Handle the case where soffset is an immediate 4894 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4895 Op.addImmOperands(Inst, 1); 4896 continue; 4897 } 4898 4899 // Handle tokens like 'offen' which are sometimes hard-coded into the 4900 // asm string. There are no MCInst operands for these. 4901 if (Op.isToken()) { 4902 continue; 4903 } 4904 assert(Op.isImm()); 4905 4906 // Handle optional arguments 4907 OptionalIdx[Op.getImmTy()] = i; 4908 } 4909 4910 addOptionalImmOperand(Inst, Operands, OptionalIdx, 4911 AMDGPUOperand::ImmTyOffset); 4912 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 4913 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4914 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4915 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4916 } 4917 4918 //===----------------------------------------------------------------------===// 4919 // mimg 4920 //===----------------------------------------------------------------------===// 4921 4922 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 4923 bool IsAtomic) { 4924 unsigned I = 1; 4925 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4926 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 4927 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 4928 } 4929 4930 if (IsAtomic) { 4931 // Add src, same as dst 4932 assert(Desc.getNumDefs() == 1); 4933 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 4934 } 4935 4936 OptionalImmIndexMap OptionalIdx; 4937 4938 for (unsigned E = Operands.size(); I != E; ++I) { 4939 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 4940 4941 // Add the register arguments 4942 if (Op.isReg()) { 4943 Op.addRegOperands(Inst, 1); 4944 } else if (Op.isImmModifier()) { 4945 OptionalIdx[Op.getImmTy()] = I; 4946 } else { 4947 llvm_unreachable("unexpected operand type"); 4948 } 4949 } 4950 4951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 4952 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 4953 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 
4954 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4955 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 4956 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4957 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 4958 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 4959 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 4960 } 4961 4962 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 4963 cvtMIMG(Inst, Operands, true); 4964 } 4965 4966 //===----------------------------------------------------------------------===// 4967 // smrd 4968 //===----------------------------------------------------------------------===// 4969 4970 bool AMDGPUOperand::isSMRDOffset8() const { 4971 return isImm() && isUInt<8>(getImm()); 4972 } 4973 4974 bool AMDGPUOperand::isSMRDOffset20() const { 4975 return isImm() && isUInt<20>(getImm()); 4976 } 4977 4978 bool AMDGPUOperand::isSMRDLiteralOffset() const { 4979 // 32-bit literals are only supported on CI and we only want to use them 4980 // when the offset is > 8-bits. 4981 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 4982 } 4983 4984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 4985 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4986 } 4987 4988 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 4989 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4990 } 4991 4992 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 4993 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4994 } 4995 4996 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 4997 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 4998 } 4999 5000 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5001 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5002 } 5003 5004 //===----------------------------------------------------------------------===// 5005 // vop3 5006 //===----------------------------------------------------------------------===// 5007 5008 static bool ConvertOmodMul(int64_t &Mul) { 5009 if (Mul != 1 && Mul != 2 && Mul != 4) 5010 return false; 5011 5012 Mul >>= 1; 5013 return true; 5014 } 5015 5016 static bool ConvertOmodDiv(int64_t &Div) { 5017 if (Div == 1) { 5018 Div = 0; 5019 return true; 5020 } 5021 5022 if (Div == 2) { 5023 Div = 3; 5024 return true; 5025 } 5026 5027 return false; 5028 } 5029 5030 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5031 if (BoundCtrl == 0) { 5032 BoundCtrl = 1; 5033 return true; 5034 } 5035 5036 if (BoundCtrl == -1) { 5037 BoundCtrl = 0; 5038 return true; 5039 } 5040 5041 return false; 5042 } 5043 5044 // Note: the order in this table matches the order of operands in AsmString. 
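// Each entry is (name, immediate type, is-a-bit flag, optional value converter);
// parseOptionalOpr walks this table in order when matching optional operands.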
5045 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5046 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5047 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5048 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5049 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5050 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5051 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5052 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5053 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5054 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5055 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5056 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5057 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5058 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5059 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5060 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5061 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5062 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5063 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5064 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5065 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5066 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5067 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5068 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5069 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5070 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5071 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5072 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5073 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5074 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5075 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5076 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5077 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5078 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5079 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5080 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5081 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5082 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5083 }; 5084 5085 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5086 unsigned size = Operands.size(); 5087 assert(size > 0); 5088 5089 OperandMatchResultTy res = parseOptionalOpr(Operands); 5090 5091 // This is a hack to enable hardcoded mandatory operands which follow 5092 // optional operands. 5093 // 5094 // Current design assumes that all operands after the first optional operand 5095 // are also optional. However implementation of some instructions violates 5096 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5097 // 5098 // To alleviate this problem, we have to (implicitly) parse extra operands 5099 // to make sure autogenerated parser of custom operands never hit hardcoded 5100 // mandatory operands. 5101 5102 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5103 5104 // We have parsed the first optional operand. 5105 // Parse as many operands as necessary to skip all mandatory operands. 
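    // Illustrative case: a flat/global atomic with a hardcoded trailing 'glc' - the
    // lookahead below pre-parses the remaining optional operands so the autogenerated
    // parser never has to treat that mandatory 'glc' as the next custom operand.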
5106 5107 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5108 if (res != MatchOperand_Success || 5109 getLexer().is(AsmToken::EndOfStatement)) break; 5110 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5111 res = parseOptionalOpr(Operands); 5112 } 5113 } 5114 5115 return res; 5116 } 5117 5118 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5119 OperandMatchResultTy res; 5120 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5121 // try to parse any optional operand here 5122 if (Op.IsBit) { 5123 res = parseNamedBit(Op.Name, Operands, Op.Type); 5124 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5125 res = parseOModOperand(Operands); 5126 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5127 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5128 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5129 res = parseSDWASel(Operands, Op.Name, Op.Type); 5130 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5131 res = parseSDWADstUnused(Operands); 5132 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5133 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5134 Op.Type == AMDGPUOperand::ImmTyNegLo || 5135 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5136 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5137 Op.ConvertResult); 5138 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 5139 res = parseDfmtNfmt(Operands); 5140 } else { 5141 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5142 } 5143 if (res != MatchOperand_NoMatch) { 5144 return res; 5145 } 5146 } 5147 return MatchOperand_NoMatch; 5148 } 5149 5150 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5151 StringRef Name = Parser.getTok().getString(); 5152 if (Name == "mul") { 5153 return parseIntWithPrefix("mul", Operands, 5154 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5155 } 5156 5157 if (Name == "div") { 5158 return parseIntWithPrefix("div", Operands, 5159 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5160 } 5161 5162 return MatchOperand_NoMatch; 5163 } 5164 5165 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5166 cvtVOP3P(Inst, Operands); 5167 5168 int Opc = Inst.getOpcode(); 5169 5170 int SrcNum; 5171 const int Ops[] = { AMDGPU::OpName::src0, 5172 AMDGPU::OpName::src1, 5173 AMDGPU::OpName::src2 }; 5174 for (SrcNum = 0; 5175 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5176 ++SrcNum); 5177 assert(SrcNum > 0); 5178 5179 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5180 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5181 5182 if ((OpSel & (1 << SrcNum)) != 0) { 5183 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5184 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5185 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5186 } 5187 } 5188 5189 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5190 // 1. This operand is input modifiers 5191 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5192 // 2. This is not last operand 5193 && Desc.NumOperands > (OpNum + 1) 5194 // 3. Next operand is register class 5195 && Desc.OpInfo[OpNum + 1].RegClass != -1 5196 // 4. 
Next register is not tied to any other operand 5197 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5198 } 5199 5200 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5201 { 5202 OptionalImmIndexMap OptionalIdx; 5203 unsigned Opc = Inst.getOpcode(); 5204 5205 unsigned I = 1; 5206 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5207 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5208 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5209 } 5210 5211 for (unsigned E = Operands.size(); I != E; ++I) { 5212 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5213 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5214 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5215 } else if (Op.isInterpSlot() || 5216 Op.isInterpAttr() || 5217 Op.isAttrChan()) { 5218 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 5219 } else if (Op.isImmModifier()) { 5220 OptionalIdx[Op.getImmTy()] = I; 5221 } else { 5222 llvm_unreachable("unhandled operand type"); 5223 } 5224 } 5225 5226 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5227 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5228 } 5229 5230 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5231 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5232 } 5233 5234 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5235 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5236 } 5237 } 5238 5239 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5240 OptionalImmIndexMap &OptionalIdx) { 5241 unsigned Opc = Inst.getOpcode(); 5242 5243 unsigned I = 1; 5244 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5245 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5246 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5247 } 5248 5249 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5250 // This instruction has src modifiers 5251 for (unsigned E = Operands.size(); I != E; ++I) { 5252 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5253 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5254 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5255 } else if (Op.isImmModifier()) { 5256 OptionalIdx[Op.getImmTy()] = I; 5257 } else if (Op.isRegOrImm()) { 5258 Op.addRegOrImmOperands(Inst, 1); 5259 } else { 5260 llvm_unreachable("unhandled operand type"); 5261 } 5262 } 5263 } else { 5264 // No src modifiers 5265 for (unsigned E = Operands.size(); I != E; ++I) { 5266 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5267 if (Op.isMod()) { 5268 OptionalIdx[Op.getImmTy()] = I; 5269 } else { 5270 Op.addRegOrImmOperands(Inst, 1); 5271 } 5272 } 5273 } 5274 5275 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5276 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5277 } 5278 5279 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5281 } 5282 5283 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5284 // it has src2 register operand that is tied to dst operand 5285 // we don't allow modifiers for this operand in assembler so src2_modifiers 5286 // should be 0. 
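  // In assembly these are written as three-operand instructions, e.g.
  // "v_mac_f32_e64 v0, v1, v2"; the code below inserts src2_modifiers = 0 and then
  // re-uses the dst operand (v0) as the tied src2.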

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
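
// For illustration (syntax shown is an assumed example, not exhaustive): in
// cvtVOP3P above, bit J of op_sel / op_sel_hi / neg_lo / neg_hi corresponds
// to srcJ, so something like "v_pk_add_f16 v0, v1, v2 op_sel:[1,0]
// neg_lo:[0,1]" would fold OP_SEL_0 into src0_modifiers and NEG into
// src1_modifiers.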

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
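
// For illustration: parseDPPCtrl below accepts row_mirror, row_half_mirror,
// quad_perm:[a,b,c,d], row_shl:N / row_shr:N / row_ror:N (N in 1..15),
// wave_shl:1 / wave_rol:1 / wave_shr:1 / wave_ror:1 and row_bcast:15/31.
// For quad_perm each lane select takes two bits, so quad_perm:[1,2,3,0]
// encodes as 1 | (2 << 2) | (3 << 4) | (0 << 6) = 0x39.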

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
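
// For illustration: when row_mask, bank_mask or bound_ctrl are omitted in the
// source, the defaults above (0xf, 0xf and 0) are used; cvtDPP below applies
// the same defaults when it appends the optional immediates in encoding
// order.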

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
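
// For illustration: the four wrappers above all funnel into cvtSDWA below.
// skipVcc is true for VOP2b forms, which spell an explicit "vcc" operand
// (e.g. "v_addc_u32_sdwa v1, vcc, v2, v3, vcc"), and for VOPC on VI, so the
// vcc token is not re-emitted into the MCInst operand list.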

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}