//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
223 return isa<MCSymbolRefExpr>(Expr); 224 } 225 226 bool isImm() const override { 227 return Kind == Immediate; 228 } 229 230 bool isInlinableImm(MVT type) const; 231 bool isLiteralImm(MVT type) const; 232 233 bool isRegKind() const { 234 return Kind == Register; 235 } 236 237 bool isReg() const override { 238 return isRegKind() && !hasModifiers(); 239 } 240 241 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 242 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 243 } 244 245 bool isRegOrImmWithInt16InputMods() const { 246 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 247 } 248 249 bool isRegOrImmWithInt32InputMods() const { 250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 251 } 252 253 bool isRegOrImmWithInt64InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 255 } 256 257 bool isRegOrImmWithFP16InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 259 } 260 261 bool isRegOrImmWithFP32InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 263 } 264 265 bool isRegOrImmWithFP64InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 267 } 268 269 bool isVReg() const { 270 return isRegClass(AMDGPU::VGPR_32RegClassID) || 271 isRegClass(AMDGPU::VReg_64RegClassID) || 272 isRegClass(AMDGPU::VReg_96RegClassID) || 273 isRegClass(AMDGPU::VReg_128RegClassID) || 274 isRegClass(AMDGPU::VReg_256RegClassID) || 275 isRegClass(AMDGPU::VReg_512RegClassID); 276 } 277 278 bool isVReg32() const { 279 return isRegClass(AMDGPU::VGPR_32RegClassID); 280 } 281 282 bool isVReg32OrOff() const { 283 return isOff() || isVReg32(); 284 } 285 286 bool isSDWAOperand(MVT type) const; 287 bool isSDWAFP16Operand() const; 288 bool isSDWAFP32Operand() const; 289 bool isSDWAInt16Operand() const; 290 bool isSDWAInt32Operand() const; 291 292 bool isImmTy(ImmTy ImmT) const { 293 return isImm() && Imm.Type == ImmT; 294 } 295 296 bool isImmModifier() const { 297 return isImm() && Imm.Type != ImmTyNone; 298 } 299 300 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 301 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 302 bool isDMask() const { return isImmTy(ImmTyDMask); } 303 bool isDim() const { return isImmTy(ImmTyDim); } 304 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 305 bool isDA() const { return isImmTy(ImmTyDA); } 306 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 307 bool isLWE() const { return isImmTy(ImmTyLWE); } 308 bool isOff() const { return isImmTy(ImmTyOff); } 309 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 310 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 311 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 312 bool isOffen() const { return isImmTy(ImmTyOffen); } 313 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 314 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 315 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 316 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 317 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 318 319 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 320 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 321 bool isGDS() const { return isImmTy(ImmTyGDS); } 322 bool isLDS() const { 
return isImmTy(ImmTyLDS); } 323 bool isDLC() const { return isImmTy(ImmTyDLC); } 324 bool isGLC() const { return isImmTy(ImmTyGLC); } 325 bool isSLC() const { return isImmTy(ImmTySLC); } 326 bool isTFE() const { return isImmTy(ImmTyTFE); } 327 bool isD16() const { return isImmTy(ImmTyD16); } 328 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 329 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 330 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 331 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 332 bool isFI() const { return isImmTy(ImmTyDppFi); } 333 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 334 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 335 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 336 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 337 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 338 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 339 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 340 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 341 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 342 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 343 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 344 bool isHigh() const { return isImmTy(ImmTyHigh); } 345 346 bool isMod() const { 347 return isClampSI() || isOModSI(); 348 } 349 350 bool isRegOrImm() const { 351 return isReg() || isImm(); 352 } 353 354 bool isRegClass(unsigned RCID) const; 355 356 bool isInlineValue() const; 357 358 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 359 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 360 } 361 362 bool isSCSrcB16() const { 363 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 364 } 365 366 bool isSCSrcV2B16() const { 367 return isSCSrcB16(); 368 } 369 370 bool isSCSrcB32() const { 371 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 372 } 373 374 bool isSCSrcB64() const { 375 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 376 } 377 378 bool isBoolReg() const; 379 380 bool isSCSrcF16() const { 381 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 382 } 383 384 bool isSCSrcV2F16() const { 385 return isSCSrcF16(); 386 } 387 388 bool isSCSrcF32() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 390 } 391 392 bool isSCSrcF64() const { 393 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 394 } 395 396 bool isSSrcB32() const { 397 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 398 } 399 400 bool isSSrcB16() const { 401 return isSCSrcB16() || isLiteralImm(MVT::i16); 402 } 403 404 bool isSSrcV2B16() const { 405 llvm_unreachable("cannot happen"); 406 return isSSrcB16(); 407 } 408 409 bool isSSrcB64() const { 410 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 411 // See isVSrc64(). 
412 return isSCSrcB64() || isLiteralImm(MVT::i64); 413 } 414 415 bool isSSrcF32() const { 416 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 417 } 418 419 bool isSSrcF64() const { 420 return isSCSrcB64() || isLiteralImm(MVT::f64); 421 } 422 423 bool isSSrcF16() const { 424 return isSCSrcB16() || isLiteralImm(MVT::f16); 425 } 426 427 bool isSSrcV2F16() const { 428 llvm_unreachable("cannot happen"); 429 return isSSrcF16(); 430 } 431 432 bool isSSrcOrLdsB32() const { 433 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 434 isLiteralImm(MVT::i32) || isExpr(); 435 } 436 437 bool isVCSrcB32() const { 438 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 439 } 440 441 bool isVCSrcB64() const { 442 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 443 } 444 445 bool isVCSrcB16() const { 446 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 447 } 448 449 bool isVCSrcV2B16() const { 450 return isVCSrcB16(); 451 } 452 453 bool isVCSrcF32() const { 454 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 455 } 456 457 bool isVCSrcF64() const { 458 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 459 } 460 461 bool isVCSrcF16() const { 462 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 463 } 464 465 bool isVCSrcV2F16() const { 466 return isVCSrcF16(); 467 } 468 469 bool isVSrcB32() const { 470 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVSrcB64() const { 474 return isVCSrcF64() || isLiteralImm(MVT::i64); 475 } 476 477 bool isVSrcB16() const { 478 return isVCSrcF16() || isLiteralImm(MVT::i16); 479 } 480 481 bool isVSrcV2B16() const { 482 return isVSrcB16() || isLiteralImm(MVT::v2i16); 483 } 484 485 bool isVSrcF32() const { 486 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 487 } 488 489 bool isVSrcF64() const { 490 return isVCSrcF64() || isLiteralImm(MVT::f64); 491 } 492 493 bool isVSrcF16() const { 494 return isVCSrcF16() || isLiteralImm(MVT::f16); 495 } 496 497 bool isVSrcV2F16() const { 498 return isVSrcF16() || isLiteralImm(MVT::v2f16); 499 } 500 501 bool isKImmFP32() const { 502 return isLiteralImm(MVT::f32); 503 } 504 505 bool isKImmFP16() const { 506 return isLiteralImm(MVT::f16); 507 } 508 509 bool isMem() const override { 510 return false; 511 } 512 513 bool isExpr() const { 514 return Kind == Expression; 515 } 516 517 bool isSoppBrTarget() const { 518 return isExpr() || isImm(); 519 } 520 521 bool isSWaitCnt() const; 522 bool isHwreg() const; 523 bool isSendMsg() const; 524 bool isSwizzle() const; 525 bool isSMRDOffset8() const; 526 bool isSMRDOffset20() const; 527 bool isSMRDLiteralOffset() const; 528 bool isDPP8() const; 529 bool isDPPCtrl() const; 530 bool isGPRIdxMode() const; 531 bool isS16Imm() const; 532 bool isU16Imm() const; 533 bool isEndpgm() const; 534 535 StringRef getExpressionAsToken() const { 536 assert(isExpr()); 537 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 538 return S->getSymbol().getName(); 539 } 540 541 StringRef getToken() const { 542 assert(isToken()); 543 544 if (Kind == Expression) 545 return getExpressionAsToken(); 546 547 return StringRef(Tok.Data, Tok.Length); 548 } 549 550 int64_t getImm() const { 551 assert(isImm()); 552 return Imm.Val; 553 } 554 555 ImmTy getImmTy() const { 556 assert(isImm()); 557 return Imm.Type; 558 } 559 560 unsigned getReg() const override { 561 assert(isRegKind()); 562 return Reg.RegNo; 563 } 564 565 SMLoc getStartLoc() const override { 566 return StartLoc; 567 } 568 
569 SMLoc getEndLoc() const override { 570 return EndLoc; 571 } 572 573 SMRange getLocRange() const { 574 return SMRange(StartLoc, EndLoc); 575 } 576 577 Modifiers getModifiers() const { 578 assert(isRegKind() || isImmTy(ImmTyNone)); 579 return isRegKind() ? Reg.Mods : Imm.Mods; 580 } 581 582 void setModifiers(Modifiers Mods) { 583 assert(isRegKind() || isImmTy(ImmTyNone)); 584 if (isRegKind()) 585 Reg.Mods = Mods; 586 else 587 Imm.Mods = Mods; 588 } 589 590 bool hasModifiers() const { 591 return getModifiers().hasModifiers(); 592 } 593 594 bool hasFPModifiers() const { 595 return getModifiers().hasFPModifiers(); 596 } 597 598 bool hasIntModifiers() const { 599 return getModifiers().hasIntModifiers(); 600 } 601 602 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 603 604 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 605 606 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 607 608 template <unsigned Bitwidth> 609 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 610 611 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 612 addKImmFPOperands<16>(Inst, N); 613 } 614 615 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 616 addKImmFPOperands<32>(Inst, N); 617 } 618 619 void addRegOperands(MCInst &Inst, unsigned N) const; 620 621 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 622 addRegOperands(Inst, N); 623 } 624 625 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 626 if (isRegKind()) 627 addRegOperands(Inst, N); 628 else if (isExpr()) 629 Inst.addOperand(MCOperand::createExpr(Expr)); 630 else 631 addImmOperands(Inst, N); 632 } 633 634 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 635 Modifiers Mods = getModifiers(); 636 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 637 if (isRegKind()) { 638 addRegOperands(Inst, N); 639 } else { 640 addImmOperands(Inst, N, false); 641 } 642 } 643 644 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 645 assert(!hasIntModifiers()); 646 addRegOrImmWithInputModsOperands(Inst, N); 647 } 648 649 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 650 assert(!hasFPModifiers()); 651 addRegOrImmWithInputModsOperands(Inst, N); 652 } 653 654 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 655 Modifiers Mods = getModifiers(); 656 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 657 assert(isRegKind()); 658 addRegOperands(Inst, N); 659 } 660 661 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 662 assert(!hasIntModifiers()); 663 addRegWithInputModsOperands(Inst, N); 664 } 665 666 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 667 assert(!hasFPModifiers()); 668 addRegWithInputModsOperands(Inst, N); 669 } 670 671 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 672 if (isImm()) 673 addImmOperands(Inst, N); 674 else { 675 assert(isExpr()); 676 Inst.addOperand(MCOperand::createExpr(Expr)); 677 } 678 } 679 680 static void printImmTy(raw_ostream& OS, ImmTy Type) { 681 switch (Type) { 682 case ImmTyNone: OS << "None"; break; 683 case ImmTyGDS: OS << "GDS"; break; 684 case ImmTyLDS: OS << "LDS"; break; 685 case ImmTyOffen: OS << "Offen"; break; 686 case ImmTyIdxen: OS << "Idxen"; break; 687 case ImmTyAddr64: OS << "Addr64"; break; 688 case ImmTyOffset: OS << "Offset"; break; 689 case ImmTyInstOffset: OS << "InstOffset"; break; 690 case ImmTyOffset0: OS << "Offset0"; 
break; 691 case ImmTyOffset1: OS << "Offset1"; break; 692 case ImmTyDLC: OS << "DLC"; break; 693 case ImmTyGLC: OS << "GLC"; break; 694 case ImmTySLC: OS << "SLC"; break; 695 case ImmTyTFE: OS << "TFE"; break; 696 case ImmTyD16: OS << "D16"; break; 697 case ImmTyFORMAT: OS << "FORMAT"; break; 698 case ImmTyClampSI: OS << "ClampSI"; break; 699 case ImmTyOModSI: OS << "OModSI"; break; 700 case ImmTyDPP8: OS << "DPP8"; break; 701 case ImmTyDppCtrl: OS << "DppCtrl"; break; 702 case ImmTyDppRowMask: OS << "DppRowMask"; break; 703 case ImmTyDppBankMask: OS << "DppBankMask"; break; 704 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 705 case ImmTyDppFi: OS << "FI"; break; 706 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 707 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 708 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 709 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 710 case ImmTyDMask: OS << "DMask"; break; 711 case ImmTyDim: OS << "Dim"; break; 712 case ImmTyUNorm: OS << "UNorm"; break; 713 case ImmTyDA: OS << "DA"; break; 714 case ImmTyR128A16: OS << "R128A16"; break; 715 case ImmTyLWE: OS << "LWE"; break; 716 case ImmTyOff: OS << "Off"; break; 717 case ImmTyExpTgt: OS << "ExpTgt"; break; 718 case ImmTyExpCompr: OS << "ExpCompr"; break; 719 case ImmTyExpVM: OS << "ExpVM"; break; 720 case ImmTyHwreg: OS << "Hwreg"; break; 721 case ImmTySendMsg: OS << "SendMsg"; break; 722 case ImmTyInterpSlot: OS << "InterpSlot"; break; 723 case ImmTyInterpAttr: OS << "InterpAttr"; break; 724 case ImmTyAttrChan: OS << "AttrChan"; break; 725 case ImmTyOpSel: OS << "OpSel"; break; 726 case ImmTyOpSelHi: OS << "OpSelHi"; break; 727 case ImmTyNegLo: OS << "NegLo"; break; 728 case ImmTyNegHi: OS << "NegHi"; break; 729 case ImmTySwizzle: OS << "Swizzle"; break; 730 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 731 case ImmTyHigh: OS << "High"; break; 732 case ImmTyEndpgm: 733 OS << "Endpgm"; 734 break; 735 } 736 } 737 738 void print(raw_ostream &OS) const override { 739 switch (Kind) { 740 case Register: 741 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 742 break; 743 case Immediate: 744 OS << '<' << getImm(); 745 if (getImmTy() != ImmTyNone) { 746 OS << " type: "; printImmTy(OS, getImmTy()); 747 } 748 OS << " mods: " << Imm.Mods << '>'; 749 break; 750 case Token: 751 OS << '\'' << getToken() << '\''; 752 break; 753 case Expression: 754 OS << "<expr " << *Expr << '>'; 755 break; 756 } 757 } 758 759 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 760 int64_t Val, SMLoc Loc, 761 ImmTy Type = ImmTyNone, 762 bool IsFPImm = false) { 763 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 764 Op->Imm.Val = Val; 765 Op->Imm.IsFPImm = IsFPImm; 766 Op->Imm.Type = Type; 767 Op->Imm.Mods = Modifiers(); 768 Op->StartLoc = Loc; 769 Op->EndLoc = Loc; 770 return Op; 771 } 772 773 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 774 StringRef Str, SMLoc Loc, 775 bool HasExplicitEncodingSize = true) { 776 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 777 Res->Tok.Data = Str.data(); 778 Res->Tok.Length = Str.size(); 779 Res->StartLoc = Loc; 780 Res->EndLoc = Loc; 781 return Res; 782 } 783 784 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 785 unsigned RegNo, SMLoc S, 786 SMLoc E) { 787 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 788 Op->Reg.RegNo = RegNo; 789 Op->Reg.Mods = Modifiers(); 790 Op->StartLoc = S; 791 Op->EndLoc = E; 792 return Op; 793 } 794 795 static 
AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 796 const class MCExpr *Expr, SMLoc S) { 797 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 798 Op->Expr = Expr; 799 Op->StartLoc = S; 800 Op->EndLoc = S; 801 return Op; 802 } 803 }; 804 805 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 806 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 807 return OS; 808 } 809 810 //===----------------------------------------------------------------------===// 811 // AsmParser 812 //===----------------------------------------------------------------------===// 813 814 // Holds info related to the current kernel, e.g. count of SGPRs used. 815 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 816 // .amdgpu_hsa_kernel or at EOF. 817 class KernelScopeInfo { 818 int SgprIndexUnusedMin = -1; 819 int VgprIndexUnusedMin = -1; 820 MCContext *Ctx = nullptr; 821 822 void usesSgprAt(int i) { 823 if (i >= SgprIndexUnusedMin) { 824 SgprIndexUnusedMin = ++i; 825 if (Ctx) { 826 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 827 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 828 } 829 } 830 } 831 832 void usesVgprAt(int i) { 833 if (i >= VgprIndexUnusedMin) { 834 VgprIndexUnusedMin = ++i; 835 if (Ctx) { 836 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 837 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 838 } 839 } 840 } 841 842 public: 843 KernelScopeInfo() = default; 844 845 void initialize(MCContext &Context) { 846 Ctx = &Context; 847 usesSgprAt(SgprIndexUnusedMin = -1); 848 usesVgprAt(VgprIndexUnusedMin = -1); 849 } 850 851 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 852 switch (RegKind) { 853 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 854 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 855 default: break; 856 } 857 } 858 }; 859 860 class AMDGPUAsmParser : public MCTargetAsmParser { 861 MCAsmParser &Parser; 862 863 // Number of extra operands parsed after the first optional operand. 864 // This may be necessary to skip hardcoded mandatory operands. 865 static const unsigned MAX_OPR_LOOKAHEAD = 8; 866 867 unsigned ForcedEncodingSize = 0; 868 bool ForcedDPP = false; 869 bool ForcedSDWA = false; 870 KernelScopeInfo KernelScope; 871 872 /// @name Auto-generated Match Functions 873 /// { 874 875 #define GET_ASSEMBLER_HEADER 876 #include "AMDGPUGenAsmMatcher.inc" 877 878 /// } 879 880 private: 881 bool ParseAsAbsoluteExpression(uint32_t &Ret); 882 bool OutOfRangeError(SMRange Range); 883 /// Calculate VGPR/SGPR blocks required for given target, reserved 884 /// registers, and user-specified NextFreeXGPR values. 885 /// 886 /// \param Features [in] Target features, used for bug corrections. 887 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 888 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 889 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 890 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 891 /// descriptor field, if valid. 892 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 893 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 894 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 895 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
972 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 973 MCContext &Ctx = getContext(); 974 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 975 MCSymbol *Sym = 976 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 977 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 978 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 979 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 980 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 981 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 982 } else { 983 MCSymbol *Sym = 984 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 985 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 986 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 987 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 988 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 989 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 990 } 991 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 992 initializeGprCountSymbol(IS_VGPR); 993 initializeGprCountSymbol(IS_SGPR); 994 } else 995 KernelScope.initialize(getContext()); 996 } 997 } 998 999 bool hasXNACK() const { 1000 return AMDGPU::hasXNACK(getSTI()); 1001 } 1002 1003 bool hasMIMG_R128() const { 1004 return AMDGPU::hasMIMG_R128(getSTI()); 1005 } 1006 1007 bool hasPackedD16() const { 1008 return AMDGPU::hasPackedD16(getSTI()); 1009 } 1010 1011 bool isSI() const { 1012 return AMDGPU::isSI(getSTI()); 1013 } 1014 1015 bool isCI() const { 1016 return AMDGPU::isCI(getSTI()); 1017 } 1018 1019 bool isVI() const { 1020 return AMDGPU::isVI(getSTI()); 1021 } 1022 1023 bool isGFX9() const { 1024 return AMDGPU::isGFX9(getSTI()); 1025 } 1026 1027 bool isGFX10() const { 1028 return AMDGPU::isGFX10(getSTI()); 1029 } 1030 1031 bool hasInv2PiInlineImm() const { 1032 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1033 } 1034 1035 bool hasFlatOffsets() const { 1036 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1037 } 1038 1039 bool hasSGPR102_SGPR103() const { 1040 return !isVI() && !isGFX9(); 1041 } 1042 1043 bool hasSGPR104_SGPR105() const { 1044 return isGFX10(); 1045 } 1046 1047 bool hasIntClamp() const { 1048 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1049 } 1050 1051 AMDGPUTargetStreamer &getTargetStreamer() { 1052 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1053 return static_cast<AMDGPUTargetStreamer &>(TS); 1054 } 1055 1056 const MCRegisterInfo *getMRI() const { 1057 // We need this const_cast because for some reason getContext() is not const 1058 // in MCAsmParser. 
1059 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1060 } 1061 1062 const MCInstrInfo *getMII() const { 1063 return &MII; 1064 } 1065 1066 const FeatureBitset &getFeatureBits() const { 1067 return getSTI().getFeatureBits(); 1068 } 1069 1070 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1071 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1072 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1073 1074 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1075 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1076 bool isForcedDPP() const { return ForcedDPP; } 1077 bool isForcedSDWA() const { return ForcedSDWA; } 1078 ArrayRef<unsigned> getMatchedVariants() const; 1079 1080 std::unique_ptr<AMDGPUOperand> parseRegister(); 1081 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1082 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1083 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1084 unsigned Kind) override; 1085 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1086 OperandVector &Operands, MCStreamer &Out, 1087 uint64_t &ErrorInfo, 1088 bool MatchingInlineAsm) override; 1089 bool ParseDirective(AsmToken DirectiveID) override; 1090 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1091 OperandMode Mode = OperandMode_Default); 1092 StringRef parseMnemonicSuffix(StringRef Name); 1093 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1094 SMLoc NameLoc, OperandVector &Operands) override; 1095 //bool ProcessInstruction(MCInst &Inst); 1096 1097 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1098 1099 OperandMatchResultTy 1100 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1101 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1102 bool (*ConvertResult)(int64_t &) = nullptr); 1103 1104 OperandMatchResultTy 1105 parseOperandArrayWithPrefix(const char *Prefix, 1106 OperandVector &Operands, 1107 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1108 bool (*ConvertResult)(int64_t&) = nullptr); 1109 1110 OperandMatchResultTy 1111 parseNamedBit(const char *Name, OperandVector &Operands, 1112 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1113 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1114 StringRef &Value); 1115 1116 bool isModifier(); 1117 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1118 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1119 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1120 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1121 bool parseSP3NegModifier(); 1122 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1123 OperandMatchResultTy parseReg(OperandVector &Operands); 1124 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1125 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1126 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1127 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1128 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1129 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1130 OperandMatchResultTy 
parseDfmtNfmt(OperandVector &Operands); 1131 1132 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1133 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1134 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1135 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1136 1137 bool parseCnt(int64_t &IntVal); 1138 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1139 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1140 1141 private: 1142 struct OperandInfoTy { 1143 int64_t Id; 1144 bool IsSymbolic = false; 1145 1146 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1147 }; 1148 1149 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1150 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1151 void validateHwreg(const OperandInfoTy &HwReg, 1152 const int64_t Offset, 1153 const int64_t Width, 1154 const SMLoc Loc); 1155 1156 void errorExpTgt(); 1157 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1158 1159 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1160 bool validateSOPLiteral(const MCInst &Inst) const; 1161 bool validateConstantBusLimitations(const MCInst &Inst); 1162 bool validateEarlyClobberLimitations(const MCInst &Inst); 1163 bool validateIntClampSupported(const MCInst &Inst); 1164 bool validateMIMGAtomicDMask(const MCInst &Inst); 1165 bool validateMIMGGatherDMask(const MCInst &Inst); 1166 bool validateMIMGDataSize(const MCInst &Inst); 1167 bool validateMIMGAddrSize(const MCInst &Inst); 1168 bool validateMIMGD16(const MCInst &Inst); 1169 bool validateMIMGDim(const MCInst &Inst); 1170 bool validateLdsDirect(const MCInst &Inst); 1171 bool validateOpSel(const MCInst &Inst); 1172 bool validateVccOperand(unsigned Reg) const; 1173 bool validateVOP3Literal(const MCInst &Inst) const; 1174 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1175 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1176 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1177 1178 bool isId(const StringRef Id) const; 1179 bool isId(const AsmToken &Token, const StringRef Id) const; 1180 bool isToken(const AsmToken::TokenKind Kind) const; 1181 bool trySkipId(const StringRef Id); 1182 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1183 bool trySkipToken(const AsmToken::TokenKind Kind); 1184 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1185 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1186 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1187 AsmToken::TokenKind getTokenKind() const; 1188 bool parseExpr(int64_t &Imm); 1189 StringRef getTokenStr() const; 1190 AsmToken peekToken(); 1191 AsmToken getToken() const; 1192 SMLoc getLoc() const; 1193 void lex(); 1194 1195 public: 1196 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1197 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1198 1199 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1200 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1201 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1202 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1203 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1204 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1205 1206 bool parseSwizzleOperands(const unsigned OpNum, int64_t* 
Op, 1207 const unsigned MinVal, 1208 const unsigned MaxVal, 1209 const StringRef ErrMsg); 1210 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1211 bool parseSwizzleOffset(int64_t &Imm); 1212 bool parseSwizzleMacro(int64_t &Imm); 1213 bool parseSwizzleQuadPerm(int64_t &Imm); 1214 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1215 bool parseSwizzleBroadcast(int64_t &Imm); 1216 bool parseSwizzleSwap(int64_t &Imm); 1217 bool parseSwizzleReverse(int64_t &Imm); 1218 1219 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1220 int64_t parseGPRIdxMacro(); 1221 1222 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1223 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1224 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1225 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1226 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1227 1228 AMDGPUOperand::Ptr defaultDLC() const; 1229 AMDGPUOperand::Ptr defaultGLC() const; 1230 AMDGPUOperand::Ptr defaultSLC() const; 1231 1232 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1233 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1234 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1235 AMDGPUOperand::Ptr defaultOffsetU12() const; 1236 AMDGPUOperand::Ptr defaultOffsetS13() const; 1237 1238 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1239 1240 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1241 OptionalImmIndexMap &OptionalIdx); 1242 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1243 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1244 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1245 1246 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1247 1248 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1249 bool IsAtomic = false); 1250 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1251 1252 OperandMatchResultTy parseDim(OperandVector &Operands); 1253 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1254 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1255 AMDGPUOperand::Ptr defaultRowMask() const; 1256 AMDGPUOperand::Ptr defaultBankMask() const; 1257 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1258 AMDGPUOperand::Ptr defaultFI() const; 1259 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1260 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1261 1262 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1263 AMDGPUOperand::ImmTy Type); 1264 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1265 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1266 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1267 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1268 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1269 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1270 uint64_t BasicInstType, bool skipVcc = false); 1271 1272 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1273 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1274 }; 1275 1276 struct OptionalOperand { 1277 const char *Name; 1278 AMDGPUOperand::ImmTy Type; 1279 bool IsBit; 1280 bool 
  (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1460 1461 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1462 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1463 } 1464 1465 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1466 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1467 } 1468 1469 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1470 if (AsmParser->isVI()) 1471 return isVReg32(); 1472 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1473 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1474 else 1475 return false; 1476 } 1477 1478 bool AMDGPUOperand::isSDWAFP16Operand() const { 1479 return isSDWAOperand(MVT::f16); 1480 } 1481 1482 bool AMDGPUOperand::isSDWAFP32Operand() const { 1483 return isSDWAOperand(MVT::f32); 1484 } 1485 1486 bool AMDGPUOperand::isSDWAInt16Operand() const { 1487 return isSDWAOperand(MVT::i16); 1488 } 1489 1490 bool AMDGPUOperand::isSDWAInt32Operand() const { 1491 return isSDWAOperand(MVT::i32); 1492 } 1493 1494 bool AMDGPUOperand::isBoolReg() const { 1495 return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? 1496 isSCSrcB64() : isSCSrcB32(); 1497 } 1498 1499 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1500 { 1501 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1502 assert(Size == 2 || Size == 4 || Size == 8); 1503 1504 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1505 1506 if (Imm.Mods.Abs) { 1507 Val &= ~FpSignMask; 1508 } 1509 if (Imm.Mods.Neg) { 1510 Val ^= FpSignMask; 1511 } 1512 1513 return Val; 1514 } 1515 1516 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1517 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1518 Inst.getNumOperands())) { 1519 addLiteralImmOperand(Inst, Imm.Val, 1520 ApplyModifiers & 1521 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1522 } else { 1523 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1524 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1525 } 1526 } 1527 1528 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1529 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1530 auto OpNum = Inst.getNumOperands(); 1531 // Check that this operand accepts literals 1532 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1533 1534 if (ApplyModifiers) { 1535 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1536 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1537 Val = applyInputFPModifiers(Val, Size); 1538 } 1539 1540 APInt Literal(64, Val); 1541 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1542 1543 if (Imm.IsFPImm) { // We got fp literal token 1544 switch (OpTy) { 1545 case AMDGPU::OPERAND_REG_IMM_INT64: 1546 case AMDGPU::OPERAND_REG_IMM_FP64: 1547 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1548 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1549 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1550 AsmParser->hasInv2PiInlineImm())) { 1551 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1552 return; 1553 } 1554 1555 // Non-inlineable 1556 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1557 // For fp operands we check if low 32 bits are zeros 1558 if (Literal.getLoBits(32) != 0) { 1559 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1560 "Can't encode literal as exact 64-bit floating-point operand. 
" 1561 "Low 32-bits will be set to zero"); 1562 } 1563 1564 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1565 return; 1566 } 1567 1568 // We don't allow fp literals in 64-bit integer instructions. It is 1569 // unclear how we should encode them. This case should be checked earlier 1570 // in predicate methods (isLiteralImm()) 1571 llvm_unreachable("fp literal in 64-bit integer instruction."); 1572 1573 case AMDGPU::OPERAND_REG_IMM_INT32: 1574 case AMDGPU::OPERAND_REG_IMM_FP32: 1575 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1576 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1577 case AMDGPU::OPERAND_REG_IMM_INT16: 1578 case AMDGPU::OPERAND_REG_IMM_FP16: 1579 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1580 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1581 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1582 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1583 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1584 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1585 bool lost; 1586 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1587 // Convert literal to single precision 1588 FPLiteral.convert(*getOpFltSemantics(OpTy), 1589 APFloat::rmNearestTiesToEven, &lost); 1590 // We allow precision lost but not overflow or underflow. This should be 1591 // checked earlier in isLiteralImm() 1592 1593 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1594 Inst.addOperand(MCOperand::createImm(ImmVal)); 1595 return; 1596 } 1597 default: 1598 llvm_unreachable("invalid operand size"); 1599 } 1600 1601 return; 1602 } 1603 1604 // We got int literal token. 1605 // Only sign extend inline immediates. 1606 switch (OpTy) { 1607 case AMDGPU::OPERAND_REG_IMM_INT32: 1608 case AMDGPU::OPERAND_REG_IMM_FP32: 1609 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1610 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1611 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1612 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1613 if (isSafeTruncation(Val, 32) && 1614 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1615 AsmParser->hasInv2PiInlineImm())) { 1616 Inst.addOperand(MCOperand::createImm(Val)); 1617 return; 1618 } 1619 1620 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1621 return; 1622 1623 case AMDGPU::OPERAND_REG_IMM_INT64: 1624 case AMDGPU::OPERAND_REG_IMM_FP64: 1625 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1626 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1627 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1628 Inst.addOperand(MCOperand::createImm(Val)); 1629 return; 1630 } 1631 1632 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1633 return; 1634 1635 case AMDGPU::OPERAND_REG_IMM_INT16: 1636 case AMDGPU::OPERAND_REG_IMM_FP16: 1637 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1638 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1639 if (isSafeTruncation(Val, 16) && 1640 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1641 AsmParser->hasInv2PiInlineImm())) { 1642 Inst.addOperand(MCOperand::createImm(Val)); 1643 return; 1644 } 1645 1646 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1647 return; 1648 1649 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1650 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1651 assert(isSafeTruncation(Val, 16)); 1652 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1653 AsmParser->hasInv2PiInlineImm())); 1654 1655 Inst.addOperand(MCOperand::createImm(Val)); 1656 return; 1657 } 1658 default: 1659 llvm_unreachable("invalid operand size"); 1660 } 1661 } 1662 1663 template <unsigned Bitwidth> 1664 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1665 APInt 
Literal(64, Imm.Val); 1666 1667 if (!Imm.IsFPImm) { 1668 // We got int literal token. 1669 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1670 return; 1671 } 1672 1673 bool Lost; 1674 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1675 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1676 APFloat::rmNearestTiesToEven, &Lost); 1677 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1678 } 1679 1680 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1681 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1682 } 1683 1684 static bool isInlineValue(unsigned Reg) { 1685 switch (Reg) { 1686 case AMDGPU::SRC_SHARED_BASE: 1687 case AMDGPU::SRC_SHARED_LIMIT: 1688 case AMDGPU::SRC_PRIVATE_BASE: 1689 case AMDGPU::SRC_PRIVATE_LIMIT: 1690 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1691 return true; 1692 case AMDGPU::SRC_VCCZ: 1693 case AMDGPU::SRC_EXECZ: 1694 case AMDGPU::SRC_SCC: 1695 return true; 1696 default: 1697 return false; 1698 } 1699 } 1700 1701 bool AMDGPUOperand::isInlineValue() const { 1702 return isRegKind() && ::isInlineValue(getReg()); 1703 } 1704 1705 //===----------------------------------------------------------------------===// 1706 // AsmParser 1707 //===----------------------------------------------------------------------===// 1708 1709 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1710 if (Is == IS_VGPR) { 1711 switch (RegWidth) { 1712 default: return -1; 1713 case 1: return AMDGPU::VGPR_32RegClassID; 1714 case 2: return AMDGPU::VReg_64RegClassID; 1715 case 3: return AMDGPU::VReg_96RegClassID; 1716 case 4: return AMDGPU::VReg_128RegClassID; 1717 case 8: return AMDGPU::VReg_256RegClassID; 1718 case 16: return AMDGPU::VReg_512RegClassID; 1719 } 1720 } else if (Is == IS_TTMP) { 1721 switch (RegWidth) { 1722 default: return -1; 1723 case 1: return AMDGPU::TTMP_32RegClassID; 1724 case 2: return AMDGPU::TTMP_64RegClassID; 1725 case 4: return AMDGPU::TTMP_128RegClassID; 1726 case 8: return AMDGPU::TTMP_256RegClassID; 1727 case 16: return AMDGPU::TTMP_512RegClassID; 1728 } 1729 } else if (Is == IS_SGPR) { 1730 switch (RegWidth) { 1731 default: return -1; 1732 case 1: return AMDGPU::SGPR_32RegClassID; 1733 case 2: return AMDGPU::SGPR_64RegClassID; 1734 case 4: return AMDGPU::SGPR_128RegClassID; 1735 case 8: return AMDGPU::SGPR_256RegClassID; 1736 case 16: return AMDGPU::SGPR_512RegClassID; 1737 } 1738 } 1739 return -1; 1740 } 1741 1742 static unsigned getSpecialRegForName(StringRef RegName) { 1743 return StringSwitch<unsigned>(RegName) 1744 .Case("exec", AMDGPU::EXEC) 1745 .Case("vcc", AMDGPU::VCC) 1746 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1747 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1748 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1749 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1750 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1751 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1752 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1753 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1754 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1755 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1756 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1757 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1758 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1759 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1760 .Case("m0", AMDGPU::M0) 1761 .Case("vccz", AMDGPU::SRC_VCCZ) 1762 .Case("src_vccz", 
AMDGPU::SRC_VCCZ) 1763 .Case("execz", AMDGPU::SRC_EXECZ) 1764 .Case("src_execz", AMDGPU::SRC_EXECZ) 1765 .Case("scc", AMDGPU::SRC_SCC) 1766 .Case("src_scc", AMDGPU::SRC_SCC) 1767 .Case("tba", AMDGPU::TBA) 1768 .Case("tma", AMDGPU::TMA) 1769 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1770 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1771 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1772 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1773 .Case("vcc_lo", AMDGPU::VCC_LO) 1774 .Case("vcc_hi", AMDGPU::VCC_HI) 1775 .Case("exec_lo", AMDGPU::EXEC_LO) 1776 .Case("exec_hi", AMDGPU::EXEC_HI) 1777 .Case("tma_lo", AMDGPU::TMA_LO) 1778 .Case("tma_hi", AMDGPU::TMA_HI) 1779 .Case("tba_lo", AMDGPU::TBA_LO) 1780 .Case("tba_hi", AMDGPU::TBA_HI) 1781 .Case("null", AMDGPU::SGPR_NULL) 1782 .Default(0); 1783 } 1784 1785 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1786 SMLoc &EndLoc) { 1787 auto R = parseRegister(); 1788 if (!R) return true; 1789 assert(R->isReg()); 1790 RegNo = R->getReg(); 1791 StartLoc = R->getStartLoc(); 1792 EndLoc = R->getEndLoc(); 1793 return false; 1794 } 1795 1796 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1797 RegisterKind RegKind, unsigned Reg1, 1798 unsigned RegNum) { 1799 switch (RegKind) { 1800 case IS_SPECIAL: 1801 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1802 Reg = AMDGPU::EXEC; 1803 RegWidth = 2; 1804 return true; 1805 } 1806 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1807 Reg = AMDGPU::FLAT_SCR; 1808 RegWidth = 2; 1809 return true; 1810 } 1811 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1812 Reg = AMDGPU::XNACK_MASK; 1813 RegWidth = 2; 1814 return true; 1815 } 1816 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1817 Reg = AMDGPU::VCC; 1818 RegWidth = 2; 1819 return true; 1820 } 1821 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1822 Reg = AMDGPU::TBA; 1823 RegWidth = 2; 1824 return true; 1825 } 1826 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1827 Reg = AMDGPU::TMA; 1828 RegWidth = 2; 1829 return true; 1830 } 1831 return false; 1832 case IS_VGPR: 1833 case IS_SGPR: 1834 case IS_TTMP: 1835 if (Reg1 != Reg + RegWidth) { 1836 return false; 1837 } 1838 RegWidth++; 1839 return true; 1840 default: 1841 llvm_unreachable("unexpected register kind"); 1842 } 1843 } 1844 1845 static const StringRef Registers[] = { 1846 { "v" }, 1847 { "s" }, 1848 { "ttmp" }, 1849 }; 1850 1851 bool 1852 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1853 const AsmToken &NextToken) const { 1854 1855 // A list of consecutive registers: [s0,s1,s2,s3] 1856 if (Token.is(AsmToken::LBrac)) 1857 return true; 1858 1859 if (!Token.is(AsmToken::Identifier)) 1860 return false; 1861 1862 // A single register like s0 or a range of registers like s[0:1] 1863 1864 StringRef RegName = Token.getString(); 1865 1866 for (StringRef Reg : Registers) { 1867 if (RegName.startswith(Reg)) { 1868 if (Reg.size() < RegName.size()) { 1869 unsigned RegNum; 1870 // A single register with an index: rXX 1871 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1872 return true; 1873 } else { 1874 // A range of registers: r[XX:YY]. 
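// (Here the identifier is just the bare prefix, e.g. "s" in "s[0:3]" or
// "ttmp" in "ttmp[4:7]"; only the following '[' is checked at this point,
// the actual bounds are parsed and validated later in ParseAMDGPURegister.)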
1875 if (NextToken.is(AsmToken::LBrac)) 1876 return true; 1877 } 1878 } 1879 } 1880 1881 return getSpecialRegForName(RegName); 1882 } 1883 1884 bool 1885 AMDGPUAsmParser::isRegister() 1886 { 1887 return isRegister(getToken(), peekToken()); 1888 } 1889 1890 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1891 unsigned &RegNum, unsigned &RegWidth, 1892 unsigned *DwordRegIndex) { 1893 if (DwordRegIndex) { *DwordRegIndex = 0; } 1894 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1895 if (getLexer().is(AsmToken::Identifier)) { 1896 StringRef RegName = Parser.getTok().getString(); 1897 if ((Reg = getSpecialRegForName(RegName))) { 1898 Parser.Lex(); 1899 RegKind = IS_SPECIAL; 1900 } else { 1901 unsigned RegNumIndex = 0; 1902 if (RegName[0] == 'v') { 1903 RegNumIndex = 1; 1904 RegKind = IS_VGPR; 1905 } else if (RegName[0] == 's') { 1906 RegNumIndex = 1; 1907 RegKind = IS_SGPR; 1908 } else if (RegName.startswith("ttmp")) { 1909 RegNumIndex = strlen("ttmp"); 1910 RegKind = IS_TTMP; 1911 } else { 1912 return false; 1913 } 1914 if (RegName.size() > RegNumIndex) { 1915 // Single 32-bit register: vXX. 1916 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1917 return false; 1918 Parser.Lex(); 1919 RegWidth = 1; 1920 } else { 1921 // Range of registers: v[XX:YY]. ":YY" is optional. 1922 Parser.Lex(); 1923 int64_t RegLo, RegHi; 1924 if (getLexer().isNot(AsmToken::LBrac)) 1925 return false; 1926 Parser.Lex(); 1927 1928 if (getParser().parseAbsoluteExpression(RegLo)) 1929 return false; 1930 1931 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1932 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1933 return false; 1934 Parser.Lex(); 1935 1936 if (isRBrace) { 1937 RegHi = RegLo; 1938 } else { 1939 if (getParser().parseAbsoluteExpression(RegHi)) 1940 return false; 1941 1942 if (getLexer().isNot(AsmToken::RBrac)) 1943 return false; 1944 Parser.Lex(); 1945 } 1946 RegNum = (unsigned) RegLo; 1947 RegWidth = (RegHi - RegLo) + 1; 1948 } 1949 } 1950 } else if (getLexer().is(AsmToken::LBrac)) { 1951 // List of consecutive registers: [s0,s1,s2,s3] 1952 Parser.Lex(); 1953 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1954 return false; 1955 if (RegWidth != 1) 1956 return false; 1957 RegisterKind RegKind1; 1958 unsigned Reg1, RegNum1, RegWidth1; 1959 do { 1960 if (getLexer().is(AsmToken::Comma)) { 1961 Parser.Lex(); 1962 } else if (getLexer().is(AsmToken::RBrac)) { 1963 Parser.Lex(); 1964 break; 1965 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1966 if (RegWidth1 != 1) { 1967 return false; 1968 } 1969 if (RegKind1 != RegKind) { 1970 return false; 1971 } 1972 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1973 return false; 1974 } 1975 } else { 1976 return false; 1977 } 1978 } while (true); 1979 } else { 1980 return false; 1981 } 1982 switch (RegKind) { 1983 case IS_SPECIAL: 1984 RegNum = 0; 1985 RegWidth = 1; 1986 break; 1987 case IS_VGPR: 1988 case IS_SGPR: 1989 case IS_TTMP: 1990 { 1991 unsigned Size = 1; 1992 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1993 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
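// For example, s[2:3] and s[4:7] are accepted, while s[1:2] and s[2:5] are
// rejected below because the first register is not aligned to the width of
// the range (capped at 4 dwords). VGPR ranges are not subject to this
// restriction.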
1994 Size = std::min(RegWidth, 4u); 1995 } 1996 if (RegNum % Size != 0) 1997 return false; 1998 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1999 RegNum = RegNum / Size; 2000 int RCID = getRegClass(RegKind, RegWidth); 2001 if (RCID == -1) 2002 return false; 2003 const MCRegisterClass RC = TRI->getRegClass(RCID); 2004 if (RegNum >= RC.getNumRegs()) 2005 return false; 2006 Reg = RC.getRegister(RegNum); 2007 break; 2008 } 2009 2010 default: 2011 llvm_unreachable("unexpected register kind"); 2012 } 2013 2014 if (!subtargetHasRegister(*TRI, Reg)) 2015 return false; 2016 return true; 2017 } 2018 2019 Optional<StringRef> 2020 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2021 switch (RegKind) { 2022 case IS_VGPR: 2023 return StringRef(".amdgcn.next_free_vgpr"); 2024 case IS_SGPR: 2025 return StringRef(".amdgcn.next_free_sgpr"); 2026 default: 2027 return None; 2028 } 2029 } 2030 2031 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2032 auto SymbolName = getGprCountSymbolName(RegKind); 2033 assert(SymbolName && "initializing invalid register kind"); 2034 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2035 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2036 } 2037 2038 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2039 unsigned DwordRegIndex, 2040 unsigned RegWidth) { 2041 // Symbols are only defined for GCN targets 2042 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2043 return true; 2044 2045 auto SymbolName = getGprCountSymbolName(RegKind); 2046 if (!SymbolName) 2047 return true; 2048 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2049 2050 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2051 int64_t OldCount; 2052 2053 if (!Sym->isVariable()) 2054 return !Error(getParser().getTok().getLoc(), 2055 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2056 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2057 return !Error( 2058 getParser().getTok().getLoc(), 2059 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2060 2061 if (OldCount <= NewMax) 2062 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2063 2064 return true; 2065 } 2066 2067 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2068 const auto &Tok = Parser.getTok(); 2069 SMLoc StartLoc = Tok.getLoc(); 2070 SMLoc EndLoc = Tok.getEndLoc(); 2071 RegisterKind RegKind; 2072 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2073 2074 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2075 //FIXME: improve error messages (bug 41303). 
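// (Any failure in ParseAMDGPURegister - an unknown name, a malformed index,
// a misaligned or out-of-range register range - currently ends up with this
// one generic diagnostic; hence the FIXME above.)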
2076 Error(StartLoc, "not a valid operand."); 2077 return nullptr; 2078 } 2079 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2080 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2081 return nullptr; 2082 } else 2083 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2084 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2085 } 2086 2087 OperandMatchResultTy 2088 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2089 // TODO: add syntactic sugar for 1/(2*PI) 2090 2091 assert(!isRegister()); 2092 assert(!isModifier()); 2093 2094 const auto& Tok = getToken(); 2095 const auto& NextTok = peekToken(); 2096 bool IsReal = Tok.is(AsmToken::Real); 2097 SMLoc S = getLoc(); 2098 bool Negate = false; 2099 2100 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2101 lex(); 2102 IsReal = true; 2103 Negate = true; 2104 } 2105 2106 if (IsReal) { 2107 // Floating-point expressions are not supported. 2108 // Can only allow floating-point literals with an 2109 // optional sign. 2110 2111 StringRef Num = getTokenStr(); 2112 lex(); 2113 2114 APFloat RealVal(APFloat::IEEEdouble()); 2115 auto roundMode = APFloat::rmNearestTiesToEven; 2116 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2117 return MatchOperand_ParseFail; 2118 } 2119 if (Negate) 2120 RealVal.changeSign(); 2121 2122 Operands.push_back( 2123 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2124 AMDGPUOperand::ImmTyNone, true)); 2125 2126 return MatchOperand_Success; 2127 2128 } else { 2129 int64_t IntVal; 2130 const MCExpr *Expr; 2131 SMLoc S = getLoc(); 2132 2133 if (HasSP3AbsModifier) { 2134 // This is a workaround for handling expressions 2135 // as arguments of SP3 'abs' modifier, for example: 2136 // |1.0| 2137 // |-1| 2138 // |1+x| 2139 // This syntax is not compatible with syntax of standard 2140 // MC expressions (due to the trailing '|'). 
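// A more restricted primary-expression parse is used here because '|' is
// also a valid binary operator in MC expressions, so a full
// parseExpression() call would try to consume the closing '|' of the SP3
// 'abs' modifier.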
2141 SMLoc EndLoc; 2142 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2143 return MatchOperand_ParseFail; 2144 } else { 2145 if (Parser.parseExpression(Expr)) 2146 return MatchOperand_ParseFail; 2147 } 2148 2149 if (Expr->evaluateAsAbsolute(IntVal)) { 2150 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2151 } else { 2152 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2153 } 2154 2155 return MatchOperand_Success; 2156 } 2157 2158 return MatchOperand_NoMatch; 2159 } 2160 2161 OperandMatchResultTy 2162 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2163 if (!isRegister()) 2164 return MatchOperand_NoMatch; 2165 2166 if (auto R = parseRegister()) { 2167 assert(R->isReg()); 2168 Operands.push_back(std::move(R)); 2169 return MatchOperand_Success; 2170 } 2171 return MatchOperand_ParseFail; 2172 } 2173 2174 OperandMatchResultTy 2175 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2176 auto res = parseReg(Operands); 2177 if (res != MatchOperand_NoMatch) { 2178 return res; 2179 } else if (isModifier()) { 2180 return MatchOperand_NoMatch; 2181 } else { 2182 return parseImm(Operands, HasSP3AbsMod); 2183 } 2184 } 2185 2186 bool 2187 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2188 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2189 const auto &str = Token.getString(); 2190 return str == "abs" || str == "neg" || str == "sext"; 2191 } 2192 return false; 2193 } 2194 2195 bool 2196 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2197 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2198 } 2199 2200 bool 2201 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2202 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2203 } 2204 2205 bool 2206 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2207 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2208 } 2209 2210 // Check if this is an operand modifier or an opcode modifier 2211 // which may look like an expression but it is not. We should 2212 // avoid parsing these modifiers as expressions. Currently 2213 // recognized sequences are: 2214 // |...| 2215 // abs(...) 2216 // neg(...) 2217 // sext(...) 2218 // -reg 2219 // -|...| 2220 // -abs(...) 2221 // name:... 2222 // Note that simple opcode modifiers like 'gds' may be parsed as 2223 // expressions; this is a special case. See getExpressionAsToken. 2224 // 2225 bool 2226 AMDGPUAsmParser::isModifier() { 2227 2228 AsmToken Tok = getToken(); 2229 AsmToken NextToken[2]; 2230 peekTokens(NextToken); 2231 2232 return isOperandModifier(Tok, NextToken[0]) || 2233 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2234 isOpcodeModifierWithVal(Tok, NextToken[0]); 2235 } 2236 2237 // Check if the current token is an SP3 'neg' modifier. 2238 // Currently this modifier is allowed in the following context: 2239 // 2240 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2241 // 2. Before an 'abs' modifier: -abs(...) 2242 // 3. Before an SP3 'abs' modifier: -|...| 2243 // 2244 // In all other cases "-" is handled as a part 2245 // of an expression that follows the sign. 
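// For example, in "-x+y" or "-(1+2)" the "-" is parsed as part of the
// expression rather than as a NEG modifier.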
2246 //
2247 // Note: When "-" is followed by an integer literal,
2248 // this is interpreted as integer negation rather
2249 // than a floating-point NEG modifier applied to N.
2250 // Besides being counter-intuitive, such use of a floating-point
2251 // NEG modifier would have resulted in different meanings
2252 // of integer literals used with VOP1/2/C and VOP3,
2253 // for example:
2254 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2255 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2256 // Negative fp literals with preceding "-" are
2257 // handled likewise for uniformity.
2258 //
2259 bool
2260 AMDGPUAsmParser::parseSP3NegModifier() {
2261 
2262 AsmToken NextToken[2];
2263 peekTokens(NextToken);
2264 
2265 if (isToken(AsmToken::Minus) &&
2266 (isRegister(NextToken[0], NextToken[1]) ||
2267 NextToken[0].is(AsmToken::Pipe) ||
2268 isId(NextToken[0], "abs"))) {
2269 lex();
2270 return true;
2271 }
2272 
2273 return false;
2274 }
2275 
2276 OperandMatchResultTy
2277 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2278 bool AllowImm) {
2279 bool Neg, SP3Neg;
2280 bool Abs, SP3Abs;
2281 SMLoc Loc;
2282 
2283 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2284 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2285 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2286 return MatchOperand_ParseFail;
2287 }
2288 
2289 SP3Neg = parseSP3NegModifier();
2290 
2291 Loc = getLoc();
2292 Neg = trySkipId("neg");
2293 if (Neg && SP3Neg) {
2294 Error(Loc, "expected register or immediate");
2295 return MatchOperand_ParseFail;
2296 }
2297 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2298 return MatchOperand_ParseFail;
2299 
2300 Abs = trySkipId("abs");
2301 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2302 return MatchOperand_ParseFail;
2303 
2304 Loc = getLoc();
2305 SP3Abs = trySkipToken(AsmToken::Pipe);
2306 if (Abs && SP3Abs) {
2307 Error(Loc, "expected register or immediate");
2308 return MatchOperand_ParseFail;
2309 }
2310 
2311 OperandMatchResultTy Res;
2312 if (AllowImm) {
2313 Res = parseRegOrImm(Operands, SP3Abs);
2314 } else {
2315 Res = parseReg(Operands);
2316 }
2317 if (Res != MatchOperand_Success) {
2318 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2319 } 2320 2321 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2322 return MatchOperand_ParseFail; 2323 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2324 return MatchOperand_ParseFail; 2325 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2326 return MatchOperand_ParseFail; 2327 2328 AMDGPUOperand::Modifiers Mods; 2329 Mods.Abs = Abs || SP3Abs; 2330 Mods.Neg = Neg || SP3Neg; 2331 2332 if (Mods.hasFPModifiers()) { 2333 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2334 if (Op.isExpr()) { 2335 Error(Op.getStartLoc(), "expected an absolute expression"); 2336 return MatchOperand_ParseFail; 2337 } 2338 Op.setModifiers(Mods); 2339 } 2340 return MatchOperand_Success; 2341 } 2342 2343 OperandMatchResultTy 2344 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2345 bool AllowImm) { 2346 bool Sext = trySkipId("sext"); 2347 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2348 return MatchOperand_ParseFail; 2349 2350 OperandMatchResultTy Res; 2351 if (AllowImm) { 2352 Res = parseRegOrImm(Operands); 2353 } else { 2354 Res = parseReg(Operands); 2355 } 2356 if (Res != MatchOperand_Success) { 2357 return Sext? MatchOperand_ParseFail : Res; 2358 } 2359 2360 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2361 return MatchOperand_ParseFail; 2362 2363 AMDGPUOperand::Modifiers Mods; 2364 Mods.Sext = Sext; 2365 2366 if (Mods.hasIntModifiers()) { 2367 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2368 if (Op.isExpr()) { 2369 Error(Op.getStartLoc(), "expected an absolute expression"); 2370 return MatchOperand_ParseFail; 2371 } 2372 Op.setModifiers(Mods); 2373 } 2374 2375 return MatchOperand_Success; 2376 } 2377 2378 OperandMatchResultTy 2379 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2380 return parseRegOrImmWithFPInputMods(Operands, false); 2381 } 2382 2383 OperandMatchResultTy 2384 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2385 return parseRegOrImmWithIntInputMods(Operands, false); 2386 } 2387 2388 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2389 auto Loc = getLoc(); 2390 if (trySkipId("off")) { 2391 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2392 AMDGPUOperand::ImmTyOff, false)); 2393 return MatchOperand_Success; 2394 } 2395 2396 if (!isRegister()) 2397 return MatchOperand_NoMatch; 2398 2399 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2400 if (Reg) { 2401 Operands.push_back(std::move(Reg)); 2402 return MatchOperand_Success; 2403 } 2404 2405 return MatchOperand_ParseFail; 2406 2407 } 2408 2409 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2410 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2411 2412 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2413 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2414 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2415 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2416 return Match_InvalidOperand; 2417 2418 if ((TSFlags & SIInstrFlags::VOP3) && 2419 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2420 getForcedEncodingSize() != 64) 2421 return Match_PreferE32; 2422 2423 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2424 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2425 // v_mac_f32/16 allow only dst_sel == DWORD; 2426 auto OpNum = 2427 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2428 const auto &Op = Inst.getOperand(OpNum);
2429 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2430 return Match_InvalidOperand;
2431 }
2432 }
2433 
2434 if (TSFlags & SIInstrFlags::FLAT) {
2435 // FIXME: Produces error without correct column reported.
2436 auto Opcode = Inst.getOpcode();
2437 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2438 
2439 const auto &Op = Inst.getOperand(OpNum);
2440 if (!hasFlatOffsets() && Op.getImm() != 0)
2441 return Match_InvalidOperand;
2442 
2443 // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
2444 // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
2445 if (isGFX10()) {
2446 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2447 if (!isInt<12>(Op.getImm()))
2448 return Match_InvalidOperand;
2449 } else {
2450 if (!isUInt<11>(Op.getImm()))
2451 return Match_InvalidOperand;
2452 }
2453 }
2454 }
2455 
2456 return Match_Success;
2457 }
2458 
2459 // What asm variants we should check
2460 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2461 if (getForcedEncodingSize() == 32) {
2462 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2463 return makeArrayRef(Variants);
2464 }
2465 
2466 if (isForcedVOP3()) {
2467 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2468 return makeArrayRef(Variants);
2469 }
2470 
2471 if (isForcedSDWA()) {
2472 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2473 AMDGPUAsmVariants::SDWA9};
2474 return makeArrayRef(Variants);
2475 }
2476 
2477 if (isForcedDPP()) {
2478 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2479 return makeArrayRef(Variants);
2480 }
2481 
2482 static const unsigned Variants[] = {
2483 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2484 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2485 };
2486 
2487 return makeArrayRef(Variants);
2488 }
2489 
2490 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2491 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2492 const unsigned Num = Desc.getNumImplicitUses();
2493 for (unsigned i = 0; i < Num; ++i) {
2494 unsigned Reg = Desc.ImplicitUses[i];
2495 switch (Reg) {
2496 case AMDGPU::FLAT_SCR:
2497 case AMDGPU::VCC:
2498 case AMDGPU::VCC_LO:
2499 case AMDGPU::VCC_HI:
2500 case AMDGPU::M0:
2501 case AMDGPU::SGPR_NULL:
2502 return Reg;
2503 default:
2504 break;
2505 }
2506 }
2507 return AMDGPU::NoRegister;
2508 }
2509 
2510 // NB: This code is correct only when used to check constant
2511 // bus limitations because GFX7 does not support f16 inline constants.
2512 // Note that there are no cases when a GFX7 opcode violates
2513 // constant bus limitations due to the use of an f16 constant.
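// For reference (roughly; the isInlinableLiteral* helpers in AMDGPUBaseInfo
// are the authoritative check): for a 32-bit operand the inline constants
// are the integers -16..64, +/-0.5, +/-1.0, +/-2.0, +/-4.0 and, on
// subtargets that have it, 1/(2*pi). Anything else is a literal, e.g.
//    v_add_f32 v0, 1.0, v1   // inline constant, no literal used
//    v_add_f32 v0, 1.5, v1   // not inline, encoded as a 32-bit literal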
2514 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2515 unsigned OpIdx) const { 2516 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2517 2518 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2519 return false; 2520 } 2521 2522 const MCOperand &MO = Inst.getOperand(OpIdx); 2523 2524 int64_t Val = MO.getImm(); 2525 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2526 2527 switch (OpSize) { // expected operand size 2528 case 8: 2529 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2530 case 4: 2531 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2532 case 2: { 2533 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2534 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2535 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2536 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2537 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2538 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2539 } else { 2540 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2541 } 2542 } 2543 default: 2544 llvm_unreachable("invalid operand size"); 2545 } 2546 } 2547 2548 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2549 const MCOperand &MO = Inst.getOperand(OpIdx); 2550 if (MO.isImm()) { 2551 return !isInlineConstant(Inst, OpIdx); 2552 } 2553 return !MO.isReg() || 2554 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2555 } 2556 2557 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2558 const unsigned Opcode = Inst.getOpcode(); 2559 const MCInstrDesc &Desc = MII.get(Opcode); 2560 unsigned ConstantBusUseCount = 0; 2561 unsigned NumLiterals = 0; 2562 unsigned LiteralSize; 2563 2564 if (Desc.TSFlags & 2565 (SIInstrFlags::VOPC | 2566 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2567 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2568 SIInstrFlags::SDWA)) { 2569 // Check special imm operands (used by madmk, etc) 2570 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2571 ++ConstantBusUseCount; 2572 } 2573 2574 SmallDenseSet<unsigned> SGPRsUsed; 2575 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2576 if (SGPRUsed != AMDGPU::NoRegister) { 2577 SGPRsUsed.insert(SGPRUsed); 2578 ++ConstantBusUseCount; 2579 } 2580 2581 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2582 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2583 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2584 2585 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2586 2587 for (int OpIdx : OpIndices) { 2588 if (OpIdx == -1) break; 2589 2590 const MCOperand &MO = Inst.getOperand(OpIdx); 2591 if (usesConstantBus(Inst, OpIdx)) { 2592 if (MO.isReg()) { 2593 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2594 // Pairs of registers with a partial intersections like these 2595 // s0, s[0:1] 2596 // flat_scratch_lo, flat_scratch 2597 // flat_scratch_lo, flat_scratch_hi 2598 // are theoretically valid but they are disabled anyway. 2599 // Note that this code mimics SIInstrInfo::verifyInstruction 2600 if (!SGPRsUsed.count(Reg)) { 2601 SGPRsUsed.insert(Reg); 2602 ++ConstantBusUseCount; 2603 } 2604 SGPRUsed = Reg; 2605 } else { // Expression or a literal 2606 2607 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2608 continue; // special operand like VINTERP attr_chan 2609 2610 // An instruction may use only one literal. 
2611 // This has been validated on the previous step. 2612 // See validateVOP3Literal. 2613 // This literal may be used as more than one operand. 2614 // If all these operands are of the same size, 2615 // this literal counts as one scalar value. 2616 // Otherwise it counts as 2 scalar values. 2617 // See "GFX10 Shader Programming", section 3.6.2.3. 2618 2619 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2620 if (Size < 4) Size = 4; 2621 2622 if (NumLiterals == 0) { 2623 NumLiterals = 1; 2624 LiteralSize = Size; 2625 } else if (LiteralSize != Size) { 2626 NumLiterals = 2; 2627 } 2628 } 2629 } 2630 } 2631 } 2632 ConstantBusUseCount += NumLiterals; 2633 2634 if (isGFX10()) 2635 return ConstantBusUseCount <= 2; 2636 2637 return ConstantBusUseCount <= 1; 2638 } 2639 2640 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2641 const unsigned Opcode = Inst.getOpcode(); 2642 const MCInstrDesc &Desc = MII.get(Opcode); 2643 2644 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2645 if (DstIdx == -1 || 2646 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2647 return true; 2648 } 2649 2650 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2651 2652 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2653 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2654 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2655 2656 assert(DstIdx != -1); 2657 const MCOperand &Dst = Inst.getOperand(DstIdx); 2658 assert(Dst.isReg()); 2659 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2660 2661 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2662 2663 for (int SrcIdx : SrcIndices) { 2664 if (SrcIdx == -1) break; 2665 const MCOperand &Src = Inst.getOperand(SrcIdx); 2666 if (Src.isReg()) { 2667 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2668 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2669 return false; 2670 } 2671 } 2672 } 2673 2674 return true; 2675 } 2676 2677 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2678 2679 const unsigned Opc = Inst.getOpcode(); 2680 const MCInstrDesc &Desc = MII.get(Opc); 2681 2682 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2683 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2684 assert(ClampIdx != -1); 2685 return Inst.getOperand(ClampIdx).getImm() == 0; 2686 } 2687 2688 return true; 2689 } 2690 2691 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2692 2693 const unsigned Opc = Inst.getOpcode(); 2694 const MCInstrDesc &Desc = MII.get(Opc); 2695 2696 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2697 return true; 2698 2699 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2700 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2701 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2702 2703 assert(VDataIdx != -1); 2704 assert(DMaskIdx != -1); 2705 assert(TFEIdx != -1); 2706 2707 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2708 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2709 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2710 if (DMask == 0) 2711 DMask = 1; 2712 2713 unsigned DataSize = 2714 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2715 if (hasPackedD16()) { 2716 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2717 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2718 DataSize = (DataSize + 1) / 2; 2719 } 2720 2721 return (VDataSize / 4) == DataSize + TFESize; 2722 } 2723 2724 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2725 const unsigned Opc = Inst.getOpcode(); 2726 const MCInstrDesc &Desc = MII.get(Opc); 2727 2728 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2729 return true; 2730 2731 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2732 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2733 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2734 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2735 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2736 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2737 2738 assert(VAddr0Idx != -1); 2739 assert(SrsrcIdx != -1); 2740 assert(DimIdx != -1); 2741 assert(SrsrcIdx > VAddr0Idx); 2742 2743 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2744 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2745 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2746 unsigned VAddrSize = 2747 IsNSA ? SrsrcIdx - VAddr0Idx 2748 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2749 2750 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2751 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2752 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2753 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2754 if (!IsNSA) { 2755 if (AddrSize > 8) 2756 AddrSize = 16; 2757 else if (AddrSize > 4) 2758 AddrSize = 8; 2759 } 2760 2761 return VAddrSize == AddrSize; 2762 } 2763 2764 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2765 2766 const unsigned Opc = Inst.getOpcode(); 2767 const MCInstrDesc &Desc = MII.get(Opc); 2768 2769 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2770 return true; 2771 if (!Desc.mayLoad() || !Desc.mayStore()) 2772 return true; // Not atomic 2773 2774 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2775 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2776 2777 // This is an incomplete check because image_atomic_cmpswap 2778 // may only use 0x3 and 0xf while other atomic operations 2779 // may use 0x1 and 0x3. However these limitations are 2780 // verified when we check that dmask matches dst size. 2781 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2782 } 2783 2784 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2785 2786 const unsigned Opc = Inst.getOpcode(); 2787 const MCInstrDesc &Desc = MII.get(Opc); 2788 2789 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2790 return true; 2791 2792 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2793 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2794 2795 // GATHER4 instructions use dmask in a different fashion compared to 2796 // other MIMG instructions. The only useful DMASK values are 2797 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2798 // (red,red,red,red) etc.) The ISA document doesn't mention 2799 // this. 
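// For example, an image_gather4 with dmask:0x1 gathers the red component of
// four texels into the four result registers, while a value like dmask:0x3
// is rejected here because more than one component is selected.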
2800 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2801 } 2802 2803 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2804 2805 const unsigned Opc = Inst.getOpcode(); 2806 const MCInstrDesc &Desc = MII.get(Opc); 2807 2808 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2809 return true; 2810 2811 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2812 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2813 if (isCI() || isSI()) 2814 return false; 2815 } 2816 2817 return true; 2818 } 2819 2820 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2821 const unsigned Opc = Inst.getOpcode(); 2822 const MCInstrDesc &Desc = MII.get(Opc); 2823 2824 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2825 return true; 2826 2827 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2828 if (DimIdx < 0) 2829 return true; 2830 2831 long Imm = Inst.getOperand(DimIdx).getImm(); 2832 if (Imm < 0 || Imm >= 8) 2833 return false; 2834 2835 return true; 2836 } 2837 2838 static bool IsRevOpcode(const unsigned Opcode) 2839 { 2840 switch (Opcode) { 2841 case AMDGPU::V_SUBREV_F32_e32: 2842 case AMDGPU::V_SUBREV_F32_e64: 2843 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2844 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2845 case AMDGPU::V_SUBREV_F32_e32_vi: 2846 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2847 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 2848 case AMDGPU::V_SUBREV_F32_e64_vi: 2849 2850 case AMDGPU::V_SUBREV_I32_e32: 2851 case AMDGPU::V_SUBREV_I32_e64: 2852 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 2853 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 2854 2855 case AMDGPU::V_SUBBREV_U32_e32: 2856 case AMDGPU::V_SUBBREV_U32_e64: 2857 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 2858 case AMDGPU::V_SUBBREV_U32_e32_vi: 2859 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 2860 case AMDGPU::V_SUBBREV_U32_e64_vi: 2861 2862 case AMDGPU::V_SUBREV_U32_e32: 2863 case AMDGPU::V_SUBREV_U32_e64: 2864 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2865 case AMDGPU::V_SUBREV_U32_e32_vi: 2866 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2867 case AMDGPU::V_SUBREV_U32_e64_vi: 2868 2869 case AMDGPU::V_SUBREV_F16_e32: 2870 case AMDGPU::V_SUBREV_F16_e64: 2871 case AMDGPU::V_SUBREV_F16_e32_gfx10: 2872 case AMDGPU::V_SUBREV_F16_e32_vi: 2873 case AMDGPU::V_SUBREV_F16_e64_gfx10: 2874 case AMDGPU::V_SUBREV_F16_e64_vi: 2875 2876 case AMDGPU::V_SUBREV_U16_e32: 2877 case AMDGPU::V_SUBREV_U16_e64: 2878 case AMDGPU::V_SUBREV_U16_e32_vi: 2879 case AMDGPU::V_SUBREV_U16_e64_vi: 2880 2881 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2882 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 2883 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2884 2885 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2886 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2887 2888 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 2889 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 2890 2891 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 2892 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 2893 2894 case AMDGPU::V_LSHRREV_B32_e32: 2895 case AMDGPU::V_LSHRREV_B32_e64: 2896 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 2897 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 2898 case AMDGPU::V_LSHRREV_B32_e32_vi: 2899 case AMDGPU::V_LSHRREV_B32_e64_vi: 2900 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 2901 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 2902 2903 case AMDGPU::V_ASHRREV_I32_e32: 2904 case AMDGPU::V_ASHRREV_I32_e64: 2905 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 2906 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 2907 case AMDGPU::V_ASHRREV_I32_e32_vi: 2908 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 2909 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 
2910 case AMDGPU::V_ASHRREV_I32_e64_vi: 2911 2912 case AMDGPU::V_LSHLREV_B32_e32: 2913 case AMDGPU::V_LSHLREV_B32_e64: 2914 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 2915 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 2916 case AMDGPU::V_LSHLREV_B32_e32_vi: 2917 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 2918 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 2919 case AMDGPU::V_LSHLREV_B32_e64_vi: 2920 2921 case AMDGPU::V_LSHLREV_B16_e32: 2922 case AMDGPU::V_LSHLREV_B16_e64: 2923 case AMDGPU::V_LSHLREV_B16_e32_vi: 2924 case AMDGPU::V_LSHLREV_B16_e64_vi: 2925 case AMDGPU::V_LSHLREV_B16_gfx10: 2926 2927 case AMDGPU::V_LSHRREV_B16_e32: 2928 case AMDGPU::V_LSHRREV_B16_e64: 2929 case AMDGPU::V_LSHRREV_B16_e32_vi: 2930 case AMDGPU::V_LSHRREV_B16_e64_vi: 2931 case AMDGPU::V_LSHRREV_B16_gfx10: 2932 2933 case AMDGPU::V_ASHRREV_I16_e32: 2934 case AMDGPU::V_ASHRREV_I16_e64: 2935 case AMDGPU::V_ASHRREV_I16_e32_vi: 2936 case AMDGPU::V_ASHRREV_I16_e64_vi: 2937 case AMDGPU::V_ASHRREV_I16_gfx10: 2938 2939 case AMDGPU::V_LSHLREV_B64: 2940 case AMDGPU::V_LSHLREV_B64_gfx10: 2941 case AMDGPU::V_LSHLREV_B64_vi: 2942 2943 case AMDGPU::V_LSHRREV_B64: 2944 case AMDGPU::V_LSHRREV_B64_gfx10: 2945 case AMDGPU::V_LSHRREV_B64_vi: 2946 2947 case AMDGPU::V_ASHRREV_I64: 2948 case AMDGPU::V_ASHRREV_I64_gfx10: 2949 case AMDGPU::V_ASHRREV_I64_vi: 2950 2951 case AMDGPU::V_PK_LSHLREV_B16: 2952 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 2953 case AMDGPU::V_PK_LSHLREV_B16_vi: 2954 2955 case AMDGPU::V_PK_LSHRREV_B16: 2956 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 2957 case AMDGPU::V_PK_LSHRREV_B16_vi: 2958 case AMDGPU::V_PK_ASHRREV_I16: 2959 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 2960 case AMDGPU::V_PK_ASHRREV_I16_vi: 2961 return true; 2962 default: 2963 return false; 2964 } 2965 } 2966 2967 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2968 2969 using namespace SIInstrFlags; 2970 const unsigned Opcode = Inst.getOpcode(); 2971 const MCInstrDesc &Desc = MII.get(Opcode); 2972 2973 // lds_direct register is defined so that it can be used 2974 // with 9-bit operands only. Ignore encodings which do not accept these. 2975 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2976 return true; 2977 2978 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2979 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2980 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2981 2982 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2983 2984 // lds_direct cannot be specified as either src1 or src2. 2985 for (int SrcIdx : SrcIndices) { 2986 if (SrcIdx == -1) break; 2987 const MCOperand &Src = Inst.getOperand(SrcIdx); 2988 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2989 return false; 2990 } 2991 } 2992 2993 if (Src0Idx == -1) 2994 return true; 2995 2996 const MCOperand &Src = Inst.getOperand(Src0Idx); 2997 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2998 return true; 2999 3000 // lds_direct is specified as src0. Check additional limitations. 
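// For example, "v_mov_b32 v0, lds_direct" is accepted, while lds_direct with
// an SDWA encoding or as src0 of a "rev" opcode such as v_lshlrev_b32 (where
// the source roles are swapped) is rejected, and it is never allowed as src1
// or src2 (checked above).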
3001 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3002 } 3003 3004 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3005 unsigned Opcode = Inst.getOpcode(); 3006 const MCInstrDesc &Desc = MII.get(Opcode); 3007 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3008 return true; 3009 3010 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3011 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3012 3013 const int OpIndices[] = { Src0Idx, Src1Idx }; 3014 3015 unsigned NumLiterals = 0; 3016 uint32_t LiteralValue; 3017 3018 for (int OpIdx : OpIndices) { 3019 if (OpIdx == -1) break; 3020 3021 const MCOperand &MO = Inst.getOperand(OpIdx); 3022 if (MO.isImm() && 3023 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3024 AMDGPU::isSISrcOperand(Desc, OpIdx) && 3025 !isInlineConstant(Inst, OpIdx)) { 3026 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3027 if (NumLiterals == 0 || LiteralValue != Value) { 3028 LiteralValue = Value; 3029 ++NumLiterals; 3030 } 3031 } 3032 } 3033 3034 return NumLiterals <= 1; 3035 } 3036 3037 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3038 const unsigned Opc = Inst.getOpcode(); 3039 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3040 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3041 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3042 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3043 3044 if (OpSel & ~3) 3045 return false; 3046 } 3047 return true; 3048 } 3049 3050 // Check if VCC register matches wavefront size 3051 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3052 auto FB = getFeatureBits(); 3053 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3054 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3055 } 3056 3057 // VOP3 literal is only allowed in GFX10+ and only one can be used 3058 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3059 unsigned Opcode = Inst.getOpcode(); 3060 const MCInstrDesc &Desc = MII.get(Opcode); 3061 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3062 return true; 3063 3064 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3065 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3066 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3067 3068 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3069 3070 unsigned NumLiterals = 0; 3071 uint32_t LiteralValue; 3072 3073 for (int OpIdx : OpIndices) { 3074 if (OpIdx == -1) break; 3075 3076 const MCOperand &MO = Inst.getOperand(OpIdx); 3077 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3078 continue; 3079 3080 if (!isInlineConstant(Inst, OpIdx)) { 3081 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3082 if (NumLiterals == 0 || LiteralValue != Value) { 3083 LiteralValue = Value; 3084 ++NumLiterals; 3085 } 3086 } 3087 } 3088 3089 return !NumLiterals || 3090 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3091 } 3092 3093 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3094 const SMLoc &IDLoc) { 3095 if (!validateLdsDirect(Inst)) { 3096 Error(IDLoc, 3097 "invalid use of lds_direct"); 3098 return false; 3099 } 3100 if (!validateSOPLiteral(Inst)) { 3101 Error(IDLoc, 3102 "only one literal operand is allowed"); 3103 return false; 3104 } 3105 if (!validateVOP3Literal(Inst)) { 3106 Error(IDLoc, 3107 
"invalid literal operand"); 3108 return false; 3109 } 3110 if (!validateConstantBusLimitations(Inst)) { 3111 Error(IDLoc, 3112 "invalid operand (violates constant bus restrictions)"); 3113 return false; 3114 } 3115 if (!validateEarlyClobberLimitations(Inst)) { 3116 Error(IDLoc, 3117 "destination must be different than all sources"); 3118 return false; 3119 } 3120 if (!validateIntClampSupported(Inst)) { 3121 Error(IDLoc, 3122 "integer clamping is not supported on this GPU"); 3123 return false; 3124 } 3125 if (!validateOpSel(Inst)) { 3126 Error(IDLoc, 3127 "invalid op_sel operand"); 3128 return false; 3129 } 3130 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3131 if (!validateMIMGD16(Inst)) { 3132 Error(IDLoc, 3133 "d16 modifier is not supported on this GPU"); 3134 return false; 3135 } 3136 if (!validateMIMGDim(Inst)) { 3137 Error(IDLoc, "dim modifier is required on this GPU"); 3138 return false; 3139 } 3140 if (!validateMIMGDataSize(Inst)) { 3141 Error(IDLoc, 3142 "image data size does not match dmask and tfe"); 3143 return false; 3144 } 3145 if (!validateMIMGAddrSize(Inst)) { 3146 Error(IDLoc, 3147 "image address size does not match dim and a16"); 3148 return false; 3149 } 3150 if (!validateMIMGAtomicDMask(Inst)) { 3151 Error(IDLoc, 3152 "invalid atomic image dmask"); 3153 return false; 3154 } 3155 if (!validateMIMGGatherDMask(Inst)) { 3156 Error(IDLoc, 3157 "invalid image_gather dmask: only one bit must be set"); 3158 return false; 3159 } 3160 3161 return true; 3162 } 3163 3164 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3165 const FeatureBitset &FBS, 3166 unsigned VariantID = 0); 3167 3168 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3169 OperandVector &Operands, 3170 MCStreamer &Out, 3171 uint64_t &ErrorInfo, 3172 bool MatchingInlineAsm) { 3173 MCInst Inst; 3174 unsigned Result = Match_Success; 3175 for (auto Variant : getMatchedVariants()) { 3176 uint64_t EI; 3177 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3178 Variant); 3179 // We order match statuses from least to most specific. 
We use most specific 3180 // status as resulting 3181 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3182 if ((R == Match_Success) || 3183 (R == Match_PreferE32) || 3184 (R == Match_MissingFeature && Result != Match_PreferE32) || 3185 (R == Match_InvalidOperand && Result != Match_MissingFeature 3186 && Result != Match_PreferE32) || 3187 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3188 && Result != Match_MissingFeature 3189 && Result != Match_PreferE32)) { 3190 Result = R; 3191 ErrorInfo = EI; 3192 } 3193 if (R == Match_Success) 3194 break; 3195 } 3196 3197 switch (Result) { 3198 default: break; 3199 case Match_Success: 3200 if (!validateInstruction(Inst, IDLoc)) { 3201 return true; 3202 } 3203 Inst.setLoc(IDLoc); 3204 Out.EmitInstruction(Inst, getSTI()); 3205 return false; 3206 3207 case Match_MissingFeature: 3208 return Error(IDLoc, "instruction not supported on this GPU"); 3209 3210 case Match_MnemonicFail: { 3211 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3212 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3213 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3214 return Error(IDLoc, "invalid instruction" + Suggestion, 3215 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3216 } 3217 3218 case Match_InvalidOperand: { 3219 SMLoc ErrorLoc = IDLoc; 3220 if (ErrorInfo != ~0ULL) { 3221 if (ErrorInfo >= Operands.size()) { 3222 return Error(IDLoc, "too few operands for instruction"); 3223 } 3224 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3225 if (ErrorLoc == SMLoc()) 3226 ErrorLoc = IDLoc; 3227 } 3228 return Error(ErrorLoc, "invalid operand for instruction"); 3229 } 3230 3231 case Match_PreferE32: 3232 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3233 "should be encoded as e32"); 3234 } 3235 llvm_unreachable("Implement any new match types added!"); 3236 } 3237 3238 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3239 int64_t Tmp = -1; 3240 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3241 return true; 3242 } 3243 if (getParser().parseAbsoluteExpression(Tmp)) { 3244 return true; 3245 } 3246 Ret = static_cast<uint32_t>(Tmp); 3247 return false; 3248 } 3249 3250 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3251 uint32_t &Minor) { 3252 if (ParseAsAbsoluteExpression(Major)) 3253 return TokError("invalid major version"); 3254 3255 if (getLexer().isNot(AsmToken::Comma)) 3256 return TokError("minor version number required, comma expected"); 3257 Lex(); 3258 3259 if (ParseAsAbsoluteExpression(Minor)) 3260 return TokError("invalid minor version"); 3261 3262 return false; 3263 } 3264 3265 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3266 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3267 return TokError("directive only supported for amdgcn architecture"); 3268 3269 std::string Target; 3270 3271 SMLoc TargetStart = getTok().getLoc(); 3272 if (getParser().parseEscapedString(Target)) 3273 return true; 3274 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3275 3276 std::string ExpectedTarget; 3277 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3278 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3279 3280 if (Target != ExpectedTargetOS.str()) 3281 return getParser().Error(TargetRange.Start, "target must match options", 3282 TargetRange); 3283 3284 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3285 return false; 3286 } 3287 3288 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3289 return getParser().Error(Range.Start, "value out of range", Range); 3290 } 3291 3292 bool AMDGPUAsmParser::calculateGPRBlocks( 3293 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3294 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3295 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3296 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3297 // TODO(scott.linder): These calculations are duplicated from 3298 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3299 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3300 3301 unsigned NumVGPRs = NextFreeVGPR; 3302 unsigned NumSGPRs = NextFreeSGPR; 3303 3304 if (Version.Major >= 10) 3305 NumSGPRs = 0; 3306 else { 3307 unsigned MaxAddressableNumSGPRs = 3308 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3309 3310 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3311 NumSGPRs > MaxAddressableNumSGPRs) 3312 return OutOfRangeError(SGPRRange); 3313 3314 NumSGPRs += 3315 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3316 3317 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3318 NumSGPRs > MaxAddressableNumSGPRs) 3319 return OutOfRangeError(SGPRRange); 3320 3321 if (Features.test(FeatureSGPRInitBug)) 3322 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3323 } 3324 3325 VGPRBlocks = 3326 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3327 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3328 3329 return false; 3330 } 3331 3332 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3333 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3334 return TokError("directive only supported for amdgcn architecture"); 3335 3336 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3337 return TokError("directive only supported for amdhsa OS"); 3338 3339 StringRef KernelName; 3340 if (getParser().parseIdentifier(KernelName)) 3341 return true; 3342 3343 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3344 3345 StringSet<> Seen; 3346 3347 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3348 3349 SMRange VGPRRange; 3350 uint64_t NextFreeVGPR = 0; 3351 SMRange SGPRRange; 3352 uint64_t NextFreeSGPR = 0; 3353 unsigned UserSGPRCount = 0; 3354 bool ReserveVCC = true; 3355 bool ReserveFlatScr = true; 3356 bool ReserveXNACK = hasXNACK(); 3357 Optional<bool> EnableWavefrontSize32; 3358 3359 while (true) { 3360 while (getLexer().is(AsmToken::EndOfStatement)) 3361 Lex(); 3362 3363 if (getLexer().isNot(AsmToken::Identifier)) 3364 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3365 3366 StringRef ID = getTok().getIdentifier(); 3367 SMRange IDRange = getTok().getLocRange(); 3368 Lex(); 3369 3370 if (ID == ".end_amdhsa_kernel") 3371 break; 3372 3373 if (Seen.find(ID) != Seen.end()) 3374 return TokError(".amdhsa_ directives cannot be repeated"); 3375 Seen.insert(ID); 3376 3377 SMLoc ValStart = getTok().getLoc(); 3378 int64_t IVal; 3379 if (getParser().parseAbsoluteExpression(IVal)) 3380 return true; 3381 SMLoc ValEnd = getTok().getLoc(); 3382 SMRange ValRange = SMRange(ValStart, ValEnd); 3383 3384 if (IVal < 0) 3385 return OutOfRangeError(ValRange); 3386 3387 uint64_t Val = IVal; 3388 3389 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3390 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3391 return OutOfRangeError(RANGE); \ 3392 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3393 3394 if (ID == 
".amdhsa_group_segment_fixed_size") { 3395 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3396 return OutOfRangeError(ValRange); 3397 KD.group_segment_fixed_size = Val; 3398 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3399 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3400 return OutOfRangeError(ValRange); 3401 KD.private_segment_fixed_size = Val; 3402 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3403 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3404 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3405 Val, ValRange); 3406 UserSGPRCount += 4; 3407 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3408 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3409 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3410 ValRange); 3411 UserSGPRCount += 2; 3412 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3413 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3414 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3415 ValRange); 3416 UserSGPRCount += 2; 3417 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3418 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3419 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3420 Val, ValRange); 3421 UserSGPRCount += 2; 3422 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3423 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3424 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3425 ValRange); 3426 UserSGPRCount += 2; 3427 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3428 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3429 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3430 ValRange); 3431 UserSGPRCount += 2; 3432 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3433 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3434 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3435 Val, ValRange); 3436 UserSGPRCount += 1; 3437 } else if (ID == ".amdhsa_wavefront_size32") { 3438 if (IVersion.Major < 10) 3439 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3440 IDRange); 3441 EnableWavefrontSize32 = Val; 3442 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3443 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3444 Val, ValRange); 3445 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3446 PARSE_BITS_ENTRY( 3447 KD.compute_pgm_rsrc2, 3448 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3449 ValRange); 3450 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3451 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3452 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3453 ValRange); 3454 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3455 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3456 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3457 ValRange); 3458 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3459 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3460 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3461 ValRange); 3462 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3463 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3464 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3465 ValRange); 3466 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3467 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3468 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3469 ValRange); 3470 } else if (ID == ".amdhsa_next_free_vgpr") { 3471 VGPRRange = ValRange; 3472 NextFreeVGPR = Val; 3473 } else if (ID == ".amdhsa_next_free_sgpr") { 3474 SGPRRange = ValRange; 3475 NextFreeSGPR = Val; 3476 } else if (ID == 
".amdhsa_reserve_vcc") { 3477 if (!isUInt<1>(Val)) 3478 return OutOfRangeError(ValRange); 3479 ReserveVCC = Val; 3480 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3481 if (IVersion.Major < 7) 3482 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3483 IDRange); 3484 if (!isUInt<1>(Val)) 3485 return OutOfRangeError(ValRange); 3486 ReserveFlatScr = Val; 3487 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3488 if (IVersion.Major < 8) 3489 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3490 IDRange); 3491 if (!isUInt<1>(Val)) 3492 return OutOfRangeError(ValRange); 3493 ReserveXNACK = Val; 3494 } else if (ID == ".amdhsa_float_round_mode_32") { 3495 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3496 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3497 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3498 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3499 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3500 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3501 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3502 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3503 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3504 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3505 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3506 ValRange); 3507 } else if (ID == ".amdhsa_dx10_clamp") { 3508 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3509 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3510 } else if (ID == ".amdhsa_ieee_mode") { 3511 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3512 Val, ValRange); 3513 } else if (ID == ".amdhsa_fp16_overflow") { 3514 if (IVersion.Major < 9) 3515 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3516 IDRange); 3517 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3518 ValRange); 3519 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3520 if (IVersion.Major < 10) 3521 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3522 IDRange); 3523 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3524 ValRange); 3525 } else if (ID == ".amdhsa_memory_ordered") { 3526 if (IVersion.Major < 10) 3527 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3528 IDRange); 3529 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3530 ValRange); 3531 } else if (ID == ".amdhsa_forward_progress") { 3532 if (IVersion.Major < 10) 3533 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3534 IDRange); 3535 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3536 ValRange); 3537 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3538 PARSE_BITS_ENTRY( 3539 KD.compute_pgm_rsrc2, 3540 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3541 ValRange); 3542 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3543 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3544 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3545 Val, ValRange); 3546 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3547 PARSE_BITS_ENTRY( 3548 KD.compute_pgm_rsrc2, 3549 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3550 ValRange); 3551 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3552 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3553 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3554 Val, ValRange); 3555 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3556 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3557 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3558 Val, ValRange); 3559 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3560 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3561 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3562 Val, ValRange); 3563 } else if (ID == ".amdhsa_exception_int_div_zero") { 3564 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3565 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3566 Val, ValRange); 3567 } else { 3568 return getParser().Error(IDRange.Start, 3569 "unknown .amdhsa_kernel directive", IDRange); 3570 } 3571 3572 #undef PARSE_BITS_ENTRY 3573 } 3574 3575 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3576 return TokError(".amdhsa_next_free_vgpr directive is required"); 3577 3578 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3579 return TokError(".amdhsa_next_free_sgpr directive is required"); 3580 3581 unsigned VGPRBlocks; 3582 unsigned SGPRBlocks; 3583 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3584 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3585 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3586 SGPRBlocks)) 3587 return true; 3588 3589 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3590 VGPRBlocks)) 3591 return OutOfRangeError(VGPRRange); 3592 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3593 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3594 3595 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3596 SGPRBlocks)) 3597 return OutOfRangeError(SGPRRange); 3598 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3599 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3600 SGPRBlocks); 3601 3602 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3603 return TokError("too many user SGPRs enabled"); 3604 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3605 UserSGPRCount); 3606 3607 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3608 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3609 ReserveFlatScr, ReserveXNACK); 3610 return false; 3611 } 3612 3613 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3614 uint32_t Major; 3615 uint32_t Minor; 3616 3617 if (ParseDirectiveMajorMinor(Major, Minor)) 3618 return true; 3619 3620 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3621 return false; 3622 } 3623 3624 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3625 uint32_t Major; 3626 uint32_t Minor; 3627 uint32_t Stepping; 3628 StringRef VendorName; 3629 StringRef ArchName; 3630 3631 // If this directive has no arguments, then use the ISA version for the 3632 // targeted GPU. 
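  // Illustrative forms of this directive (example values assumed, derived from
  // the parsing logic below):
  //   .hsa_code_object_isa                         - take the ISA version of the targeted GPU
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"    - explicit major, minor, stepping, vendor, arch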
3633 if (getLexer().is(AsmToken::EndOfStatement)) { 3634 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3635 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3636 ISA.Stepping, 3637 "AMD", "AMDGPU"); 3638 return false; 3639 } 3640 3641 if (ParseDirectiveMajorMinor(Major, Minor)) 3642 return true; 3643 3644 if (getLexer().isNot(AsmToken::Comma)) 3645 return TokError("stepping version number required, comma expected"); 3646 Lex(); 3647 3648 if (ParseAsAbsoluteExpression(Stepping)) 3649 return TokError("invalid stepping version"); 3650 3651 if (getLexer().isNot(AsmToken::Comma)) 3652 return TokError("vendor name required, comma expected"); 3653 Lex(); 3654 3655 if (getLexer().isNot(AsmToken::String)) 3656 return TokError("invalid vendor name"); 3657 3658 VendorName = getLexer().getTok().getStringContents(); 3659 Lex(); 3660 3661 if (getLexer().isNot(AsmToken::Comma)) 3662 return TokError("arch name required, comma expected"); 3663 Lex(); 3664 3665 if (getLexer().isNot(AsmToken::String)) 3666 return TokError("invalid arch name"); 3667 3668 ArchName = getLexer().getTok().getStringContents(); 3669 Lex(); 3670 3671 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3672 VendorName, ArchName); 3673 return false; 3674 } 3675 3676 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3677 amd_kernel_code_t &Header) { 3678 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3679 // assembly for backwards compatibility. 3680 if (ID == "max_scratch_backing_memory_byte_size") { 3681 Parser.eatToEndOfStatement(); 3682 return false; 3683 } 3684 3685 SmallString<40> ErrStr; 3686 raw_svector_ostream Err(ErrStr); 3687 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3688 return TokError(Err.str()); 3689 } 3690 Lex(); 3691 3692 if (ID == "enable_wavefront_size32") { 3693 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 3694 if (!isGFX10()) 3695 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 3696 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3697 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 3698 } else { 3699 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3700 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 3701 } 3702 } 3703 3704 if (ID == "wavefront_size") { 3705 if (Header.wavefront_size == 5) { 3706 if (!isGFX10()) 3707 return TokError("wavefront_size=5 is only allowed on GFX10+"); 3708 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3709 return TokError("wavefront_size=5 requires +WavefrontSize32"); 3710 } else if (Header.wavefront_size == 6) { 3711 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3712 return TokError("wavefront_size=6 requires +WavefrontSize64"); 3713 } 3714 } 3715 3716 if (ID == "enable_wgp_mode") { 3717 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3718 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3719 } 3720 3721 if (ID == "enable_mem_ordered") { 3722 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3723 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3724 } 3725 3726 if (ID == "enable_fwd_progress") { 3727 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3728 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3729 } 3730 3731 return false; 3732 } 3733 3734 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3735 amd_kernel_code_t Header; 3736 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3737 3738 while (true) { 3739 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3740 // will set the current token to EndOfStatement. 3741 while(getLexer().is(AsmToken::EndOfStatement)) 3742 Lex(); 3743 3744 if (getLexer().isNot(AsmToken::Identifier)) 3745 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3746 3747 StringRef ID = getLexer().getTok().getIdentifier(); 3748 Lex(); 3749 3750 if (ID == ".end_amd_kernel_code_t") 3751 break; 3752 3753 if (ParseAMDKernelCodeTValue(ID, Header)) 3754 return true; 3755 } 3756 3757 getTargetStreamer().EmitAMDKernelCodeT(Header); 3758 3759 return false; 3760 } 3761 3762 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3763 if (getLexer().isNot(AsmToken::Identifier)) 3764 return TokError("expected symbol name"); 3765 3766 StringRef KernelName = Parser.getTok().getString(); 3767 3768 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3769 ELF::STT_AMDGPU_HSA_KERNEL); 3770 Lex(); 3771 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3772 KernelScope.initialize(getContext()); 3773 return false; 3774 } 3775 3776 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3777 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3778 return Error(getParser().getTok().getLoc(), 3779 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3780 "architectures"); 3781 } 3782 3783 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3784 3785 std::string ISAVersionStringFromSTI; 3786 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3787 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3788 3789 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3790 return Error(getParser().getTok().getLoc(), 3791 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3792 "arguments specified through the command line"); 3793 } 3794 3795 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3796 Lex(); 3797 3798 return false; 3799 } 3800 3801 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3802 const char *AssemblerDirectiveBegin; 3803 const char *AssemblerDirectiveEnd; 3804 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3805 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3806 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3807 HSAMD::V3::AssemblerDirectiveEnd) 3808 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3809 HSAMD::AssemblerDirectiveEnd); 3810 3811 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3812 return Error(getParser().getTok().getLoc(), 3813 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3814 "not available on non-amdhsa OSes")).str()); 3815 } 3816 3817 std::string HSAMetadataString; 3818 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3819 HSAMetadataString)) 3820 return true; 3821 3822 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3823 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3824 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3825 } else { 3826 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3827 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3828 } 3829 3830 return false; 3831 } 3832 3833 /// Common code to parse out a block of text (typically YAML) between start and 3834 /// end directives. 
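/// The text is collected verbatim: lexer space-skipping is disabled while
/// scanning, each statement is appended followed by the target's separator
/// string, and reaching end-of-file before AssemblerDirectiveEnd is reported
/// as an error. (Descriptive note; behavior as implemented below.)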
3835 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3836 const char *AssemblerDirectiveEnd, 3837 std::string &CollectString) { 3838 3839 raw_string_ostream CollectStream(CollectString); 3840 3841 getLexer().setSkipSpace(false); 3842 3843 bool FoundEnd = false; 3844 while (!getLexer().is(AsmToken::Eof)) { 3845 while (getLexer().is(AsmToken::Space)) { 3846 CollectStream << getLexer().getTok().getString(); 3847 Lex(); 3848 } 3849 3850 if (getLexer().is(AsmToken::Identifier)) { 3851 StringRef ID = getLexer().getTok().getIdentifier(); 3852 if (ID == AssemblerDirectiveEnd) { 3853 Lex(); 3854 FoundEnd = true; 3855 break; 3856 } 3857 } 3858 3859 CollectStream << Parser.parseStringToEndOfStatement() 3860 << getContext().getAsmInfo()->getSeparatorString(); 3861 3862 Parser.eatToEndOfStatement(); 3863 } 3864 3865 getLexer().setSkipSpace(true); 3866 3867 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3868 return TokError(Twine("expected directive ") + 3869 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3870 } 3871 3872 CollectStream.flush(); 3873 return false; 3874 } 3875 3876 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3877 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3878 std::string String; 3879 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3880 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3881 return true; 3882 3883 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3884 if (!PALMetadata->setFromString(String)) 3885 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3886 return false; 3887 } 3888 3889 /// Parse the assembler directive for old linear-format PAL metadata. 3890 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3891 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3892 return Error(getParser().getTok().getLoc(), 3893 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3894 "not available on non-amdpal OSes")).str()); 3895 } 3896 3897 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3898 PALMetadata->setLegacy(); 3899 for (;;) { 3900 uint32_t Key, Value; 3901 if (ParseAsAbsoluteExpression(Key)) { 3902 return TokError(Twine("invalid value in ") + 3903 Twine(PALMD::AssemblerDirective)); 3904 } 3905 if (getLexer().isNot(AsmToken::Comma)) { 3906 return TokError(Twine("expected an even number of values in ") + 3907 Twine(PALMD::AssemblerDirective)); 3908 } 3909 Lex(); 3910 if (ParseAsAbsoluteExpression(Value)) { 3911 return TokError(Twine("invalid value in ") + 3912 Twine(PALMD::AssemblerDirective)); 3913 } 3914 PALMetadata->setRegister(Key, Value); 3915 if (getLexer().isNot(AsmToken::Comma)) 3916 break; 3917 Lex(); 3918 } 3919 return false; 3920 } 3921 3922 /// ParseDirectiveAMDGPULDS 3923 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 3924 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 3925 if (getParser().checkForValidSection()) 3926 return true; 3927 3928 StringRef Name; 3929 SMLoc NameLoc = getLexer().getLoc(); 3930 if (getParser().parseIdentifier(Name)) 3931 return TokError("expected identifier in directive"); 3932 3933 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 3934 if (parseToken(AsmToken::Comma, "expected ','")) 3935 return true; 3936 3937 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 3938 3939 int64_t Size; 3940 SMLoc SizeLoc = getLexer().getLoc(); 3941 if (getParser().parseAbsoluteExpression(Size)) 3942 return true; 3943 if (Size < 0) 3944 return 
Error(SizeLoc, "size must be non-negative");
3945 if (Size > LocalMemorySize)
3946 return Error(SizeLoc, "size is too large");
3947
3948 int64_t Align = 4;
3949 if (getLexer().is(AsmToken::Comma)) {
3950 Lex();
3951 SMLoc AlignLoc = getLexer().getLoc();
3952 if (getParser().parseAbsoluteExpression(Align))
3953 return true;
3954 if (Align < 0 || !isPowerOf2_64(Align))
3955 return Error(AlignLoc, "alignment must be a power of two");
3956
3957 // Alignment larger than the size of LDS is possible in theory, as long
3958 // as the linker manages to place the symbol at address 0, but we do want
3959 // to make sure the alignment fits nicely into a 32-bit integer.
3960 if (Align >= 1u << 31)
3961 return Error(AlignLoc, "alignment is too large");
3962 }
3963
3964 if (parseToken(AsmToken::EndOfStatement,
3965 "unexpected token in '.amdgpu_lds' directive"))
3966 return true;
3967
3968 Symbol->redefineIfPossible();
3969 if (!Symbol->isUndefined())
3970 return Error(NameLoc, "invalid symbol redefinition");
3971
3972 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
3973 return false;
3974 }
3975
3976 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3977 StringRef IDVal = DirectiveID.getString();
3978
3979 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3980 if (IDVal == ".amdgcn_target")
3981 return ParseDirectiveAMDGCNTarget();
3982
3983 if (IDVal == ".amdhsa_kernel")
3984 return ParseDirectiveAMDHSAKernel();
3985
3986 // TODO: Restructure/combine with PAL metadata directive.
3987 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3988 return ParseDirectiveHSAMetadata();
3989 } else {
3990 if (IDVal == ".hsa_code_object_version")
3991 return ParseDirectiveHSACodeObjectVersion();
3992
3993 if (IDVal == ".hsa_code_object_isa")
3994 return ParseDirectiveHSACodeObjectISA();
3995
3996 if (IDVal == ".amd_kernel_code_t")
3997 return ParseDirectiveAMDKernelCodeT();
3998
3999 if (IDVal == ".amdgpu_hsa_kernel")
4000 return ParseDirectiveAMDGPUHsaKernel();
4001
4002 if (IDVal == ".amd_amdgpu_isa")
4003 return ParseDirectiveISAVersion();
4004
4005 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4006 return ParseDirectiveHSAMetadata();
4007 }
4008
4009 if (IDVal == ".amdgpu_lds")
4010 return ParseDirectiveAMDGPULDS();
4011
4012 if (IDVal == PALMD::AssemblerDirectiveBegin)
4013 return ParseDirectivePALMetadataBegin();
4014
4015 if (IDVal == PALMD::AssemblerDirective)
4016 return ParseDirectivePALMetadata();
4017
4018 return true;
4019 }
4020
4021 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4022 unsigned RegNo) const {
4023
4024 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4025 R.isValid(); ++R) {
4026 if (*R == RegNo)
4027 return isGFX9() || isGFX10();
4028 }
4029
4030 // GFX10 has 2 more SGPRs 104 and 105.
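  // Note: MCRegAliasIterator with IncludeSelf == true visits SGPR104_SGPR105
  // itself plus every register that overlaps it (e.g. s104 and s105), so any
  // such operand is accepted only when hasSGPR104_SGPR105() holds.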
4031 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4032 R.isValid(); ++R) { 4033 if (*R == RegNo) 4034 return hasSGPR104_SGPR105(); 4035 } 4036 4037 switch (RegNo) { 4038 case AMDGPU::SRC_SHARED_BASE: 4039 case AMDGPU::SRC_SHARED_LIMIT: 4040 case AMDGPU::SRC_PRIVATE_BASE: 4041 case AMDGPU::SRC_PRIVATE_LIMIT: 4042 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4043 return !isCI() && !isSI() && !isVI(); 4044 case AMDGPU::TBA: 4045 case AMDGPU::TBA_LO: 4046 case AMDGPU::TBA_HI: 4047 case AMDGPU::TMA: 4048 case AMDGPU::TMA_LO: 4049 case AMDGPU::TMA_HI: 4050 return !isGFX9() && !isGFX10(); 4051 case AMDGPU::XNACK_MASK: 4052 case AMDGPU::XNACK_MASK_LO: 4053 case AMDGPU::XNACK_MASK_HI: 4054 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4055 case AMDGPU::SGPR_NULL: 4056 return isGFX10(); 4057 default: 4058 break; 4059 } 4060 4061 if (isCI()) 4062 return true; 4063 4064 if (isSI() || isGFX10()) { 4065 // No flat_scr on SI. 4066 // On GFX10 flat scratch is not a valid register operand and can only be 4067 // accessed with s_setreg/s_getreg. 4068 switch (RegNo) { 4069 case AMDGPU::FLAT_SCR: 4070 case AMDGPU::FLAT_SCR_LO: 4071 case AMDGPU::FLAT_SCR_HI: 4072 return false; 4073 default: 4074 return true; 4075 } 4076 } 4077 4078 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4079 // SI/CI have. 4080 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4081 R.isValid(); ++R) { 4082 if (*R == RegNo) 4083 return hasSGPR102_SGPR103(); 4084 } 4085 4086 return true; 4087 } 4088 4089 OperandMatchResultTy 4090 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4091 OperandMode Mode) { 4092 // Try to parse with a custom parser 4093 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4094 4095 // If we successfully parsed the operand or if there as an error parsing, 4096 // we are done. 4097 // 4098 // If we are parsing after we reach EndOfStatement then this means we 4099 // are appending default values to the Operands list. This is only done 4100 // by custom parser, so we shouldn't continue on to the generic parsing. 4101 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4102 getLexer().is(AsmToken::EndOfStatement)) 4103 return ResTy; 4104 4105 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4106 unsigned Prefix = Operands.size(); 4107 SMLoc LBraceLoc = getTok().getLoc(); 4108 Parser.Lex(); // eat the '[' 4109 4110 for (;;) { 4111 ResTy = parseReg(Operands); 4112 if (ResTy != MatchOperand_Success) 4113 return ResTy; 4114 4115 if (getLexer().is(AsmToken::RBrac)) 4116 break; 4117 4118 if (getLexer().isNot(AsmToken::Comma)) 4119 return MatchOperand_ParseFail; 4120 Parser.Lex(); 4121 } 4122 4123 if (Operands.size() - Prefix > 1) { 4124 Operands.insert(Operands.begin() + Prefix, 4125 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4126 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4127 getTok().getLoc())); 4128 } 4129 4130 Parser.Lex(); // eat the ']' 4131 return MatchOperand_Success; 4132 } 4133 4134 return parseRegOrImm(Operands); 4135 } 4136 4137 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4138 // Clear any forced encodings from the previous instruction. 
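  // For example (illustrative, matching the suffix checks below):
  //   "v_add_f32_e64"  -> "v_add_f32",  forced 64-bit (VOP3) encoding
  //   "v_add_f32_e32"  -> "v_add_f32",  forced 32-bit encoding
  //   "v_mov_b32_dpp"  -> "v_mov_b32",  forced DPP
  //   "v_mov_b32_sdwa" -> "v_mov_b32",  forced SDWA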
4139 setForcedEncodingSize(0); 4140 setForcedDPP(false); 4141 setForcedSDWA(false); 4142 4143 if (Name.endswith("_e64")) { 4144 setForcedEncodingSize(64); 4145 return Name.substr(0, Name.size() - 4); 4146 } else if (Name.endswith("_e32")) { 4147 setForcedEncodingSize(32); 4148 return Name.substr(0, Name.size() - 4); 4149 } else if (Name.endswith("_dpp")) { 4150 setForcedDPP(true); 4151 return Name.substr(0, Name.size() - 4); 4152 } else if (Name.endswith("_sdwa")) { 4153 setForcedSDWA(true); 4154 return Name.substr(0, Name.size() - 5); 4155 } 4156 return Name; 4157 } 4158 4159 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4160 StringRef Name, 4161 SMLoc NameLoc, OperandVector &Operands) { 4162 // Add the instruction mnemonic 4163 Name = parseMnemonicSuffix(Name); 4164 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4165 4166 bool IsMIMG = Name.startswith("image_"); 4167 4168 while (!getLexer().is(AsmToken::EndOfStatement)) { 4169 OperandMode Mode = OperandMode_Default; 4170 if (IsMIMG && isGFX10() && Operands.size() == 2) 4171 Mode = OperandMode_NSA; 4172 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4173 4174 // Eat the comma or space if there is one. 4175 if (getLexer().is(AsmToken::Comma)) 4176 Parser.Lex(); 4177 4178 switch (Res) { 4179 case MatchOperand_Success: break; 4180 case MatchOperand_ParseFail: 4181 // FIXME: use real operand location rather than the current location. 4182 Error(getLexer().getLoc(), "failed parsing operand."); 4183 while (!getLexer().is(AsmToken::EndOfStatement)) { 4184 Parser.Lex(); 4185 } 4186 return true; 4187 case MatchOperand_NoMatch: 4188 // FIXME: use real operand location rather than the current location. 4189 Error(getLexer().getLoc(), "not a valid operand."); 4190 while (!getLexer().is(AsmToken::EndOfStatement)) { 4191 Parser.Lex(); 4192 } 4193 return true; 4194 } 4195 } 4196 4197 return false; 4198 } 4199 4200 //===----------------------------------------------------------------------===// 4201 // Utility functions 4202 //===----------------------------------------------------------------------===// 4203 4204 OperandMatchResultTy 4205 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4206 4207 if (!trySkipId(Prefix, AsmToken::Colon)) 4208 return MatchOperand_NoMatch; 4209 4210 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4211 } 4212 4213 OperandMatchResultTy 4214 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4215 AMDGPUOperand::ImmTy ImmTy, 4216 bool (*ConvertResult)(int64_t&)) { 4217 SMLoc S = getLoc(); 4218 int64_t Value = 0; 4219 4220 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4221 if (Res != MatchOperand_Success) 4222 return Res; 4223 4224 if (ConvertResult && !ConvertResult(Value)) { 4225 Error(S, "invalid " + StringRef(Prefix) + " value."); 4226 } 4227 4228 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4229 return MatchOperand_Success; 4230 } 4231 4232 OperandMatchResultTy 4233 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4234 OperandVector &Operands, 4235 AMDGPUOperand::ImmTy ImmTy, 4236 bool (*ConvertResult)(int64_t&)) { 4237 SMLoc S = getLoc(); 4238 if (!trySkipId(Prefix, AsmToken::Colon)) 4239 return MatchOperand_NoMatch; 4240 4241 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4242 return MatchOperand_ParseFail; 4243 4244 unsigned Val = 0; 4245 const unsigned MaxSize = 4; 4246 4247 // FIXME: How to verify the number of elements matches the number of src 4248 // operands? 4249 for (int I = 0; ; ++I) { 4250 int64_t Op; 4251 SMLoc Loc = getLoc(); 4252 if (!parseExpr(Op)) 4253 return MatchOperand_ParseFail; 4254 4255 if (Op != 0 && Op != 1) { 4256 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4257 return MatchOperand_ParseFail; 4258 } 4259 4260 Val |= (Op << I); 4261 4262 if (trySkipToken(AsmToken::RBrac)) 4263 break; 4264 4265 if (I + 1 == MaxSize) { 4266 Error(getLoc(), "expected a closing square bracket"); 4267 return MatchOperand_ParseFail; 4268 } 4269 4270 if (!skipToken(AsmToken::Comma, "expected a comma")) 4271 return MatchOperand_ParseFail; 4272 } 4273 4274 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4275 return MatchOperand_Success; 4276 } 4277 4278 OperandMatchResultTy 4279 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4280 AMDGPUOperand::ImmTy ImmTy) { 4281 int64_t Bit = 0; 4282 SMLoc S = Parser.getTok().getLoc(); 4283 4284 // We are at the end of the statement, and this is a default argument, so 4285 // use a default value. 
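  // For example (illustrative): with Name == "glc", the token "glc" yields
  // Bit = 1, "noglc" yields Bit = 0, and an omitted operand keeps the default
  // Bit = 0 set above.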
4286 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4287 switch(getLexer().getKind()) { 4288 case AsmToken::Identifier: { 4289 StringRef Tok = Parser.getTok().getString(); 4290 if (Tok == Name) { 4291 if (Tok == "r128" && isGFX9()) 4292 Error(S, "r128 modifier is not supported on this GPU"); 4293 if (Tok == "a16" && !isGFX9()) 4294 Error(S, "a16 modifier is not supported on this GPU"); 4295 Bit = 1; 4296 Parser.Lex(); 4297 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4298 Bit = 0; 4299 Parser.Lex(); 4300 } else { 4301 return MatchOperand_NoMatch; 4302 } 4303 break; 4304 } 4305 default: 4306 return MatchOperand_NoMatch; 4307 } 4308 } 4309 4310 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4311 return MatchOperand_ParseFail; 4312 4313 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4314 return MatchOperand_Success; 4315 } 4316 4317 static void addOptionalImmOperand( 4318 MCInst& Inst, const OperandVector& Operands, 4319 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4320 AMDGPUOperand::ImmTy ImmT, 4321 int64_t Default = 0) { 4322 auto i = OptionalIdx.find(ImmT); 4323 if (i != OptionalIdx.end()) { 4324 unsigned Idx = i->second; 4325 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4326 } else { 4327 Inst.addOperand(MCOperand::createImm(Default)); 4328 } 4329 } 4330 4331 OperandMatchResultTy 4332 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4333 if (getLexer().isNot(AsmToken::Identifier)) { 4334 return MatchOperand_NoMatch; 4335 } 4336 StringRef Tok = Parser.getTok().getString(); 4337 if (Tok != Prefix) { 4338 return MatchOperand_NoMatch; 4339 } 4340 4341 Parser.Lex(); 4342 if (getLexer().isNot(AsmToken::Colon)) { 4343 return MatchOperand_ParseFail; 4344 } 4345 4346 Parser.Lex(); 4347 if (getLexer().isNot(AsmToken::Identifier)) { 4348 return MatchOperand_ParseFail; 4349 } 4350 4351 Value = Parser.getTok().getString(); 4352 return MatchOperand_Success; 4353 } 4354 4355 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4356 // values to live in a joint format operand in the MCInst encoding. 4357 OperandMatchResultTy 4358 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4359 SMLoc S = Parser.getTok().getLoc(); 4360 int64_t Dfmt = 0, Nfmt = 0; 4361 // dfmt and nfmt can appear in either order, and each is optional. 
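  // Worked example (illustrative syntax): "dfmt:4, nfmt:1" and "nfmt:1, dfmt:4"
  // both produce Format = Dfmt | (Nfmt << 4) = 4 | 16 = 20 for the joint
  // ImmTyFORMAT operand built below.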
4362 bool GotDfmt = false, GotNfmt = false; 4363 while (!GotDfmt || !GotNfmt) { 4364 if (!GotDfmt) { 4365 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4366 if (Res != MatchOperand_NoMatch) { 4367 if (Res != MatchOperand_Success) 4368 return Res; 4369 if (Dfmt >= 16) { 4370 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4371 return MatchOperand_ParseFail; 4372 } 4373 GotDfmt = true; 4374 Parser.Lex(); 4375 continue; 4376 } 4377 } 4378 if (!GotNfmt) { 4379 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4380 if (Res != MatchOperand_NoMatch) { 4381 if (Res != MatchOperand_Success) 4382 return Res; 4383 if (Nfmt >= 8) { 4384 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4385 return MatchOperand_ParseFail; 4386 } 4387 GotNfmt = true; 4388 Parser.Lex(); 4389 continue; 4390 } 4391 } 4392 break; 4393 } 4394 if (!GotDfmt && !GotNfmt) 4395 return MatchOperand_NoMatch; 4396 auto Format = Dfmt | Nfmt << 4; 4397 Operands.push_back( 4398 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4399 return MatchOperand_Success; 4400 } 4401 4402 //===----------------------------------------------------------------------===// 4403 // ds 4404 //===----------------------------------------------------------------------===// 4405 4406 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4407 const OperandVector &Operands) { 4408 OptionalImmIndexMap OptionalIdx; 4409 4410 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4411 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4412 4413 // Add the register arguments 4414 if (Op.isReg()) { 4415 Op.addRegOperands(Inst, 1); 4416 continue; 4417 } 4418 4419 // Handle optional arguments 4420 OptionalIdx[Op.getImmTy()] = i; 4421 } 4422 4423 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4425 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4426 4427 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4428 } 4429 4430 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4431 bool IsGdsHardcoded) { 4432 OptionalImmIndexMap OptionalIdx; 4433 4434 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4435 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4436 4437 // Add the register arguments 4438 if (Op.isReg()) { 4439 Op.addRegOperands(Inst, 1); 4440 continue; 4441 } 4442 4443 if (Op.isToken() && Op.getToken() == "gds") { 4444 IsGdsHardcoded = true; 4445 continue; 4446 } 4447 4448 // Handle optional arguments 4449 OptionalIdx[Op.getImmTy()] = i; 4450 } 4451 4452 AMDGPUOperand::ImmTy OffsetType = 4453 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4454 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4455 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4456 AMDGPUOperand::ImmTyOffset; 4457 4458 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4459 4460 if (!IsGdsHardcoded) { 4461 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4462 } 4463 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4464 } 4465 4466 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4467 OptionalImmIndexMap OptionalIdx; 4468 4469 unsigned OperandIdx[4]; 4470 unsigned EnMask = 0; 4471 int SrcIdx = 0; 4472 4473 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4474 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4475 4476 // Add the register arguments 4477 if (Op.isReg()) { 4478 assert(SrcIdx < 4); 4479 OperandIdx[SrcIdx] = Inst.size(); 4480 Op.addRegOperands(Inst, 1); 4481 ++SrcIdx; 4482 continue; 4483 } 4484 4485 if (Op.isOff()) { 4486 assert(SrcIdx < 4); 4487 OperandIdx[SrcIdx] = Inst.size(); 4488 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4489 ++SrcIdx; 4490 continue; 4491 } 4492 4493 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4494 Op.addImmOperands(Inst, 1); 4495 continue; 4496 } 4497 4498 if (Op.isToken() && Op.getToken() == "done") 4499 continue; 4500 4501 // Handle optional arguments 4502 OptionalIdx[Op.getImmTy()] = i; 4503 } 4504 4505 assert(SrcIdx == 4); 4506 4507 bool Compr = false; 4508 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4509 Compr = true; 4510 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4511 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4512 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4513 } 4514 4515 for (auto i = 0; i < SrcIdx; ++i) { 4516 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4517 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4518 } 4519 } 4520 4521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4522 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4523 4524 Inst.addOperand(MCOperand::createImm(EnMask)); 4525 } 4526 4527 //===----------------------------------------------------------------------===// 4528 // s_waitcnt 4529 //===----------------------------------------------------------------------===// 4530 4531 static bool 4532 encodeCnt( 4533 const AMDGPU::IsaVersion ISA, 4534 int64_t &IntVal, 4535 int64_t CntVal, 4536 bool Saturate, 4537 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4538 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4539 { 4540 bool Failed = false; 4541 4542 IntVal = encode(ISA, IntVal, CntVal); 4543 if (CntVal != decode(ISA, IntVal)) { 4544 if (Saturate) { 4545 IntVal = encode(ISA, IntVal, -1); 4546 } else { 4547 Failed = true; 4548 } 4549 } 4550 return Failed; 4551 } 4552 4553 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4554 4555 SMLoc CntLoc = getLoc(); 4556 StringRef CntName = getTokenStr(); 4557 4558 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4559 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4560 return false; 4561 4562 int64_t CntVal; 4563 SMLoc ValLoc = getLoc(); 4564 if (!parseExpr(CntVal)) 4565 return false; 4566 4567 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4568 4569 bool Failed = true; 4570 bool Sat = CntName.endswith("_sat"); 4571 4572 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4573 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4574 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4575 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4576 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4577 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4578 } else { 4579 Error(CntLoc, "invalid counter name " + CntName); 4580 return false; 4581 } 4582 4583 if (Failed) { 4584 Error(ValLoc, "too large value for " + CntName); 4585 return false; 4586 } 4587 4588 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4589 return false; 4590 4591 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4592 if (isToken(AsmToken::EndOfStatement)) { 4593 Error(getLoc(), "expected a counter name"); 4594 return false; 4595 } 4596 } 4597 4598 return true; 4599 } 4600 4601 OperandMatchResultTy 4602 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4603 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4604 int64_t Waitcnt = getWaitcntBitMask(ISA); 4605 SMLoc S = getLoc(); 4606 4607 // If parse failed, do not return error code 4608 // to avoid excessive error messages. 
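  // Typical inputs (illustrative): "vmcnt(0) lgkmcnt(0)" goes through parseCnt
  // for each named counter, overwriting that counter's field in the full
  // bitmask initialized above; a bare integer expression is taken as the raw
  // waitcnt encoding instead.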
4609 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4610 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4611 } else { 4612 parseExpr(Waitcnt); 4613 } 4614 4615 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4616 return MatchOperand_Success; 4617 } 4618 4619 bool 4620 AMDGPUOperand::isSWaitCnt() const { 4621 return isImm(); 4622 } 4623 4624 //===----------------------------------------------------------------------===// 4625 // hwreg 4626 //===----------------------------------------------------------------------===// 4627 4628 bool 4629 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4630 int64_t &Offset, 4631 int64_t &Width) { 4632 using namespace llvm::AMDGPU::Hwreg; 4633 4634 // The register may be specified by name or using a numeric code 4635 if (isToken(AsmToken::Identifier) && 4636 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4637 HwReg.IsSymbolic = true; 4638 lex(); // skip message name 4639 } else if (!parseExpr(HwReg.Id)) { 4640 return false; 4641 } 4642 4643 if (trySkipToken(AsmToken::RParen)) 4644 return true; 4645 4646 // parse optional params 4647 return 4648 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4649 parseExpr(Offset) && 4650 skipToken(AsmToken::Comma, "expected a comma") && 4651 parseExpr(Width) && 4652 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4653 } 4654 4655 void 4656 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4657 const int64_t Offset, 4658 const int64_t Width, 4659 const SMLoc Loc) { 4660 4661 using namespace llvm::AMDGPU::Hwreg; 4662 4663 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4664 Error(Loc, "specified hardware register is not supported on this GPU"); 4665 } else if (!isValidHwreg(HwReg.Id)) { 4666 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4667 } else if (!isValidHwregOffset(Offset)) { 4668 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4669 } else if (!isValidHwregWidth(Width)) { 4670 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4671 } 4672 } 4673 4674 OperandMatchResultTy 4675 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4676 using namespace llvm::AMDGPU::Hwreg; 4677 4678 int64_t ImmVal = 0; 4679 SMLoc Loc = getLoc(); 4680 4681 // If parse failed, do not return error code 4682 // to avoid excessive error messages. 
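  // Accepted forms (illustrative; HW_REG_MODE used as an example name):
  //   hwreg(HW_REG_MODE)          - symbolic register, default offset/width
  //   hwreg(HW_REG_MODE, 0, 32)   - explicit bit offset and field width
  //   hwreg(1, 0, 32)             - numeric register code
  //   <16-bit immediate>          - raw encoded value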
4683 if (trySkipId("hwreg", AsmToken::LParen)) { 4684 OperandInfoTy HwReg(ID_UNKNOWN_); 4685 int64_t Offset = OFFSET_DEFAULT_; 4686 int64_t Width = WIDTH_DEFAULT_; 4687 if (parseHwregBody(HwReg, Offset, Width)) { 4688 validateHwreg(HwReg, Offset, Width, Loc); 4689 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 4690 } 4691 } else if (parseExpr(ImmVal)) { 4692 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 4693 Error(Loc, "invalid immediate: only 16-bit values are legal"); 4694 } 4695 4696 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 4697 return MatchOperand_Success; 4698 } 4699 4700 bool AMDGPUOperand::isHwreg() const { 4701 return isImmTy(ImmTyHwreg); 4702 } 4703 4704 //===----------------------------------------------------------------------===// 4705 // sendmsg 4706 //===----------------------------------------------------------------------===// 4707 4708 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4709 using namespace llvm::AMDGPU::SendMsg; 4710 4711 if (Parser.getTok().getString() != "sendmsg") 4712 return true; 4713 Parser.Lex(); 4714 4715 if (getLexer().isNot(AsmToken::LParen)) 4716 return true; 4717 Parser.Lex(); 4718 4719 if (getLexer().is(AsmToken::Identifier)) { 4720 Msg.IsSymbolic = true; 4721 Msg.Id = ID_UNKNOWN_; 4722 const std::string tok = Parser.getTok().getString(); 4723 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4724 switch(i) { 4725 default: continue; // Omit gaps. 4726 case ID_GS_ALLOC_REQ: 4727 if (isSI() || isCI() || isVI()) 4728 continue; 4729 break; 4730 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: 4731 case ID_SYSMSG: break; 4732 } 4733 if (tok == IdSymbolic[i]) { 4734 Msg.Id = i; 4735 break; 4736 } 4737 } 4738 Parser.Lex(); 4739 } else { 4740 Msg.IsSymbolic = false; 4741 if (getLexer().isNot(AsmToken::Integer)) 4742 return true; 4743 if (getParser().parseAbsoluteExpression(Msg.Id)) 4744 return true; 4745 if (getLexer().is(AsmToken::Integer)) 4746 if (getParser().parseAbsoluteExpression(Msg.Id)) 4747 Msg.Id = ID_UNKNOWN_; 4748 } 4749 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4750 return false; 4751 4752 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4753 if (getLexer().isNot(AsmToken::RParen)) 4754 return true; 4755 Parser.Lex(); 4756 return false; 4757 } 4758 4759 if (getLexer().isNot(AsmToken::Comma)) 4760 return true; 4761 Parser.Lex(); 4762 4763 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4764 Operation.Id = ID_UNKNOWN_; 4765 if (getLexer().is(AsmToken::Identifier)) { 4766 Operation.IsSymbolic = true; 4767 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4768 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4769 const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; 4770 const StringRef Tok = Parser.getTok().getString(); 4771 for (int i = F; i < L; ++i) { 4772 if (Tok == S[i]) { 4773 Operation.Id = i; 4774 break; 4775 } 4776 } 4777 Parser.Lex(); 4778 } else { 4779 Operation.IsSymbolic = false; 4780 if (getLexer().isNot(AsmToken::Integer)) 4781 return true; 4782 if (getParser().parseAbsoluteExpression(Operation.Id)) 4783 return true; 4784 } 4785 4786 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4787 // Stream id is optional. 
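    // Illustrative: "sendmsg(MSG_GS, GS_OP_EMIT, 0)" supplies an explicit
    // stream id, while "sendmsg(MSG_GS, GS_OP_EMIT)" omits it and keeps the
    // caller's default StreamId.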
4788 if (getLexer().is(AsmToken::RParen)) { 4789 Parser.Lex(); 4790 return false; 4791 } 4792 4793 if (getLexer().isNot(AsmToken::Comma)) 4794 return true; 4795 Parser.Lex(); 4796 4797 if (getLexer().isNot(AsmToken::Integer)) 4798 return true; 4799 if (getParser().parseAbsoluteExpression(StreamId)) 4800 return true; 4801 } 4802 4803 if (getLexer().isNot(AsmToken::RParen)) 4804 return true; 4805 Parser.Lex(); 4806 return false; 4807 } 4808 4809 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4810 if (getLexer().getKind() != AsmToken::Identifier) 4811 return MatchOperand_NoMatch; 4812 4813 StringRef Str = Parser.getTok().getString(); 4814 int Slot = StringSwitch<int>(Str) 4815 .Case("p10", 0) 4816 .Case("p20", 1) 4817 .Case("p0", 2) 4818 .Default(-1); 4819 4820 SMLoc S = Parser.getTok().getLoc(); 4821 if (Slot == -1) 4822 return MatchOperand_ParseFail; 4823 4824 Parser.Lex(); 4825 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4826 AMDGPUOperand::ImmTyInterpSlot)); 4827 return MatchOperand_Success; 4828 } 4829 4830 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4831 if (getLexer().getKind() != AsmToken::Identifier) 4832 return MatchOperand_NoMatch; 4833 4834 StringRef Str = Parser.getTok().getString(); 4835 if (!Str.startswith("attr")) 4836 return MatchOperand_NoMatch; 4837 4838 StringRef Chan = Str.take_back(2); 4839 int AttrChan = StringSwitch<int>(Chan) 4840 .Case(".x", 0) 4841 .Case(".y", 1) 4842 .Case(".z", 2) 4843 .Case(".w", 3) 4844 .Default(-1); 4845 if (AttrChan == -1) 4846 return MatchOperand_ParseFail; 4847 4848 Str = Str.drop_back(2).drop_front(4); 4849 4850 uint8_t Attr; 4851 if (Str.getAsInteger(10, Attr)) 4852 return MatchOperand_ParseFail; 4853 4854 SMLoc S = Parser.getTok().getLoc(); 4855 Parser.Lex(); 4856 if (Attr > 63) { 4857 Error(S, "out of bounds attr"); 4858 return MatchOperand_Success; 4859 } 4860 4861 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4862 4863 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4864 AMDGPUOperand::ImmTyInterpAttr)); 4865 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4866 AMDGPUOperand::ImmTyAttrChan)); 4867 return MatchOperand_Success; 4868 } 4869 4870 void AMDGPUAsmParser::errorExpTgt() { 4871 Error(Parser.getTok().getLoc(), "invalid exp target"); 4872 } 4873 4874 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4875 uint8_t &Val) { 4876 if (Str == "null") { 4877 Val = 9; 4878 return MatchOperand_Success; 4879 } 4880 4881 if (Str.startswith("mrt")) { 4882 Str = Str.drop_front(3); 4883 if (Str == "z") { // == mrtz 4884 Val = 8; 4885 return MatchOperand_Success; 4886 } 4887 4888 if (Str.getAsInteger(10, Val)) 4889 return MatchOperand_ParseFail; 4890 4891 if (Val > 7) 4892 errorExpTgt(); 4893 4894 return MatchOperand_Success; 4895 } 4896 4897 if (Str.startswith("pos")) { 4898 Str = Str.drop_front(3); 4899 if (Str.getAsInteger(10, Val)) 4900 return MatchOperand_ParseFail; 4901 4902 if (Val > 4 || (Val == 4 && !isGFX10())) 4903 errorExpTgt(); 4904 4905 Val += 12; 4906 return MatchOperand_Success; 4907 } 4908 4909 if (isGFX10() && Str == "prim") { 4910 Val = 20; 4911 return MatchOperand_Success; 4912 } 4913 4914 if (Str.startswith("param")) { 4915 Str = Str.drop_front(5); 4916 if (Str.getAsInteger(10, Val)) 4917 return MatchOperand_ParseFail; 4918 4919 if (Val >= 32) 4920 errorExpTgt(); 4921 4922 Val += 32; 4923 return MatchOperand_Success; 4924 } 4925 4926 if (Str.startswith("invalid_target_")) { 4927 Str = 
Str.drop_front(15); 4928 if (Str.getAsInteger(10, Val)) 4929 return MatchOperand_ParseFail; 4930 4931 errorExpTgt(); 4932 return MatchOperand_Success; 4933 } 4934 4935 return MatchOperand_NoMatch; 4936 } 4937 4938 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4939 uint8_t Val; 4940 StringRef Str = Parser.getTok().getString(); 4941 4942 auto Res = parseExpTgtImpl(Str, Val); 4943 if (Res != MatchOperand_Success) 4944 return Res; 4945 4946 SMLoc S = Parser.getTok().getLoc(); 4947 Parser.Lex(); 4948 4949 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4950 AMDGPUOperand::ImmTyExpTgt)); 4951 return MatchOperand_Success; 4952 } 4953 4954 OperandMatchResultTy 4955 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4956 using namespace llvm::AMDGPU::SendMsg; 4957 4958 int64_t Imm16Val = 0; 4959 SMLoc S = Parser.getTok().getLoc(); 4960 4961 switch(getLexer().getKind()) { 4962 default: 4963 return MatchOperand_NoMatch; 4964 case AsmToken::Integer: 4965 // The operand can be an integer value. 4966 if (getParser().parseAbsoluteExpression(Imm16Val)) 4967 return MatchOperand_NoMatch; 4968 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4969 Error(S, "invalid immediate: only 16-bit values are legal"); 4970 // Do not return error code, but create an imm operand anyway and proceed 4971 // to the next operand, if any. That avoids unneccessary error messages. 4972 } 4973 break; 4974 case AsmToken::Identifier: { 4975 OperandInfoTy Msg(ID_UNKNOWN_); 4976 OperandInfoTy Operation(OP_UNKNOWN_); 4977 int64_t StreamId = STREAM_ID_DEFAULT_; 4978 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4979 return MatchOperand_ParseFail; 4980 do { 4981 // Validate and encode message ID. 4982 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4983 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI()) 4984 || Msg.Id == ID_SYSMSG)) { 4985 if (Msg.IsSymbolic) 4986 Error(S, "invalid/unsupported symbolic name of message"); 4987 else 4988 Error(S, "invalid/unsupported code of message"); 4989 break; 4990 } 4991 Imm16Val = (Msg.Id << ID_SHIFT_); 4992 // Validate and encode operation ID. 4993 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4994 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4995 if (Operation.IsSymbolic) 4996 Error(S, "invalid symbolic name of GS_OP"); 4997 else 4998 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4999 break; 5000 } 5001 if (Operation.Id == OP_GS_NOP 5002 && Msg.Id != ID_GS_DONE) { 5003 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 5004 break; 5005 } 5006 Imm16Val |= (Operation.Id << OP_SHIFT_); 5007 } 5008 if (Msg.Id == ID_SYSMSG) { 5009 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 5010 if (Operation.IsSymbolic) 5011 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 5012 else 5013 Error(S, "invalid/unsupported code of SYSMSG_OP"); 5014 break; 5015 } 5016 Imm16Val |= (Operation.Id << OP_SHIFT_); 5017 } 5018 // Validate and encode stream ID. 5019 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 5020 if (! 
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 5021 Error(S, "invalid stream id: only 2-bit values are legal"); 5022 break; 5023 } 5024 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 5025 } 5026 } while (false); 5027 } 5028 break; 5029 } 5030 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 5031 return MatchOperand_Success; 5032 } 5033 5034 bool AMDGPUOperand::isSendMsg() const { 5035 return isImmTy(ImmTySendMsg); 5036 } 5037 5038 //===----------------------------------------------------------------------===// 5039 // parser helpers 5040 //===----------------------------------------------------------------------===// 5041 5042 bool 5043 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5044 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5045 } 5046 5047 bool 5048 AMDGPUAsmParser::isId(const StringRef Id) const { 5049 return isId(getToken(), Id); 5050 } 5051 5052 bool 5053 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5054 return getTokenKind() == Kind; 5055 } 5056 5057 bool 5058 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5059 if (isId(Id)) { 5060 lex(); 5061 return true; 5062 } 5063 return false; 5064 } 5065 5066 bool 5067 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5068 if (isId(Id) && peekToken().is(Kind)) { 5069 lex(); 5070 lex(); 5071 return true; 5072 } 5073 return false; 5074 } 5075 5076 bool 5077 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5078 if (isToken(Kind)) { 5079 lex(); 5080 return true; 5081 } 5082 return false; 5083 } 5084 5085 bool 5086 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5087 const StringRef ErrMsg) { 5088 if (!trySkipToken(Kind)) { 5089 Error(getLoc(), ErrMsg); 5090 return false; 5091 } 5092 return true; 5093 } 5094 5095 bool 5096 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5097 return !getParser().parseAbsoluteExpression(Imm); 5098 } 5099 5100 bool 5101 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5102 if (isToken(AsmToken::String)) { 5103 Val = getToken().getStringContents(); 5104 lex(); 5105 return true; 5106 } else { 5107 Error(getLoc(), ErrMsg); 5108 return false; 5109 } 5110 } 5111 5112 AsmToken 5113 AMDGPUAsmParser::getToken() const { 5114 return Parser.getTok(); 5115 } 5116 5117 AsmToken 5118 AMDGPUAsmParser::peekToken() { 5119 return getLexer().peekTok(); 5120 } 5121 5122 void 5123 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5124 auto TokCount = getLexer().peekTokens(Tokens); 5125 5126 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5127 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5128 } 5129 5130 AsmToken::TokenKind 5131 AMDGPUAsmParser::getTokenKind() const { 5132 return getLexer().getKind(); 5133 } 5134 5135 SMLoc 5136 AMDGPUAsmParser::getLoc() const { 5137 return getToken().getLoc(); 5138 } 5139 5140 StringRef 5141 AMDGPUAsmParser::getTokenStr() const { 5142 return getToken().getString(); 5143 } 5144 5145 void 5146 AMDGPUAsmParser::lex() { 5147 Parser.Lex(); 5148 } 5149 5150 //===----------------------------------------------------------------------===// 5151 // swizzle 5152 //===----------------------------------------------------------------------===// 5153 5154 LLVM_READNONE 5155 static unsigned 5156 encodeBitmaskPerm(const unsigned AndMask, 5157 const unsigned OrMask, 5158 const unsigned XorMask) { 5159 using namespace llvm::AMDGPU::Swizzle; 5160 5161 return BITMASK_PERM_ENC | 5162 (AndMask << 
BITMASK_AND_SHIFT) | 5163 (OrMask << BITMASK_OR_SHIFT) | 5164 (XorMask << BITMASK_XOR_SHIFT); 5165 } 5166 5167 bool 5168 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5169 const unsigned MinVal, 5170 const unsigned MaxVal, 5171 const StringRef ErrMsg) { 5172 for (unsigned i = 0; i < OpNum; ++i) { 5173 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5174 return false; 5175 } 5176 SMLoc ExprLoc = Parser.getTok().getLoc(); 5177 if (!parseExpr(Op[i])) { 5178 return false; 5179 } 5180 if (Op[i] < MinVal || Op[i] > MaxVal) { 5181 Error(ExprLoc, ErrMsg); 5182 return false; 5183 } 5184 } 5185 5186 return true; 5187 } 5188 5189 bool 5190 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5191 using namespace llvm::AMDGPU::Swizzle; 5192 5193 int64_t Lane[LANE_NUM]; 5194 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5195 "expected a 2-bit lane id")) { 5196 Imm = QUAD_PERM_ENC; 5197 for (unsigned I = 0; I < LANE_NUM; ++I) { 5198 Imm |= Lane[I] << (LANE_SHIFT * I); 5199 } 5200 return true; 5201 } 5202 return false; 5203 } 5204 5205 bool 5206 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5207 using namespace llvm::AMDGPU::Swizzle; 5208 5209 SMLoc S = Parser.getTok().getLoc(); 5210 int64_t GroupSize; 5211 int64_t LaneIdx; 5212 5213 if (!parseSwizzleOperands(1, &GroupSize, 5214 2, 32, 5215 "group size must be in the interval [2,32]")) { 5216 return false; 5217 } 5218 if (!isPowerOf2_64(GroupSize)) { 5219 Error(S, "group size must be a power of two"); 5220 return false; 5221 } 5222 if (parseSwizzleOperands(1, &LaneIdx, 5223 0, GroupSize - 1, 5224 "lane id must be in the interval [0,group size - 1]")) { 5225 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5226 return true; 5227 } 5228 return false; 5229 } 5230 5231 bool 5232 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5233 using namespace llvm::AMDGPU::Swizzle; 5234 5235 SMLoc S = Parser.getTok().getLoc(); 5236 int64_t GroupSize; 5237 5238 if (!parseSwizzleOperands(1, &GroupSize, 5239 2, 32, "group size must be in the interval [2,32]")) { 5240 return false; 5241 } 5242 if (!isPowerOf2_64(GroupSize)) { 5243 Error(S, "group size must be a power of two"); 5244 return false; 5245 } 5246 5247 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5248 return true; 5249 } 5250 5251 bool 5252 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5253 using namespace llvm::AMDGPU::Swizzle; 5254 5255 SMLoc S = Parser.getTok().getLoc(); 5256 int64_t GroupSize; 5257 5258 if (!parseSwizzleOperands(1, &GroupSize, 5259 1, 16, "group size must be in the interval [1,16]")) { 5260 return false; 5261 } 5262 if (!isPowerOf2_64(GroupSize)) { 5263 Error(S, "group size must be a power of two"); 5264 return false; 5265 } 5266 5267 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5268 return true; 5269 } 5270 5271 bool 5272 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5273 using namespace llvm::AMDGPU::Swizzle; 5274 5275 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5276 return false; 5277 } 5278 5279 StringRef Ctl; 5280 SMLoc StrLoc = Parser.getTok().getLoc(); 5281 if (!parseString(Ctl)) { 5282 return false; 5283 } 5284 if (Ctl.size() != BITMASK_WIDTH) { 5285 Error(StrLoc, "expected a 5-character mask"); 5286 return false; 5287 } 5288 5289 unsigned AndMask = 0; 5290 unsigned OrMask = 0; 5291 unsigned XorMask = 0; 5292 5293 for (size_t i = 0; i < Ctl.size(); ++i) { 5294 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5295 switch(Ctl[i]) { 5296 default: 5297 Error(StrLoc, "invalid 
mask"); 5298 return false; 5299 case '0': 5300 break; 5301 case '1': 5302 OrMask |= Mask; 5303 break; 5304 case 'p': 5305 AndMask |= Mask; 5306 break; 5307 case 'i': 5308 AndMask |= Mask; 5309 XorMask |= Mask; 5310 break; 5311 } 5312 } 5313 5314 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5315 return true; 5316 } 5317 5318 bool 5319 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5320 5321 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5322 5323 if (!parseExpr(Imm)) { 5324 return false; 5325 } 5326 if (!isUInt<16>(Imm)) { 5327 Error(OffsetLoc, "expected a 16-bit offset"); 5328 return false; 5329 } 5330 return true; 5331 } 5332 5333 bool 5334 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5335 using namespace llvm::AMDGPU::Swizzle; 5336 5337 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5338 5339 SMLoc ModeLoc = Parser.getTok().getLoc(); 5340 bool Ok = false; 5341 5342 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5343 Ok = parseSwizzleQuadPerm(Imm); 5344 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5345 Ok = parseSwizzleBitmaskPerm(Imm); 5346 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5347 Ok = parseSwizzleBroadcast(Imm); 5348 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5349 Ok = parseSwizzleSwap(Imm); 5350 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5351 Ok = parseSwizzleReverse(Imm); 5352 } else { 5353 Error(ModeLoc, "expected a swizzle mode"); 5354 } 5355 5356 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5357 } 5358 5359 return false; 5360 } 5361 5362 OperandMatchResultTy 5363 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5364 SMLoc S = Parser.getTok().getLoc(); 5365 int64_t Imm = 0; 5366 5367 if (trySkipId("offset")) { 5368 5369 bool Ok = false; 5370 if (skipToken(AsmToken::Colon, "expected a colon")) { 5371 if (trySkipId("swizzle")) { 5372 Ok = parseSwizzleMacro(Imm); 5373 } else { 5374 Ok = parseSwizzleOffset(Imm); 5375 } 5376 } 5377 5378 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5379 5380 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5381 } else { 5382 // Swizzle "offset" operand is optional. 5383 // If it is omitted, try parsing other optional operands. 5384 return parseOptionalOpr(Operands); 5385 } 5386 } 5387 5388 bool 5389 AMDGPUOperand::isSwizzle() const { 5390 return isImmTy(ImmTySwizzle); 5391 } 5392 5393 //===----------------------------------------------------------------------===// 5394 // VGPR Index Mode 5395 //===----------------------------------------------------------------------===// 5396 5397 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5398 5399 using namespace llvm::AMDGPU::VGPRIndexMode; 5400 5401 if (trySkipToken(AsmToken::RParen)) { 5402 return OFF; 5403 } 5404 5405 int64_t Imm = 0; 5406 5407 while (true) { 5408 unsigned Mode = 0; 5409 SMLoc S = Parser.getTok().getLoc(); 5410 5411 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5412 if (trySkipId(IdSymbolic[ModeId])) { 5413 Mode = 1 << ModeId; 5414 break; 5415 } 5416 } 5417 5418 if (Mode == 0) { 5419 Error(S, (Imm == 0)? 
5420 "expected a VGPR index mode or a closing parenthesis" : 5421 "expected a VGPR index mode"); 5422 break; 5423 } 5424 5425 if (Imm & Mode) { 5426 Error(S, "duplicate VGPR index mode"); 5427 break; 5428 } 5429 Imm |= Mode; 5430 5431 if (trySkipToken(AsmToken::RParen)) 5432 break; 5433 if (!skipToken(AsmToken::Comma, 5434 "expected a comma or a closing parenthesis")) 5435 break; 5436 } 5437 5438 return Imm; 5439 } 5440 5441 OperandMatchResultTy 5442 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5443 5444 int64_t Imm = 0; 5445 SMLoc S = Parser.getTok().getLoc(); 5446 5447 if (getLexer().getKind() == AsmToken::Identifier && 5448 Parser.getTok().getString() == "gpr_idx" && 5449 getLexer().peekTok().is(AsmToken::LParen)) { 5450 5451 Parser.Lex(); 5452 Parser.Lex(); 5453 5454 // If parse failed, trigger an error but do not return error code 5455 // to avoid excessive error messages. 5456 Imm = parseGPRIdxMacro(); 5457 5458 } else { 5459 if (getParser().parseAbsoluteExpression(Imm)) 5460 return MatchOperand_NoMatch; 5461 if (Imm < 0 || !isUInt<4>(Imm)) { 5462 Error(S, "invalid immediate: only 4-bit values are legal"); 5463 } 5464 } 5465 5466 Operands.push_back( 5467 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5468 return MatchOperand_Success; 5469 } 5470 5471 bool AMDGPUOperand::isGPRIdxMode() const { 5472 return isImmTy(ImmTyGprIdxMode); 5473 } 5474 5475 //===----------------------------------------------------------------------===// 5476 // sopp branch targets 5477 //===----------------------------------------------------------------------===// 5478 5479 OperandMatchResultTy 5480 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5481 SMLoc S = Parser.getTok().getLoc(); 5482 5483 switch (getLexer().getKind()) { 5484 default: return MatchOperand_ParseFail; 5485 case AsmToken::Integer: { 5486 int64_t Imm; 5487 if (getParser().parseAbsoluteExpression(Imm)) 5488 return MatchOperand_ParseFail; 5489 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5490 return MatchOperand_Success; 5491 } 5492 5493 case AsmToken::Identifier: 5494 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5495 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5496 Parser.getTok().getString()), getContext()), S)); 5497 Parser.Lex(); 5498 return MatchOperand_Success; 5499 } 5500 } 5501 5502 //===----------------------------------------------------------------------===// 5503 // Boolean holding registers 5504 //===----------------------------------------------------------------------===// 5505 5506 OperandMatchResultTy 5507 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5508 return parseReg(Operands); 5509 } 5510 5511 //===----------------------------------------------------------------------===// 5512 // mubuf 5513 //===----------------------------------------------------------------------===// 5514 5515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5516 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5517 } 5518 5519 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5520 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5521 } 5522 5523 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5524 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5525 } 5526 5527 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5528 const OperandVector &Operands, 5529 bool IsAtomic, 5530 bool IsAtomicReturn, 5531 bool IsLds) { 5532 bool IsLdsOpcode = IsLds; 5533 bool 
HasLdsModifier = false; 5534 OptionalImmIndexMap OptionalIdx; 5535 assert(IsAtomicReturn ? IsAtomic : true); 5536 unsigned FirstOperandIdx = 1; 5537 5538 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5539 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5540 5541 // Add the register arguments 5542 if (Op.isReg()) { 5543 Op.addRegOperands(Inst, 1); 5544 // Insert a tied src for atomic return dst. 5545 // This cannot be postponed as subsequent calls to 5546 // addImmOperands rely on correct number of MC operands. 5547 if (IsAtomicReturn && i == FirstOperandIdx) 5548 Op.addRegOperands(Inst, 1); 5549 continue; 5550 } 5551 5552 // Handle the case where soffset is an immediate 5553 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5554 Op.addImmOperands(Inst, 1); 5555 continue; 5556 } 5557 5558 HasLdsModifier |= Op.isLDS(); 5559 5560 // Handle tokens like 'offen' which are sometimes hard-coded into the 5561 // asm string. There are no MCInst operands for these. 5562 if (Op.isToken()) { 5563 continue; 5564 } 5565 assert(Op.isImm()); 5566 5567 // Handle optional arguments 5568 OptionalIdx[Op.getImmTy()] = i; 5569 } 5570 5571 // This is a workaround for an llvm quirk which may result in an 5572 // incorrect instruction selection. Lds and non-lds versions of 5573 // MUBUF instructions are identical except that lds versions 5574 // have mandatory 'lds' modifier. However this modifier follows 5575 // optional modifiers and llvm asm matcher regards this 'lds' 5576 // modifier as an optional one. As a result, an lds version 5577 // of opcode may be selected even if it has no 'lds' modifier. 5578 if (IsLdsOpcode && !HasLdsModifier) { 5579 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5580 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5581 Inst.setOpcode(NoLdsOpcode); 5582 IsLdsOpcode = false; 5583 } 5584 } 5585 5586 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5587 if (!IsAtomic) { // glc is hard-coded. 5588 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5589 } 5590 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5591 5592 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5593 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5594 } 5595 5596 if (isGFX10()) 5597 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5598 } 5599 5600 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5601 OptionalImmIndexMap OptionalIdx; 5602 5603 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5604 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5605 5606 // Add the register arguments 5607 if (Op.isReg()) { 5608 Op.addRegOperands(Inst, 1); 5609 continue; 5610 } 5611 5612 // Handle the case where soffset is an immediate 5613 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5614 Op.addImmOperands(Inst, 1); 5615 continue; 5616 } 5617 5618 // Handle tokens like 'offen' which are sometimes hard-coded into the 5619 // asm string. There are no MCInst operands for these. 
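// Anything left over is either such a hard-coded token (skipped below) or an optional immediate modifier whose index is recorded for addOptionalImmOperand.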
5620 if (Op.isToken()) { 5621 continue; 5622 } 5623 assert(Op.isImm()); 5624 5625 // Handle optional arguments 5626 OptionalIdx[Op.getImmTy()] = i; 5627 } 5628 5629 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5630 AMDGPUOperand::ImmTyOffset); 5631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5632 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5633 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5635 5636 if (isGFX10()) 5637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5638 } 5639 5640 //===----------------------------------------------------------------------===// 5641 // mimg 5642 //===----------------------------------------------------------------------===// 5643 5644 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5645 bool IsAtomic) { 5646 unsigned I = 1; 5647 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5648 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5649 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5650 } 5651 5652 if (IsAtomic) { 5653 // Add src, same as dst 5654 assert(Desc.getNumDefs() == 1); 5655 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5656 } 5657 5658 OptionalImmIndexMap OptionalIdx; 5659 5660 for (unsigned E = Operands.size(); I != E; ++I) { 5661 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5662 5663 // Add the register arguments 5664 if (Op.isReg()) { 5665 Op.addRegOperands(Inst, 1); 5666 } else if (Op.isImmModifier()) { 5667 OptionalIdx[Op.getImmTy()] = I; 5668 } else if (!Op.isToken()) { 5669 llvm_unreachable("unexpected operand type"); 5670 } 5671 } 5672 5673 bool IsGFX10 = isGFX10(); 5674 5675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5676 if (IsGFX10) 5677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5679 if (IsGFX10) 5680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5681 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5682 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5683 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5684 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5686 if (!IsGFX10) 5687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5689 } 5690 5691 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5692 cvtMIMG(Inst, Operands, true); 5693 } 5694 5695 //===----------------------------------------------------------------------===// 5696 // smrd 5697 //===----------------------------------------------------------------------===// 5698 5699 bool AMDGPUOperand::isSMRDOffset8() const { 5700 return isImm() && isUInt<8>(getImm()); 5701 } 5702 5703 bool AMDGPUOperand::isSMRDOffset20() const { 5704 return isImm() && isUInt<20>(getImm()); 5705 } 5706 5707 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5708 // 32-bit literals are only supported on CI and we only want to use them 5709 // when the offset is > 8-bits. 
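// i.e. accept only values that need more than 8 bits but still fit in 32 bits.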
5710 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5711 } 5712 5713 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5714 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5715 } 5716 5717 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5718 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5719 } 5720 5721 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5722 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5723 } 5724 5725 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5726 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5727 } 5728 5729 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5730 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5731 } 5732 5733 //===----------------------------------------------------------------------===// 5734 // vop3 5735 //===----------------------------------------------------------------------===// 5736 5737 static bool ConvertOmodMul(int64_t &Mul) { 5738 if (Mul != 1 && Mul != 2 && Mul != 4) 5739 return false; 5740 5741 Mul >>= 1; 5742 return true; 5743 } 5744 5745 static bool ConvertOmodDiv(int64_t &Div) { 5746 if (Div == 1) { 5747 Div = 0; 5748 return true; 5749 } 5750 5751 if (Div == 2) { 5752 Div = 3; 5753 return true; 5754 } 5755 5756 return false; 5757 } 5758 5759 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5760 if (BoundCtrl == 0) { 5761 BoundCtrl = 1; 5762 return true; 5763 } 5764 5765 if (BoundCtrl == -1) { 5766 BoundCtrl = 0; 5767 return true; 5768 } 5769 5770 return false; 5771 } 5772 5773 // Note: the order in this table matches the order of operands in AsmString. 
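// Each entry is {Name, ImmTy, IsBit, ConvertResult}: IsBit entries are parsed as bare flag names, the others take a "name:value" style prefix, and ConvertResult (when non-null) post-processes the parsed value. See parseOptionalOpr below.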
5774 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5775 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5776 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5777 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5778 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5779 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5780 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5781 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5782 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5783 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5784 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5785 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5786 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5787 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5788 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5789 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5790 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5791 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5792 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5793 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5794 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5795 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5796 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5797 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5798 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5799 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5800 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5801 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5802 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5803 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5804 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 5805 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5806 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5807 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5808 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5809 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5810 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5811 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5812 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5813 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5814 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5815 }; 5816 5817 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5818 unsigned size = Operands.size(); 5819 assert(size > 0); 5820 5821 OperandMatchResultTy res = parseOptionalOpr(Operands); 5822 5823 // This is a hack to enable hardcoded mandatory operands which follow 5824 // optional operands. 5825 // 5826 // Current design assumes that all operands after the first optional operand 5827 // are also optional. However implementation of some instructions violates 5828 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5829 // 5830 // To alleviate this problem, we have to (implicitly) parse extra operands 5831 // to make sure autogenerated parser of custom operands never hit hardcoded 5832 // mandatory operands. 5833 5834 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5835 5836 // We have parsed the first optional operand. 5837 // Parse as many operands as necessary to skip all mandatory operands. 
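// MAX_OPR_LOOKAHEAD bounds how many additional operands are parsed speculatively here.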
5838 5839 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5840 if (res != MatchOperand_Success || 5841 getLexer().is(AsmToken::EndOfStatement)) break; 5842 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5843 res = parseOptionalOpr(Operands); 5844 } 5845 } 5846 5847 return res; 5848 } 5849 5850 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5851 OperandMatchResultTy res; 5852 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5853 // try to parse any optional operand here 5854 if (Op.IsBit) { 5855 res = parseNamedBit(Op.Name, Operands, Op.Type); 5856 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5857 res = parseOModOperand(Operands); 5858 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5859 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5860 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5861 res = parseSDWASel(Operands, Op.Name, Op.Type); 5862 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5863 res = parseSDWADstUnused(Operands); 5864 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5865 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5866 Op.Type == AMDGPUOperand::ImmTyNegLo || 5867 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5868 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5869 Op.ConvertResult); 5870 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5871 res = parseDim(Operands); 5872 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 5873 res = parseDfmtNfmt(Operands); 5874 } else { 5875 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5876 } 5877 if (res != MatchOperand_NoMatch) { 5878 return res; 5879 } 5880 } 5881 return MatchOperand_NoMatch; 5882 } 5883 5884 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5885 StringRef Name = Parser.getTok().getString(); 5886 if (Name == "mul") { 5887 return parseIntWithPrefix("mul", Operands, 5888 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5889 } 5890 5891 if (Name == "div") { 5892 return parseIntWithPrefix("div", Operands, 5893 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5894 } 5895 5896 return MatchOperand_NoMatch; 5897 } 5898 5899 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5900 cvtVOP3P(Inst, Operands); 5901 5902 int Opc = Inst.getOpcode(); 5903 5904 int SrcNum; 5905 const int Ops[] = { AMDGPU::OpName::src0, 5906 AMDGPU::OpName::src1, 5907 AMDGPU::OpName::src2 }; 5908 for (SrcNum = 0; 5909 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5910 ++SrcNum); 5911 assert(SrcNum > 0); 5912 5913 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5914 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5915 5916 if ((OpSel & (1 << SrcNum)) != 0) { 5917 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5918 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5919 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5920 } 5921 } 5922 5923 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5924 // 1. This operand is input modifiers 5925 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5926 // 2. This is not last operand 5927 && Desc.NumOperands > (OpNum + 1) 5928 // 3. Next operand is register class 5929 && Desc.OpInfo[OpNum + 1].RegClass != -1 5930 // 4. 
Next register is not tied to any other operand 5931 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5932 } 5933 5934 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5935 { 5936 OptionalImmIndexMap OptionalIdx; 5937 unsigned Opc = Inst.getOpcode(); 5938 5939 unsigned I = 1; 5940 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5941 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5942 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5943 } 5944 5945 for (unsigned E = Operands.size(); I != E; ++I) { 5946 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5947 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5948 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5949 } else if (Op.isInterpSlot() || 5950 Op.isInterpAttr() || 5951 Op.isAttrChan()) { 5952 Inst.addOperand(MCOperand::createImm(Op.getImm())); 5953 } else if (Op.isImmModifier()) { 5954 OptionalIdx[Op.getImmTy()] = I; 5955 } else { 5956 llvm_unreachable("unhandled operand type"); 5957 } 5958 } 5959 5960 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5961 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5962 } 5963 5964 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5966 } 5967 5968 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5969 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5970 } 5971 } 5972 5973 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5974 OptionalImmIndexMap &OptionalIdx) { 5975 unsigned Opc = Inst.getOpcode(); 5976 5977 unsigned I = 1; 5978 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5979 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5980 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5981 } 5982 5983 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5984 // This instruction has src modifiers 5985 for (unsigned E = Operands.size(); I != E; ++I) { 5986 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5987 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5988 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5989 } else if (Op.isImmModifier()) { 5990 OptionalIdx[Op.getImmTy()] = I; 5991 } else if (Op.isRegOrImm()) { 5992 Op.addRegOrImmOperands(Inst, 1); 5993 } else { 5994 llvm_unreachable("unhandled operand type"); 5995 } 5996 } 5997 } else { 5998 // No src modifiers 5999 for (unsigned E = Operands.size(); I != E; ++I) { 6000 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6001 if (Op.isMod()) { 6002 OptionalIdx[Op.getImmTy()] = I; 6003 } else { 6004 Op.addRegOrImmOperands(Inst, 1); 6005 } 6006 } 6007 } 6008 6009 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6010 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6011 } 6012 6013 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6014 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6015 } 6016 6017 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6018 // it has src2 register operand that is tied to dst operand 6019 // we don't allow modifiers for this operand in assembler so src2_modifiers 6020 // should be 0. 
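// For the opcodes below, insert a zero src2_modifiers immediate followed by a copy of the dst register operand as the tied src2.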
6021 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6022 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6023 Opc == AMDGPU::V_MAC_F32_e64_vi || 6024 Opc == AMDGPU::V_MAC_F16_e64_vi || 6025 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6026 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6027 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6028 auto it = Inst.begin(); 6029 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6030 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6031 ++it; 6032 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6033 } 6034 } 6035 6036 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6037 OptionalImmIndexMap OptionalIdx; 6038 cvtVOP3(Inst, Operands, OptionalIdx); 6039 } 6040 6041 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6042 const OperandVector &Operands) { 6043 OptionalImmIndexMap OptIdx; 6044 const int Opc = Inst.getOpcode(); 6045 const MCInstrDesc &Desc = MII.get(Opc); 6046 6047 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6048 6049 cvtVOP3(Inst, Operands, OptIdx); 6050 6051 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6052 assert(!IsPacked); 6053 Inst.addOperand(Inst.getOperand(0)); 6054 } 6055 6056 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6057 // instruction, and then figure out where to actually put the modifiers 6058 6059 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6060 6061 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6062 if (OpSelHiIdx != -1) { 6063 int DefaultVal = IsPacked ? -1 : 0; 6064 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6065 DefaultVal); 6066 } 6067 6068 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6069 if (NegLoIdx != -1) { 6070 assert(IsPacked); 6071 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6072 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6073 } 6074 6075 const int Ops[] = { AMDGPU::OpName::src0, 6076 AMDGPU::OpName::src1, 6077 AMDGPU::OpName::src2 }; 6078 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6079 AMDGPU::OpName::src1_modifiers, 6080 AMDGPU::OpName::src2_modifiers }; 6081 6082 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6083 6084 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6085 unsigned OpSelHi = 0; 6086 unsigned NegLo = 0; 6087 unsigned NegHi = 0; 6088 6089 if (OpSelHiIdx != -1) { 6090 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6091 } 6092 6093 if (NegLoIdx != -1) { 6094 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6095 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6096 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6097 } 6098 6099 for (int J = 0; J < 3; ++J) { 6100 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6101 if (OpIdx == -1) 6102 break; 6103 6104 uint32_t ModVal = 0; 6105 6106 if ((OpSel & (1 << J)) != 0) 6107 ModVal |= SISrcMods::OP_SEL_0; 6108 6109 if ((OpSelHi & (1 << J)) != 0) 6110 ModVal |= SISrcMods::OP_SEL_1; 6111 6112 if ((NegLo & (1 << J)) != 0) 6113 ModVal |= SISrcMods::NEG; 6114 6115 if ((NegHi & (1 << J)) != 0) 6116 ModVal |= SISrcMods::NEG_HI; 6117 6118 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6119 6120 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6121 } 6122 } 6123 6124 //===----------------------------------------------------------------------===// 6125 // dpp 6126 
//===----------------------------------------------------------------------===// 6127 6128 bool AMDGPUOperand::isDPP8() const { 6129 return isImmTy(ImmTyDPP8); 6130 } 6131 6132 bool AMDGPUOperand::isDPPCtrl() const { 6133 using namespace AMDGPU::DPP; 6134 6135 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6136 if (result) { 6137 int64_t Imm = getImm(); 6138 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6139 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6140 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6141 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6142 (Imm == DppCtrl::WAVE_SHL1) || 6143 (Imm == DppCtrl::WAVE_ROL1) || 6144 (Imm == DppCtrl::WAVE_SHR1) || 6145 (Imm == DppCtrl::WAVE_ROR1) || 6146 (Imm == DppCtrl::ROW_MIRROR) || 6147 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6148 (Imm == DppCtrl::BCAST15) || 6149 (Imm == DppCtrl::BCAST31) || 6150 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6151 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6152 } 6153 return false; 6154 } 6155 6156 bool AMDGPUOperand::isS16Imm() const { 6157 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6158 } 6159 6160 bool AMDGPUOperand::isU16Imm() const { 6161 return isImm() && isUInt<16>(getImm()); 6162 } 6163 6164 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6165 if (!isGFX10()) 6166 return MatchOperand_NoMatch; 6167 6168 SMLoc S = Parser.getTok().getLoc(); 6169 6170 if (getLexer().isNot(AsmToken::Identifier)) 6171 return MatchOperand_NoMatch; 6172 if (getLexer().getTok().getString() != "dim") 6173 return MatchOperand_NoMatch; 6174 6175 Parser.Lex(); 6176 if (getLexer().isNot(AsmToken::Colon)) 6177 return MatchOperand_ParseFail; 6178 6179 Parser.Lex(); 6180 6181 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6182 // integer. 
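// If the suffix starts with a digit (e.g. "1D"), the lexer splits it into an integer and an identifier; glue them back together only when the identifier begins immediately after the integer (no gap), which is checked via the token locations below.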
6183 std::string Token; 6184 if (getLexer().is(AsmToken::Integer)) { 6185 SMLoc Loc = getLexer().getTok().getEndLoc(); 6186 Token = getLexer().getTok().getString(); 6187 Parser.Lex(); 6188 if (getLexer().getTok().getLoc() != Loc) 6189 return MatchOperand_ParseFail; 6190 } 6191 if (getLexer().isNot(AsmToken::Identifier)) 6192 return MatchOperand_ParseFail; 6193 Token += getLexer().getTok().getString(); 6194 6195 StringRef DimId = Token; 6196 if (DimId.startswith("SQ_RSRC_IMG_")) 6197 DimId = DimId.substr(12); 6198 6199 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6200 if (!DimInfo) 6201 return MatchOperand_ParseFail; 6202 6203 Parser.Lex(); 6204 6205 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6206 AMDGPUOperand::ImmTyDim)); 6207 return MatchOperand_Success; 6208 } 6209 6210 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6211 SMLoc S = Parser.getTok().getLoc(); 6212 StringRef Prefix; 6213 6214 if (getLexer().getKind() == AsmToken::Identifier) { 6215 Prefix = Parser.getTok().getString(); 6216 } else { 6217 return MatchOperand_NoMatch; 6218 } 6219 6220 if (Prefix != "dpp8") 6221 return parseDPPCtrl(Operands); 6222 if (!isGFX10()) 6223 return MatchOperand_NoMatch; 6224 6225 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6226 6227 int64_t Sels[8]; 6228 6229 Parser.Lex(); 6230 if (getLexer().isNot(AsmToken::Colon)) 6231 return MatchOperand_ParseFail; 6232 6233 Parser.Lex(); 6234 if (getLexer().isNot(AsmToken::LBrac)) 6235 return MatchOperand_ParseFail; 6236 6237 Parser.Lex(); 6238 if (getParser().parseAbsoluteExpression(Sels[0])) 6239 return MatchOperand_ParseFail; 6240 if (0 > Sels[0] || 7 < Sels[0]) 6241 return MatchOperand_ParseFail; 6242 6243 for (size_t i = 1; i < 8; ++i) { 6244 if (getLexer().isNot(AsmToken::Comma)) 6245 return MatchOperand_ParseFail; 6246 6247 Parser.Lex(); 6248 if (getParser().parseAbsoluteExpression(Sels[i])) 6249 return MatchOperand_ParseFail; 6250 if (0 > Sels[i] || 7 < Sels[i]) 6251 return MatchOperand_ParseFail; 6252 } 6253 6254 if (getLexer().isNot(AsmToken::RBrac)) 6255 return MatchOperand_ParseFail; 6256 Parser.Lex(); 6257 6258 unsigned DPP8 = 0; 6259 for (size_t i = 0; i < 8; ++i) 6260 DPP8 |= (Sels[i] << (i * 3)); 6261 6262 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6263 return MatchOperand_Success; 6264 } 6265 6266 OperandMatchResultTy 6267 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6268 using namespace AMDGPU::DPP; 6269 6270 SMLoc S = Parser.getTok().getLoc(); 6271 StringRef Prefix; 6272 int64_t Int; 6273 6274 if (getLexer().getKind() == AsmToken::Identifier) { 6275 Prefix = Parser.getTok().getString(); 6276 } else { 6277 return MatchOperand_NoMatch; 6278 } 6279 6280 if (Prefix == "row_mirror") { 6281 Int = DppCtrl::ROW_MIRROR; 6282 Parser.Lex(); 6283 } else if (Prefix == "row_half_mirror") { 6284 Int = DppCtrl::ROW_HALF_MIRROR; 6285 Parser.Lex(); 6286 } else { 6287 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6288 if (Prefix != "quad_perm" 6289 && Prefix != "row_shl" 6290 && Prefix != "row_shr" 6291 && Prefix != "row_ror" 6292 && Prefix != "wave_shl" 6293 && Prefix != "wave_rol" 6294 && Prefix != "wave_shr" 6295 && Prefix != "wave_ror" 6296 && Prefix != "row_bcast" 6297 && Prefix != "row_share" 6298 && Prefix != "row_xmask") { 6299 return MatchOperand_NoMatch; 6300 } 6301 6302 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6303 return MatchOperand_NoMatch; 6304 6305 if (!isVI() && 
!isGFX9() && 6306 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6307 Prefix == "wave_rol" || Prefix == "wave_ror" || 6308 Prefix == "row_bcast")) 6309 return MatchOperand_NoMatch; 6310 6311 Parser.Lex(); 6312 if (getLexer().isNot(AsmToken::Colon)) 6313 return MatchOperand_ParseFail; 6314 6315 if (Prefix == "quad_perm") { 6316 // quad_perm:[%d,%d,%d,%d] 6317 Parser.Lex(); 6318 if (getLexer().isNot(AsmToken::LBrac)) 6319 return MatchOperand_ParseFail; 6320 Parser.Lex(); 6321 6322 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6323 return MatchOperand_ParseFail; 6324 6325 for (int i = 0; i < 3; ++i) { 6326 if (getLexer().isNot(AsmToken::Comma)) 6327 return MatchOperand_ParseFail; 6328 Parser.Lex(); 6329 6330 int64_t Temp; 6331 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6332 return MatchOperand_ParseFail; 6333 const int shift = i*2 + 2; 6334 Int += (Temp << shift); 6335 } 6336 6337 if (getLexer().isNot(AsmToken::RBrac)) 6338 return MatchOperand_ParseFail; 6339 Parser.Lex(); 6340 } else { 6341 // sel:%d 6342 Parser.Lex(); 6343 if (getParser().parseAbsoluteExpression(Int)) 6344 return MatchOperand_ParseFail; 6345 6346 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6347 Int |= DppCtrl::ROW_SHL0; 6348 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6349 Int |= DppCtrl::ROW_SHR0; 6350 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6351 Int |= DppCtrl::ROW_ROR0; 6352 } else if (Prefix == "wave_shl" && 1 == Int) { 6353 Int = DppCtrl::WAVE_SHL1; 6354 } else if (Prefix == "wave_rol" && 1 == Int) { 6355 Int = DppCtrl::WAVE_ROL1; 6356 } else if (Prefix == "wave_shr" && 1 == Int) { 6357 Int = DppCtrl::WAVE_SHR1; 6358 } else if (Prefix == "wave_ror" && 1 == Int) { 6359 Int = DppCtrl::WAVE_ROR1; 6360 } else if (Prefix == "row_bcast") { 6361 if (Int == 15) { 6362 Int = DppCtrl::BCAST15; 6363 } else if (Int == 31) { 6364 Int = DppCtrl::BCAST31; 6365 } else { 6366 return MatchOperand_ParseFail; 6367 } 6368 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6369 Int |= DppCtrl::ROW_SHARE_FIRST; 6370 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6371 Int |= DppCtrl::ROW_XMASK_FIRST; 6372 } else { 6373 return MatchOperand_ParseFail; 6374 } 6375 } 6376 } 6377 6378 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6379 return MatchOperand_Success; 6380 } 6381 6382 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6383 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6384 } 6385 6386 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6387 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6388 } 6389 6390 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6391 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6392 } 6393 6394 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6395 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6396 } 6397 6398 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6399 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6400 } 6401 6402 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6403 OptionalImmIndexMap OptionalIdx; 6404 6405 unsigned I = 1; 6406 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6407 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6408 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6409 } 6410 6411 int Fi = 0; 6412 for (unsigned E = Operands.size(); I != E; ++I) { 6413 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6414 MCOI::TIED_TO); 6415 if (TiedTo != -1) { 6416 assert((unsigned)TiedTo < Inst.getNumOperands()); 6417 // handle tied old or src2 for MAC instructions 6418 Inst.addOperand(Inst.getOperand(TiedTo)); 6419 } 6420 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6421 // Add the register arguments 6422 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6423 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6424 // Skip it. 6425 continue; 6426 } 6427 6428 if (IsDPP8) { 6429 if (Op.isDPP8()) { 6430 Op.addImmOperands(Inst, 1); 6431 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6432 Op.addRegWithFPInputModsOperands(Inst, 2); 6433 } else if (Op.isFI()) { 6434 Fi = Op.getImm(); 6435 } else if (Op.isReg()) { 6436 Op.addRegOperands(Inst, 1); 6437 } else { 6438 llvm_unreachable("Invalid operand type"); 6439 } 6440 } else { 6441 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6442 Op.addRegWithFPInputModsOperands(Inst, 2); 6443 } else if (Op.isDPPCtrl()) { 6444 Op.addImmOperands(Inst, 1); 6445 } else if (Op.isImm()) { 6446 // Handle optional arguments 6447 OptionalIdx[Op.getImmTy()] = I; 6448 } else { 6449 llvm_unreachable("Invalid operand type"); 6450 } 6451 } 6452 } 6453 6454 if (IsDPP8) { 6455 using namespace llvm::AMDGPU::DPP; 6456 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6457 } else { 6458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6459 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6460 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6461 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6462 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6463 } 6464 } 6465 } 6466 6467 //===----------------------------------------------------------------------===// 6468 // sdwa 6469 //===----------------------------------------------------------------------===// 6470 6471 OperandMatchResultTy 6472 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6473 AMDGPUOperand::ImmTy Type) { 6474 using namespace llvm::AMDGPU::SDWA; 6475 6476 SMLoc S = Parser.getTok().getLoc(); 6477 StringRef Value; 6478 OperandMatchResultTy res; 6479 6480 res = parseStringWithPrefix(Prefix, Value); 6481 if (res != MatchOperand_Success) { 6482 return res; 6483 } 6484 6485 int64_t Int; 6486 Int = StringSwitch<int64_t>(Value) 6487 .Case("BYTE_0", SdwaSel::BYTE_0) 6488 .Case("BYTE_1", SdwaSel::BYTE_1) 6489 .Case("BYTE_2", SdwaSel::BYTE_2) 6490 .Case("BYTE_3", SdwaSel::BYTE_3) 6491 .Case("WORD_0", SdwaSel::WORD_0) 6492 .Case("WORD_1", SdwaSel::WORD_1) 6493 .Case("DWORD", SdwaSel::DWORD) 6494 .Default(0xffffffff); 6495 Parser.Lex(); // eat last token 6496 6497 if (Int == 0xffffffff) { 6498 return MatchOperand_ParseFail; 6499 } 6500 6501 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6502 return MatchOperand_Success; 6503 } 6504 6505 OperandMatchResultTy 6506 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6507 using namespace llvm::AMDGPU::SDWA; 6508 6509 SMLoc S = Parser.getTok().getLoc(); 6510 StringRef Value; 6511 OperandMatchResultTy res; 6512 6513 res = parseStringWithPrefix("dst_unused", Value); 6514 if (res != MatchOperand_Success) { 6515 
return res; 6516 } 6517 6518 int64_t Int; 6519 Int = StringSwitch<int64_t>(Value) 6520 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6521 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6522 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6523 .Default(0xffffffff); 6524 Parser.Lex(); // eat last token 6525 6526 if (Int == 0xffffffff) { 6527 return MatchOperand_ParseFail; 6528 } 6529 6530 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6531 return MatchOperand_Success; 6532 } 6533 6534 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6535 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6536 } 6537 6538 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6539 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6540 } 6541 6542 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6543 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6544 } 6545 6546 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6547 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6548 } 6549 6550 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6551 uint64_t BasicInstType, bool skipVcc) { 6552 using namespace llvm::AMDGPU::SDWA; 6553 6554 OptionalImmIndexMap OptionalIdx; 6555 bool skippedVcc = false; 6556 6557 unsigned I = 1; 6558 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6559 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6560 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6561 } 6562 6563 for (unsigned E = Operands.size(); I != E; ++I) { 6564 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6565 if (skipVcc && !skippedVcc && Op.isReg() && 6566 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6567 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6568 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6569 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6570 // Skip VCC only if we didn't skip it on previous iteration. 
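// Inst.getNumOperands() tells us where we are: 1 means only the dst has been emitted, so this "vcc" is the 2nd asm operand; 5 means the dst and both modifier/source pairs have been emitted, so this is the trailing "vcc".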
6571 if (BasicInstType == SIInstrFlags::VOP2 && 6572 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6573 skippedVcc = true; 6574 continue; 6575 } else if (BasicInstType == SIInstrFlags::VOPC && 6576 Inst.getNumOperands() == 0) { 6577 skippedVcc = true; 6578 continue; 6579 } 6580 } 6581 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6582 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6583 } else if (Op.isImm()) { 6584 // Handle optional arguments 6585 OptionalIdx[Op.getImmTy()] = I; 6586 } else { 6587 llvm_unreachable("Invalid operand type"); 6588 } 6589 skippedVcc = false; 6590 } 6591 6592 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6593 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6594 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6595 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6596 switch (BasicInstType) { 6597 case SIInstrFlags::VOP1: 6598 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6599 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6601 } 6602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6605 break; 6606 6607 case SIInstrFlags::VOP2: 6608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6609 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6611 } 6612 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6613 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6614 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6615 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6616 break; 6617 6618 case SIInstrFlags::VOPC: 6619 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6620 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6621 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6622 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6623 break; 6624 6625 default: 6626 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 6627 } 6628 } 6629 6630 // special case v_mac_{f16, f32}: 6631 // it has src2 register operand that is tied to dst operand 6632 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6633 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6634 auto it = Inst.begin(); 6635 std::advance( 6636 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6637 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6638 } 6639 } 6640 6641 /// Force static initialization. 
6642 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6643 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6644 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6645 } 6646 6647 #define GET_REGISTER_MATCHER 6648 #define GET_MATCHER_IMPLEMENTATION 6649 #define GET_MNEMONIC_SPELL_CHECKER 6650 #include "AMDGPUGenAsmMatcher.inc" 6651 6652 // This function should be defined after the auto-generated include so that we 6653 // have the MatchClassKind enum defined. 6654 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6655 unsigned Kind) { 6656 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6657 // But MatchInstructionImpl() expects a token and fails to validate the 6658 // operand. This method checks whether we were given an immediate operand but 6659 // expected the corresponding token. 6660 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6661 switch (Kind) { 6662 case MCK_addr64: 6663 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6664 case MCK_gds: 6665 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6666 case MCK_lds: 6667 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6668 case MCK_glc: 6669 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6670 case MCK_idxen: 6671 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6672 case MCK_offen: 6673 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6674 case MCK_SSrcB32: 6675 // When operands have expression values, they will return true for isToken, 6676 // because it is not possible to distinguish between a token and an 6677 // expression at parse time. MatchInstructionImpl() will always try to 6678 // match an operand as a token when isToken returns true, and when the 6679 // name of the expression is not a valid token the match will fail, 6680 // so we need to handle it here. 6681 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6682 case MCK_SSrcF32: 6683 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6684 case MCK_SoppBrTarget: 6685 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6686 case MCK_VReg32OrOff: 6687 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6688 case MCK_InterpSlot: 6689 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 6690 case MCK_Attr: 6691 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6692 case MCK_AttrChan: 6693 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6694 default: 6695 return Match_InvalidOperand; 6696 } 6697 } 6698 6699 //===----------------------------------------------------------------------===// 6700 // endpgm 6701 //===----------------------------------------------------------------------===// 6702 6703 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6704 SMLoc S = Parser.getTok().getLoc(); 6705 int64_t Imm = 0; 6706 6707 if (!parseExpr(Imm)) { 6708 // The operand is optional; if not present, default to 0. 6709 Imm = 0; 6710 } 6711 6712 if (!isUInt<16>(Imm)) { 6713 Error(S, "expected a 16-bit value"); 6714 return MatchOperand_ParseFail; 6715 } 6716 6717 Operands.push_back( 6718 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6719 return MatchOperand_Success; 6720 } 6721 6722 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6723