//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
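  // Example: a source operand written as "-|v0|" parses with both Neg and Abs
  // set, so getModifiersOperand() yields SISrcMods::NEG | SISrcMods::ABS for
  // the corresponding src_modifiers operand; "sext(v0)" sets only Sext.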

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};
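// Example: after an instruction referencing v[8:11] has been parsed,
// usesRegister(IS_VGPR, 8, 4) marks dword index 11 as used, so the
// .kernel.vgpr_count symbol is updated to 12 (one past the highest VGPR seen).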

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
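      // Note: since these are ordinary variable symbols, assembly source can
      // reference them in expressions, e.g. (illustrative)
      //   .if .amdgcn.gfx_generation_number >= 9
      // or the .option.machine_version_* symbols on targets without code
      // object v3.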
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy
  parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept such
    // literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
.Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1733 .Case("vcc_lo", AMDGPU::VCC_LO) 1734 .Case("vcc_hi", AMDGPU::VCC_HI) 1735 .Case("exec_lo", AMDGPU::EXEC_LO) 1736 .Case("exec_hi", AMDGPU::EXEC_HI) 1737 .Case("tma_lo", AMDGPU::TMA_LO) 1738 .Case("tma_hi", AMDGPU::TMA_HI) 1739 .Case("tba_lo", AMDGPU::TBA_LO) 1740 .Case("tba_hi", AMDGPU::TBA_HI) 1741 .Case("null", AMDGPU::SGPR_NULL) 1742 .Default(0); 1743 } 1744 1745 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1746 SMLoc &EndLoc) { 1747 auto R = parseRegister(); 1748 if (!R) return true; 1749 assert(R->isReg()); 1750 RegNo = R->getReg(); 1751 StartLoc = R->getStartLoc(); 1752 EndLoc = R->getEndLoc(); 1753 return false; 1754 } 1755 1756 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1757 RegisterKind RegKind, unsigned Reg1, 1758 unsigned RegNum) { 1759 switch (RegKind) { 1760 case IS_SPECIAL: 1761 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1762 Reg = AMDGPU::EXEC; 1763 RegWidth = 2; 1764 return true; 1765 } 1766 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1767 Reg = AMDGPU::FLAT_SCR; 1768 RegWidth = 2; 1769 return true; 1770 } 1771 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1772 Reg = AMDGPU::XNACK_MASK; 1773 RegWidth = 2; 1774 return true; 1775 } 1776 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1777 Reg = AMDGPU::VCC; 1778 RegWidth = 2; 1779 return true; 1780 } 1781 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1782 Reg = AMDGPU::TBA; 1783 RegWidth = 2; 1784 return true; 1785 } 1786 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1787 Reg = AMDGPU::TMA; 1788 RegWidth = 2; 1789 return true; 1790 } 1791 return false; 1792 case IS_VGPR: 1793 case IS_SGPR: 1794 case IS_TTMP: 1795 if (Reg1 != Reg + RegWidth) { 1796 return false; 1797 } 1798 RegWidth++; 1799 return true; 1800 default: 1801 llvm_unreachable("unexpected register kind"); 1802 } 1803 } 1804 1805 static const StringRef Registers[] = { 1806 { "v" }, 1807 { "s" }, 1808 { "ttmp" }, 1809 }; 1810 1811 bool 1812 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1813 const AsmToken &NextToken) const { 1814 1815 // A list of consecutive registers: [s0,s1,s2,s3] 1816 if (Token.is(AsmToken::LBrac)) 1817 return true; 1818 1819 if (!Token.is(AsmToken::Identifier)) 1820 return false; 1821 1822 // A single register like s0 or a range of registers like s[0:1] 1823 1824 StringRef RegName = Token.getString(); 1825 1826 for (StringRef Reg : Registers) { 1827 if (RegName.startswith(Reg)) { 1828 if (Reg.size() < RegName.size()) { 1829 unsigned RegNum; 1830 // A single register with an index: rXX 1831 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1832 return true; 1833 } else { 1834 // A range of registers: r[XX:YY]. 
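// For example (illustrative operands only): v0 or s15 match the single-register form above, while s[0:3] or ttmp[4:7] match the range form checked below.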
1835 if (NextToken.is(AsmToken::LBrac)) 1836 return true; 1837 } 1838 } 1839 } 1840 1841 return getSpecialRegForName(RegName); 1842 } 1843 1844 bool 1845 AMDGPUAsmParser::isRegister() 1846 { 1847 return isRegister(getToken(), peekToken()); 1848 } 1849 1850 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1851 unsigned &RegNum, unsigned &RegWidth, 1852 unsigned *DwordRegIndex) { 1853 if (DwordRegIndex) { *DwordRegIndex = 0; } 1854 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1855 if (getLexer().is(AsmToken::Identifier)) { 1856 StringRef RegName = Parser.getTok().getString(); 1857 if ((Reg = getSpecialRegForName(RegName))) { 1858 Parser.Lex(); 1859 RegKind = IS_SPECIAL; 1860 } else { 1861 unsigned RegNumIndex = 0; 1862 if (RegName[0] == 'v') { 1863 RegNumIndex = 1; 1864 RegKind = IS_VGPR; 1865 } else if (RegName[0] == 's') { 1866 RegNumIndex = 1; 1867 RegKind = IS_SGPR; 1868 } else if (RegName.startswith("ttmp")) { 1869 RegNumIndex = strlen("ttmp"); 1870 RegKind = IS_TTMP; 1871 } else { 1872 return false; 1873 } 1874 if (RegName.size() > RegNumIndex) { 1875 // Single 32-bit register: vXX. 1876 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1877 return false; 1878 Parser.Lex(); 1879 RegWidth = 1; 1880 } else { 1881 // Range of registers: v[XX:YY]. ":YY" is optional. 1882 Parser.Lex(); 1883 int64_t RegLo, RegHi; 1884 if (getLexer().isNot(AsmToken::LBrac)) 1885 return false; 1886 Parser.Lex(); 1887 1888 if (getParser().parseAbsoluteExpression(RegLo)) 1889 return false; 1890 1891 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1892 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1893 return false; 1894 Parser.Lex(); 1895 1896 if (isRBrace) { 1897 RegHi = RegLo; 1898 } else { 1899 if (getParser().parseAbsoluteExpression(RegHi)) 1900 return false; 1901 1902 if (getLexer().isNot(AsmToken::RBrac)) 1903 return false; 1904 Parser.Lex(); 1905 } 1906 RegNum = (unsigned) RegLo; 1907 RegWidth = (RegHi - RegLo) + 1; 1908 } 1909 } 1910 } else if (getLexer().is(AsmToken::LBrac)) { 1911 // List of consecutive registers: [s0,s1,s2,s3] 1912 Parser.Lex(); 1913 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1914 return false; 1915 if (RegWidth != 1) 1916 return false; 1917 RegisterKind RegKind1; 1918 unsigned Reg1, RegNum1, RegWidth1; 1919 do { 1920 if (getLexer().is(AsmToken::Comma)) { 1921 Parser.Lex(); 1922 } else if (getLexer().is(AsmToken::RBrac)) { 1923 Parser.Lex(); 1924 break; 1925 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1926 if (RegWidth1 != 1) { 1927 return false; 1928 } 1929 if (RegKind1 != RegKind) { 1930 return false; 1931 } 1932 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1933 return false; 1934 } 1935 } else { 1936 return false; 1937 } 1938 } while (true); 1939 } else { 1940 return false; 1941 } 1942 switch (RegKind) { 1943 case IS_SPECIAL: 1944 RegNum = 0; 1945 RegWidth = 1; 1946 break; 1947 case IS_VGPR: 1948 case IS_SGPR: 1949 case IS_TTMP: 1950 { 1951 unsigned Size = 1; 1952 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1953 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
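// For example (illustrative values): s[2:3] is accepted because its starting index 2 is a multiple of the 2-dword width, whereas s[1:2] is rejected by the check below since 1 % 2 != 0.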
1954 Size = std::min(RegWidth, 4u); 1955 } 1956 if (RegNum % Size != 0) 1957 return false; 1958 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1959 RegNum = RegNum / Size; 1960 int RCID = getRegClass(RegKind, RegWidth); 1961 if (RCID == -1) 1962 return false; 1963 const MCRegisterClass RC = TRI->getRegClass(RCID); 1964 if (RegNum >= RC.getNumRegs()) 1965 return false; 1966 Reg = RC.getRegister(RegNum); 1967 break; 1968 } 1969 1970 default: 1971 llvm_unreachable("unexpected register kind"); 1972 } 1973 1974 if (!subtargetHasRegister(*TRI, Reg)) 1975 return false; 1976 return true; 1977 } 1978 1979 Optional<StringRef> 1980 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1981 switch (RegKind) { 1982 case IS_VGPR: 1983 return StringRef(".amdgcn.next_free_vgpr"); 1984 case IS_SGPR: 1985 return StringRef(".amdgcn.next_free_sgpr"); 1986 default: 1987 return None; 1988 } 1989 } 1990 1991 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1992 auto SymbolName = getGprCountSymbolName(RegKind); 1993 assert(SymbolName && "initializing invalid register kind"); 1994 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1995 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1996 } 1997 1998 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1999 unsigned DwordRegIndex, 2000 unsigned RegWidth) { 2001 // Symbols are only defined for GCN targets 2002 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2003 return true; 2004 2005 auto SymbolName = getGprCountSymbolName(RegKind); 2006 if (!SymbolName) 2007 return true; 2008 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2009 2010 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2011 int64_t OldCount; 2012 2013 if (!Sym->isVariable()) 2014 return !Error(getParser().getTok().getLoc(), 2015 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2016 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2017 return !Error( 2018 getParser().getTok().getLoc(), 2019 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2020 2021 if (OldCount <= NewMax) 2022 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2023 2024 return true; 2025 } 2026 2027 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2028 const auto &Tok = Parser.getTok(); 2029 SMLoc StartLoc = Tok.getLoc(); 2030 SMLoc EndLoc = Tok.getEndLoc(); 2031 RegisterKind RegKind; 2032 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2033 2034 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2035 //FIXME: improve error messages (bug 41303). 
2036 Error(StartLoc, "not a valid operand."); 2037 return nullptr; 2038 } 2039 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2040 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2041 return nullptr; 2042 } else 2043 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2044 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2045 } 2046 2047 OperandMatchResultTy 2048 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2049 // TODO: add syntactic sugar for 1/(2*PI) 2050 2051 assert(!isRegister()); 2052 assert(!isModifier()); 2053 2054 const auto& Tok = getToken(); 2055 const auto& NextTok = peekToken(); 2056 bool IsReal = Tok.is(AsmToken::Real); 2057 SMLoc S = getLoc(); 2058 bool Negate = false; 2059 2060 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2061 lex(); 2062 IsReal = true; 2063 Negate = true; 2064 } 2065 2066 if (IsReal) { 2067 // Floating-point expressions are not supported. 2068 // Can only allow floating-point literals with an 2069 // optional sign. 2070 2071 StringRef Num = getTokenStr(); 2072 lex(); 2073 2074 APFloat RealVal(APFloat::IEEEdouble()); 2075 auto roundMode = APFloat::rmNearestTiesToEven; 2076 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2077 return MatchOperand_ParseFail; 2078 } 2079 if (Negate) 2080 RealVal.changeSign(); 2081 2082 Operands.push_back( 2083 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2084 AMDGPUOperand::ImmTyNone, true)); 2085 2086 return MatchOperand_Success; 2087 2088 } else { 2089 int64_t IntVal; 2090 const MCExpr *Expr; 2091 SMLoc S = getLoc(); 2092 2093 if (HasSP3AbsModifier) { 2094 // This is a workaround for handling expressions 2095 // as arguments of SP3 'abs' modifier, for example: 2096 // |1.0| 2097 // |-1| 2098 // |1+x| 2099 // This syntax is not compatible with syntax of standard 2100 // MC expressions (due to the trailing '|'). 
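// As a sketch of the workaround: only a primary expression is parsed in that case, so the trailing '|' is left in the token stream for parseRegOrImmWithFPInputMods to consume.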
2101 SMLoc EndLoc; 2102 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2103 return MatchOperand_ParseFail; 2104 } else { 2105 if (Parser.parseExpression(Expr)) 2106 return MatchOperand_ParseFail; 2107 } 2108 2109 if (Expr->evaluateAsAbsolute(IntVal)) { 2110 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2111 } else { 2112 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2113 } 2114 2115 return MatchOperand_Success; 2116 } 2117 2118 return MatchOperand_NoMatch; 2119 } 2120 2121 OperandMatchResultTy 2122 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2123 if (!isRegister()) 2124 return MatchOperand_NoMatch; 2125 2126 if (auto R = parseRegister()) { 2127 assert(R->isReg()); 2128 Operands.push_back(std::move(R)); 2129 return MatchOperand_Success; 2130 } 2131 return MatchOperand_ParseFail; 2132 } 2133 2134 OperandMatchResultTy 2135 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2136 auto res = parseReg(Operands); 2137 if (res != MatchOperand_NoMatch) { 2138 return res; 2139 } else if (isModifier()) { 2140 return MatchOperand_NoMatch; 2141 } else { 2142 return parseImm(Operands, HasSP3AbsMod); 2143 } 2144 } 2145 2146 bool 2147 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2148 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2149 const auto &str = Token.getString(); 2150 return str == "abs" || str == "neg" || str == "sext"; 2151 } 2152 return false; 2153 } 2154 2155 bool 2156 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2157 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2158 } 2159 2160 bool 2161 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2162 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2163 } 2164 2165 bool 2166 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2167 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2168 } 2169 2170 // Check if this is an operand modifier or an opcode modifier 2171 // which may look like an expression but it is not. We should 2172 // avoid parsing these modifiers as expressions. Currently 2173 // recognized sequences are: 2174 // |...| 2175 // abs(...) 2176 // neg(...) 2177 // sext(...) 2178 // -reg 2179 // -|...| 2180 // -abs(...) 2181 // name:... 2182 // Note that simple opcode modifiers like 'gds' may be parsed as 2183 // expressions; this is a special case. See getExpressionAsToken. 2184 // 2185 bool 2186 AMDGPUAsmParser::isModifier() { 2187 2188 AsmToken Tok = getToken(); 2189 AsmToken NextToken[2]; 2190 peekTokens(NextToken); 2191 2192 return isOperandModifier(Tok, NextToken[0]) || 2193 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2194 isOpcodeModifierWithVal(Tok, NextToken[0]); 2195 } 2196 2197 // Check if the current token is an SP3 'neg' modifier. 2198 // Currently this modifier is allowed in the following context: 2199 // 2200 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2201 // 2. Before an 'abs' modifier: -abs(...) 2202 // 3. Before an SP3 'abs' modifier: -|...| 2203 // 2204 // In all other cases "-" is handled as a part 2205 // of an expression that follows the sign. 
2206 // 2207 // Note: When "-" is followed by an integer literal, 2208 // this is interpreted as integer negation rather 2209 // than a floating-point NEG modifier applied to N. 2210 // Besides being counter-intuitive, such use of a floating-point 2211 // NEG modifier would have resulted in different meanings 2212 // of integer literals used with VOP1/2/C and VOP3, 2213 // for example: 2214 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2215 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2216 // Negative fp literals with preceding "-" are 2217 // handled likewise for uniformity. 2218 // 2219 bool 2220 AMDGPUAsmParser::parseSP3NegModifier() { 2221 2222 AsmToken NextToken[2]; 2223 peekTokens(NextToken); 2224 2225 if (isToken(AsmToken::Minus) && 2226 (isRegister(NextToken[0], NextToken[1]) || 2227 NextToken[0].is(AsmToken::Pipe) || 2228 isId(NextToken[0], "abs"))) { 2229 lex(); 2230 return true; 2231 } 2232 2233 return false; 2234 } 2235 2236 OperandMatchResultTy 2237 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2238 bool AllowImm) { 2239 bool Neg, SP3Neg; 2240 bool Abs, SP3Abs; 2241 SMLoc Loc; 2242 2243 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2244 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2245 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2246 return MatchOperand_ParseFail; 2247 } 2248 2249 SP3Neg = parseSP3NegModifier(); 2250 2251 Loc = getLoc(); 2252 Neg = trySkipId("neg"); 2253 if (Neg && SP3Neg) { 2254 Error(Loc, "expected register or immediate"); 2255 return MatchOperand_ParseFail; 2256 } 2257 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2258 return MatchOperand_ParseFail; 2259 2260 Abs = trySkipId("abs"); 2261 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2262 return MatchOperand_ParseFail; 2263 2264 Loc = getLoc(); 2265 SP3Abs = trySkipToken(AsmToken::Pipe); 2266 if (Abs && SP3Abs) { 2267 Error(Loc, "expected register or immediate"); 2268 return MatchOperand_ParseFail; 2269 } 2270 2271 OperandMatchResultTy Res; 2272 if (AllowImm) { 2273 Res = parseRegOrImm(Operands, SP3Abs); 2274 } else { 2275 Res = parseReg(Operands); 2276 } 2277 if (Res != MatchOperand_Success) { 2278 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2279 } 2280 2281 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2282 return MatchOperand_ParseFail; 2283 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2284 return MatchOperand_ParseFail; 2285 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2286 return MatchOperand_ParseFail; 2287 2288 AMDGPUOperand::Modifiers Mods; 2289 Mods.Abs = Abs || SP3Abs; 2290 Mods.Neg = Neg || SP3Neg; 2291 2292 if (Mods.hasFPModifiers()) { 2293 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2294 if (Op.isExpr()) { 2295 Error(Op.getStartLoc(), "expected an absolute expression"); 2296 return MatchOperand_ParseFail; 2297 } 2298 Op.setModifiers(Mods); 2299 } 2300 return MatchOperand_Success; 2301 } 2302 2303 OperandMatchResultTy 2304 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2305 bool AllowImm) { 2306 bool Sext = trySkipId("sext"); 2307 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2308 return MatchOperand_ParseFail; 2309 2310 OperandMatchResultTy Res; 2311 if (AllowImm) { 2312 Res = parseRegOrImm(Operands); 2313 } else { 2314 Res = parseReg(Operands); 2315 } 2316 if (Res != MatchOperand_Success) { 2317 return Sext? MatchOperand_ParseFail : Res; 2318 } 2319 2320 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2321 return MatchOperand_ParseFail; 2322 2323 AMDGPUOperand::Modifiers Mods; 2324 Mods.Sext = Sext; 2325 2326 if (Mods.hasIntModifiers()) { 2327 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2328 if (Op.isExpr()) { 2329 Error(Op.getStartLoc(), "expected an absolute expression"); 2330 return MatchOperand_ParseFail; 2331 } 2332 Op.setModifiers(Mods); 2333 } 2334 2335 return MatchOperand_Success; 2336 } 2337 2338 OperandMatchResultTy 2339 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2340 return parseRegOrImmWithFPInputMods(Operands, false); 2341 } 2342 2343 OperandMatchResultTy 2344 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2345 return parseRegOrImmWithIntInputMods(Operands, false); 2346 } 2347 2348 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2349 auto Loc = getLoc(); 2350 if (trySkipId("off")) { 2351 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2352 AMDGPUOperand::ImmTyOff, false)); 2353 return MatchOperand_Success; 2354 } 2355 2356 if (!isRegister()) 2357 return MatchOperand_NoMatch; 2358 2359 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2360 if (Reg) { 2361 Operands.push_back(std::move(Reg)); 2362 return MatchOperand_Success; 2363 } 2364 2365 return MatchOperand_ParseFail; 2366 2367 } 2368 2369 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2370 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2371 2372 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2373 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2374 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2375 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2376 return Match_InvalidOperand; 2377 2378 if ((TSFlags & SIInstrFlags::VOP3) && 2379 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2380 getForcedEncodingSize() != 64) 2381 return Match_PreferE32; 2382 2383 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2384 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2385 // v_mac_f32/16 allow only dst_sel == DWORD; 2386 auto OpNum = 2387 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2388 const auto &Op = Inst.getOperand(OpNum); 2389 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2390 return Match_InvalidOperand; 2391 } 2392 } 2393 2394 if (TSFlags & SIInstrFlags::FLAT) { 2395 // FIXME: Produces error without correct column reported. 2396 auto Opcode = Inst.getOpcode(); 2397 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 2398 2399 const auto &Op = Inst.getOperand(OpNum); 2400 if (!hasFlatOffsets() && Op.getImm() != 0) 2401 return Match_InvalidOperand; 2402 2403 // GFX10: Address offset is 12-bit signed byte offset. Must be positive for 2404 // FLAT segment. For FLAT segment MSB is ignored and forced to zero. 2405 if (isGFX10()) { 2406 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 2407 if (!isInt<12>(Op.getImm())) 2408 return Match_InvalidOperand; 2409 } else { 2410 if (!isUInt<11>(Op.getImm())) 2411 return Match_InvalidOperand; 2412 } 2413 } 2414 } 2415 2416 return Match_Success; 2417 } 2418 2419 // What asm variants we should check 2420 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2421 if (getForcedEncodingSize() == 32) { 2422 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2423 return makeArrayRef(Variants); 2424 } 2425 2426 if (isForcedVOP3()) { 2427 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2428 return makeArrayRef(Variants); 2429 } 2430 2431 if (isForcedSDWA()) { 2432 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2433 AMDGPUAsmVariants::SDWA9}; 2434 return makeArrayRef(Variants); 2435 } 2436 2437 if (isForcedDPP()) { 2438 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2439 return makeArrayRef(Variants); 2440 } 2441 2442 static const unsigned Variants[] = { 2443 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2444 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2445 }; 2446 2447 return makeArrayRef(Variants); 2448 } 2449 2450 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2451 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2452 const unsigned Num = Desc.getNumImplicitUses(); 2453 for (unsigned i = 0; i < Num; ++i) { 2454 unsigned Reg = Desc.ImplicitUses[i]; 2455 switch (Reg) { 2456 case AMDGPU::FLAT_SCR: 2457 case AMDGPU::VCC: 2458 case AMDGPU::VCC_LO: 2459 case AMDGPU::VCC_HI: 2460 case AMDGPU::M0: 2461 case AMDGPU::SGPR_NULL: 2462 return Reg; 2463 default: 2464 break; 2465 } 2466 } 2467 return AMDGPU::NoRegister; 2468 } 2469 2470 // NB: This code is correct only when used to check constant 2471 // bus limitations because GFX7 supports no f16 inline constants. 2472 // Note that there are no cases when a GFX7 opcode violates 2473 // constant bus limitations due to the use of an f16 constant.
2474 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2475 unsigned OpIdx) const { 2476 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2477 2478 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2479 return false; 2480 } 2481 2482 const MCOperand &MO = Inst.getOperand(OpIdx); 2483 2484 int64_t Val = MO.getImm(); 2485 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2486 2487 switch (OpSize) { // expected operand size 2488 case 8: 2489 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2490 case 4: 2491 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2492 case 2: { 2493 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2494 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2495 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2496 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2497 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2498 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2499 } else { 2500 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2501 } 2502 } 2503 default: 2504 llvm_unreachable("invalid operand size"); 2505 } 2506 } 2507 2508 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2509 const MCOperand &MO = Inst.getOperand(OpIdx); 2510 if (MO.isImm()) { 2511 return !isInlineConstant(Inst, OpIdx); 2512 } 2513 return !MO.isReg() || 2514 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2515 } 2516 2517 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2518 const unsigned Opcode = Inst.getOpcode(); 2519 const MCInstrDesc &Desc = MII.get(Opcode); 2520 unsigned ConstantBusUseCount = 0; 2521 unsigned NumLiterals = 0; 2522 unsigned LiteralSize; 2523 2524 if (Desc.TSFlags & 2525 (SIInstrFlags::VOPC | 2526 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2527 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2528 SIInstrFlags::SDWA)) { 2529 // Check special imm operands (used by madmk, etc) 2530 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2531 ++ConstantBusUseCount; 2532 } 2533 2534 SmallDenseSet<unsigned> SGPRsUsed; 2535 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2536 if (SGPRUsed != AMDGPU::NoRegister) { 2537 SGPRsUsed.insert(SGPRUsed); 2538 ++ConstantBusUseCount; 2539 } 2540 2541 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2542 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2543 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2544 2545 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2546 2547 for (int OpIdx : OpIndices) { 2548 if (OpIdx == -1) break; 2549 2550 const MCOperand &MO = Inst.getOperand(OpIdx); 2551 if (usesConstantBus(Inst, OpIdx)) { 2552 if (MO.isReg()) { 2553 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2554 // Pairs of registers with a partial intersection like these 2555 // s0, s[0:1] 2556 // flat_scratch_lo, flat_scratch 2557 // flat_scratch_lo, flat_scratch_hi 2558 // are theoretically valid but they are disabled anyway. 2559 // Note that this code mimics SIInstrInfo::verifyInstruction 2560 if (!SGPRsUsed.count(Reg)) { 2561 SGPRsUsed.insert(Reg); 2562 ++ConstantBusUseCount; 2563 } 2564 SGPRUsed = Reg; 2565 } else { // Expression or a literal 2566 2567 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2568 continue; // special operand like VINTERP attr_chan 2569 2570 // An instruction may use only one literal.
2571 // This has been validated on the previous step. 2572 // See validateVOP3Literal. 2573 // This literal may be used as more than one operand. 2574 // If all these operands are of the same size, 2575 // this literal counts as one scalar value. 2576 // Otherwise it counts as 2 scalar values. 2577 // See "GFX10 Shader Programming", section 3.6.2.3. 2578 2579 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2580 if (Size < 4) Size = 4; 2581 2582 if (NumLiterals == 0) { 2583 NumLiterals = 1; 2584 LiteralSize = Size; 2585 } else if (LiteralSize != Size) { 2586 NumLiterals = 2; 2587 } 2588 } 2589 } 2590 } 2591 } 2592 ConstantBusUseCount += NumLiterals; 2593 2594 if (isGFX10()) 2595 return ConstantBusUseCount <= 2; 2596 2597 return ConstantBusUseCount <= 1; 2598 } 2599 2600 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2601 const unsigned Opcode = Inst.getOpcode(); 2602 const MCInstrDesc &Desc = MII.get(Opcode); 2603 2604 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2605 if (DstIdx == -1 || 2606 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2607 return true; 2608 } 2609 2610 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2611 2612 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2613 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2614 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2615 2616 assert(DstIdx != -1); 2617 const MCOperand &Dst = Inst.getOperand(DstIdx); 2618 assert(Dst.isReg()); 2619 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2620 2621 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2622 2623 for (int SrcIdx : SrcIndices) { 2624 if (SrcIdx == -1) break; 2625 const MCOperand &Src = Inst.getOperand(SrcIdx); 2626 if (Src.isReg()) { 2627 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2628 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2629 return false; 2630 } 2631 } 2632 } 2633 2634 return true; 2635 } 2636 2637 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2638 2639 const unsigned Opc = Inst.getOpcode(); 2640 const MCInstrDesc &Desc = MII.get(Opc); 2641 2642 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2643 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2644 assert(ClampIdx != -1); 2645 return Inst.getOperand(ClampIdx).getImm() == 0; 2646 } 2647 2648 return true; 2649 } 2650 2651 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2652 2653 const unsigned Opc = Inst.getOpcode(); 2654 const MCInstrDesc &Desc = MII.get(Opc); 2655 2656 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2657 return true; 2658 2659 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2660 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2661 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2662 2663 assert(VDataIdx != -1); 2664 assert(DMaskIdx != -1); 2665 assert(TFEIdx != -1); 2666 2667 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2668 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2669 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2670 if (DMask == 0) 2671 DMask = 1; 2672 2673 unsigned DataSize = 2674 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2675 if (hasPackedD16()) { 2676 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2677 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2678 DataSize = (DataSize + 1) / 2; 2679 } 2680 2681 return (VDataSize / 4) == DataSize + TFESize; 2682 } 2683 2684 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2685 const unsigned Opc = Inst.getOpcode(); 2686 const MCInstrDesc &Desc = MII.get(Opc); 2687 2688 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2689 return true; 2690 2691 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2692 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2693 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2694 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2695 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2696 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2697 2698 assert(VAddr0Idx != -1); 2699 assert(SrsrcIdx != -1); 2700 assert(DimIdx != -1); 2701 assert(SrsrcIdx > VAddr0Idx); 2702 2703 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2704 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2705 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2706 unsigned VAddrSize = 2707 IsNSA ? SrsrcIdx - VAddr0Idx 2708 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2709 2710 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2711 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2712 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2713 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2714 if (!IsNSA) { 2715 if (AddrSize > 8) 2716 AddrSize = 16; 2717 else if (AddrSize > 4) 2718 AddrSize = 8; 2719 } 2720 2721 return VAddrSize == AddrSize; 2722 } 2723 2724 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2725 2726 const unsigned Opc = Inst.getOpcode(); 2727 const MCInstrDesc &Desc = MII.get(Opc); 2728 2729 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2730 return true; 2731 if (!Desc.mayLoad() || !Desc.mayStore()) 2732 return true; // Not atomic 2733 2734 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2735 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2736 2737 // This is an incomplete check because image_atomic_cmpswap 2738 // may only use 0x3 and 0xf while other atomic operations 2739 // may use 0x1 and 0x3. However these limitations are 2740 // verified when we check that dmask matches dst size. 2741 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2742 } 2743 2744 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2745 2746 const unsigned Opc = Inst.getOpcode(); 2747 const MCInstrDesc &Desc = MII.get(Opc); 2748 2749 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2750 return true; 2751 2752 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2753 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2754 2755 // GATHER4 instructions use dmask in a different fashion compared to 2756 // other MIMG instructions. The only useful DMASK values are 2757 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2758 // (red,red,red,red) etc.) The ISA document doesn't mention 2759 // this. 
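// For example (illustrative), dmask:0x2 would gather the green component from each of the four sampled texels.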
2760 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2761 } 2762 2763 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2764 2765 const unsigned Opc = Inst.getOpcode(); 2766 const MCInstrDesc &Desc = MII.get(Opc); 2767 2768 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2769 return true; 2770 2771 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2772 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2773 if (isCI() || isSI()) 2774 return false; 2775 } 2776 2777 return true; 2778 } 2779 2780 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2781 const unsigned Opc = Inst.getOpcode(); 2782 const MCInstrDesc &Desc = MII.get(Opc); 2783 2784 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2785 return true; 2786 2787 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2788 if (DimIdx < 0) 2789 return true; 2790 2791 long Imm = Inst.getOperand(DimIdx).getImm(); 2792 if (Imm < 0 || Imm >= 8) 2793 return false; 2794 2795 return true; 2796 } 2797 2798 static bool IsRevOpcode(const unsigned Opcode) 2799 { 2800 switch (Opcode) { 2801 case AMDGPU::V_SUBREV_F32_e32: 2802 case AMDGPU::V_SUBREV_F32_e64: 2803 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2804 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2805 case AMDGPU::V_SUBREV_F32_e32_vi: 2806 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2807 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 2808 case AMDGPU::V_SUBREV_F32_e64_vi: 2809 2810 case AMDGPU::V_SUBREV_I32_e32: 2811 case AMDGPU::V_SUBREV_I32_e64: 2812 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 2813 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 2814 2815 case AMDGPU::V_SUBBREV_U32_e32: 2816 case AMDGPU::V_SUBBREV_U32_e64: 2817 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 2818 case AMDGPU::V_SUBBREV_U32_e32_vi: 2819 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 2820 case AMDGPU::V_SUBBREV_U32_e64_vi: 2821 2822 case AMDGPU::V_SUBREV_U32_e32: 2823 case AMDGPU::V_SUBREV_U32_e64: 2824 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2825 case AMDGPU::V_SUBREV_U32_e32_vi: 2826 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2827 case AMDGPU::V_SUBREV_U32_e64_vi: 2828 2829 case AMDGPU::V_SUBREV_F16_e32: 2830 case AMDGPU::V_SUBREV_F16_e64: 2831 case AMDGPU::V_SUBREV_F16_e32_gfx10: 2832 case AMDGPU::V_SUBREV_F16_e32_vi: 2833 case AMDGPU::V_SUBREV_F16_e64_gfx10: 2834 case AMDGPU::V_SUBREV_F16_e64_vi: 2835 2836 case AMDGPU::V_SUBREV_U16_e32: 2837 case AMDGPU::V_SUBREV_U16_e64: 2838 case AMDGPU::V_SUBREV_U16_e32_vi: 2839 case AMDGPU::V_SUBREV_U16_e64_vi: 2840 2841 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2842 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 2843 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2844 2845 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2846 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2847 2848 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 2849 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 2850 2851 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 2852 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 2853 2854 case AMDGPU::V_LSHRREV_B32_e32: 2855 case AMDGPU::V_LSHRREV_B32_e64: 2856 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 2857 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 2858 case AMDGPU::V_LSHRREV_B32_e32_vi: 2859 case AMDGPU::V_LSHRREV_B32_e64_vi: 2860 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 2861 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 2862 2863 case AMDGPU::V_ASHRREV_I32_e32: 2864 case AMDGPU::V_ASHRREV_I32_e64: 2865 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 2866 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 2867 case AMDGPU::V_ASHRREV_I32_e32_vi: 2868 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 2869 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 
2870 case AMDGPU::V_ASHRREV_I32_e64_vi: 2871 2872 case AMDGPU::V_LSHLREV_B32_e32: 2873 case AMDGPU::V_LSHLREV_B32_e64: 2874 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 2875 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 2876 case AMDGPU::V_LSHLREV_B32_e32_vi: 2877 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 2878 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 2879 case AMDGPU::V_LSHLREV_B32_e64_vi: 2880 2881 case AMDGPU::V_LSHLREV_B16_e32: 2882 case AMDGPU::V_LSHLREV_B16_e64: 2883 case AMDGPU::V_LSHLREV_B16_e32_vi: 2884 case AMDGPU::V_LSHLREV_B16_e64_vi: 2885 case AMDGPU::V_LSHLREV_B16_gfx10: 2886 2887 case AMDGPU::V_LSHRREV_B16_e32: 2888 case AMDGPU::V_LSHRREV_B16_e64: 2889 case AMDGPU::V_LSHRREV_B16_e32_vi: 2890 case AMDGPU::V_LSHRREV_B16_e64_vi: 2891 case AMDGPU::V_LSHRREV_B16_gfx10: 2892 2893 case AMDGPU::V_ASHRREV_I16_e32: 2894 case AMDGPU::V_ASHRREV_I16_e64: 2895 case AMDGPU::V_ASHRREV_I16_e32_vi: 2896 case AMDGPU::V_ASHRREV_I16_e64_vi: 2897 case AMDGPU::V_ASHRREV_I16_gfx10: 2898 2899 case AMDGPU::V_LSHLREV_B64: 2900 case AMDGPU::V_LSHLREV_B64_gfx10: 2901 case AMDGPU::V_LSHLREV_B64_vi: 2902 2903 case AMDGPU::V_LSHRREV_B64: 2904 case AMDGPU::V_LSHRREV_B64_gfx10: 2905 case AMDGPU::V_LSHRREV_B64_vi: 2906 2907 case AMDGPU::V_ASHRREV_I64: 2908 case AMDGPU::V_ASHRREV_I64_gfx10: 2909 case AMDGPU::V_ASHRREV_I64_vi: 2910 2911 case AMDGPU::V_PK_LSHLREV_B16: 2912 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 2913 case AMDGPU::V_PK_LSHLREV_B16_vi: 2914 2915 case AMDGPU::V_PK_LSHRREV_B16: 2916 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 2917 case AMDGPU::V_PK_LSHRREV_B16_vi: 2918 case AMDGPU::V_PK_ASHRREV_I16: 2919 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 2920 case AMDGPU::V_PK_ASHRREV_I16_vi: 2921 return true; 2922 default: 2923 return false; 2924 } 2925 } 2926 2927 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2928 2929 using namespace SIInstrFlags; 2930 const unsigned Opcode = Inst.getOpcode(); 2931 const MCInstrDesc &Desc = MII.get(Opcode); 2932 2933 // lds_direct register is defined so that it can be used 2934 // with 9-bit operands only. Ignore encodings which do not accept these. 2935 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2936 return true; 2937 2938 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2939 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2940 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2941 2942 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2943 2944 // lds_direct cannot be specified as either src1 or src2. 2945 for (int SrcIdx : SrcIndices) { 2946 if (SrcIdx == -1) break; 2947 const MCOperand &Src = Inst.getOperand(SrcIdx); 2948 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2949 return false; 2950 } 2951 } 2952 2953 if (Src0Idx == -1) 2954 return true; 2955 2956 const MCOperand &Src = Inst.getOperand(Src0Idx); 2957 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2958 return true; 2959 2960 // lds_direct is specified as src0. Check additional limitations. 
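// For example (illustrative), an SDWA form such as v_mov_b32_sdwa and a reversed opcode such as v_subrev_f32 are both rejected by the check below.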
2961 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2962 } 2963 2964 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2965 unsigned Opcode = Inst.getOpcode(); 2966 const MCInstrDesc &Desc = MII.get(Opcode); 2967 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2968 return true; 2969 2970 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2971 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2972 2973 const int OpIndices[] = { Src0Idx, Src1Idx }; 2974 2975 unsigned NumLiterals = 0; 2976 uint32_t LiteralValue; 2977 2978 for (int OpIdx : OpIndices) { 2979 if (OpIdx == -1) break; 2980 2981 const MCOperand &MO = Inst.getOperand(OpIdx); 2982 if (MO.isImm() && 2983 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2984 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2985 !isInlineConstant(Inst, OpIdx)) { 2986 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2987 if (NumLiterals == 0 || LiteralValue != Value) { 2988 LiteralValue = Value; 2989 ++NumLiterals; 2990 } 2991 } 2992 } 2993 2994 return NumLiterals <= 1; 2995 } 2996 2997 // VOP3 literal is only allowed in GFX10+ and only one can be used 2998 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 2999 unsigned Opcode = Inst.getOpcode(); 3000 const MCInstrDesc &Desc = MII.get(Opcode); 3001 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3002 return true; 3003 3004 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3005 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3006 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3007 3008 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3009 3010 unsigned NumLiterals = 0; 3011 uint32_t LiteralValue; 3012 3013 for (int OpIdx : OpIndices) { 3014 if (OpIdx == -1) break; 3015 3016 const MCOperand &MO = Inst.getOperand(OpIdx); 3017 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3018 continue; 3019 3020 if (!isInlineConstant(Inst, OpIdx)) { 3021 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3022 if (NumLiterals == 0 || LiteralValue != Value) { 3023 LiteralValue = Value; 3024 ++NumLiterals; 3025 } 3026 } 3027 } 3028 3029 return !NumLiterals || 3030 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3031 } 3032 3033 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3034 const SMLoc &IDLoc) { 3035 if (!validateLdsDirect(Inst)) { 3036 Error(IDLoc, 3037 "invalid use of lds_direct"); 3038 return false; 3039 } 3040 if (!validateSOPLiteral(Inst)) { 3041 Error(IDLoc, 3042 "only one literal operand is allowed"); 3043 return false; 3044 } 3045 if (!validateVOP3Literal(Inst)) { 3046 Error(IDLoc, 3047 "invalid literal operand"); 3048 return false; 3049 } 3050 if (!validateConstantBusLimitations(Inst)) { 3051 Error(IDLoc, 3052 "invalid operand (violates constant bus restrictions)"); 3053 return false; 3054 } 3055 if (!validateEarlyClobberLimitations(Inst)) { 3056 Error(IDLoc, 3057 "destination must be different than all sources"); 3058 return false; 3059 } 3060 if (!validateIntClampSupported(Inst)) { 3061 Error(IDLoc, 3062 "integer clamping is not supported on this GPU"); 3063 return false; 3064 } 3065 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
3066 if (!validateMIMGD16(Inst)) { 3067 Error(IDLoc, 3068 "d16 modifier is not supported on this GPU"); 3069 return false; 3070 } 3071 if (!validateMIMGDim(Inst)) { 3072 Error(IDLoc, "dim modifier is required on this GPU"); 3073 return false; 3074 } 3075 if (!validateMIMGDataSize(Inst)) { 3076 Error(IDLoc, 3077 "image data size does not match dmask and tfe"); 3078 return false; 3079 } 3080 if (!validateMIMGAddrSize(Inst)) { 3081 Error(IDLoc, 3082 "image address size does not match dim and a16"); 3083 return false; 3084 } 3085 if (!validateMIMGAtomicDMask(Inst)) { 3086 Error(IDLoc, 3087 "invalid atomic image dmask"); 3088 return false; 3089 } 3090 if (!validateMIMGGatherDMask(Inst)) { 3091 Error(IDLoc, 3092 "invalid image_gather dmask: only one bit must be set"); 3093 return false; 3094 } 3095 3096 return true; 3097 } 3098 3099 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3100 const FeatureBitset &FBS, 3101 unsigned VariantID = 0); 3102 3103 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3104 OperandVector &Operands, 3105 MCStreamer &Out, 3106 uint64_t &ErrorInfo, 3107 bool MatchingInlineAsm) { 3108 MCInst Inst; 3109 unsigned Result = Match_Success; 3110 for (auto Variant : getMatchedVariants()) { 3111 uint64_t EI; 3112 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3113 Variant); 3114 // We order match statuses from least to most specific. We use the most specific 3115 // status as the result: 3116 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3117 if ((R == Match_Success) || 3118 (R == Match_PreferE32) || 3119 (R == Match_MissingFeature && Result != Match_PreferE32) || 3120 (R == Match_InvalidOperand && Result != Match_MissingFeature 3121 && Result != Match_PreferE32) || 3122 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3123 && Result != Match_MissingFeature 3124 && Result != Match_PreferE32)) { 3125 Result = R; 3126 ErrorInfo = EI; 3127 } 3128 if (R == Match_Success) 3129 break; 3130 } 3131 3132 switch (Result) { 3133 default: break; 3134 case Match_Success: 3135 if (!validateInstruction(Inst, IDLoc)) { 3136 return true; 3137 } 3138 Inst.setLoc(IDLoc); 3139 Out.EmitInstruction(Inst, getSTI()); 3140 return false; 3141 3142 case Match_MissingFeature: 3143 return Error(IDLoc, "instruction not supported on this GPU"); 3144 3145 case Match_MnemonicFail: { 3146 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3147 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3148 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3149 return Error(IDLoc, "invalid instruction" + Suggestion, 3150 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3151 } 3152 3153 case Match_InvalidOperand: { 3154 SMLoc ErrorLoc = IDLoc; 3155 if (ErrorInfo != ~0ULL) { 3156 if (ErrorInfo >= Operands.size()) { 3157 return Error(IDLoc, "too few operands for instruction"); 3158 } 3159 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3160 if (ErrorLoc == SMLoc()) 3161 ErrorLoc = IDLoc; 3162 } 3163 return Error(ErrorLoc, "invalid operand for instruction"); 3164 } 3165 3166 case Match_PreferE32: 3167 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3168 "should be encoded as e32"); 3169 } 3170 llvm_unreachable("Implement any new match types added!"); 3171 } 3172 3173 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3174 int64_t Tmp = -1; 3175 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3176 return true;
3177 } 3178 if (getParser().parseAbsoluteExpression(Tmp)) { 3179 return true; 3180 } 3181 Ret = static_cast<uint32_t>(Tmp); 3182 return false; 3183 } 3184 3185 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3186 uint32_t &Minor) { 3187 if (ParseAsAbsoluteExpression(Major)) 3188 return TokError("invalid major version"); 3189 3190 if (getLexer().isNot(AsmToken::Comma)) 3191 return TokError("minor version number required, comma expected"); 3192 Lex(); 3193 3194 if (ParseAsAbsoluteExpression(Minor)) 3195 return TokError("invalid minor version"); 3196 3197 return false; 3198 } 3199 3200 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3201 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3202 return TokError("directive only supported for amdgcn architecture"); 3203 3204 std::string Target; 3205 3206 SMLoc TargetStart = getTok().getLoc(); 3207 if (getParser().parseEscapedString(Target)) 3208 return true; 3209 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3210 3211 std::string ExpectedTarget; 3212 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3213 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3214 3215 if (Target != ExpectedTargetOS.str()) 3216 return getParser().Error(TargetRange.Start, "target must match options", 3217 TargetRange); 3218 3219 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3220 return false; 3221 } 3222 3223 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3224 return getParser().Error(Range.Start, "value out of range", Range); 3225 } 3226 3227 bool AMDGPUAsmParser::calculateGPRBlocks( 3228 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3229 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 3230 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 3231 unsigned &SGPRBlocks) { 3232 // TODO(scott.linder): These calculations are duplicated from 3233 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
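// Rough outline of the code below (a sketch, not a spec): on gfx10+ the SGPR count is forced to zero; otherwise the extra SGPRs implied by VCC, flat scratch and XNACK usage are added, the SGPR-init-bug fixup is applied, and both counts are then converted to granulated register blocks.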
3234 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3235 3236 unsigned NumVGPRs = NextFreeVGPR; 3237 unsigned NumSGPRs = NextFreeSGPR; 3238 3239 if (Version.Major >= 10) 3240 NumSGPRs = 0; 3241 else { 3242 unsigned MaxAddressableNumSGPRs = 3243 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3244 3245 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3246 NumSGPRs > MaxAddressableNumSGPRs) 3247 return OutOfRangeError(SGPRRange); 3248 3249 NumSGPRs += 3250 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3251 3252 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3253 NumSGPRs > MaxAddressableNumSGPRs) 3254 return OutOfRangeError(SGPRRange); 3255 3256 if (Features.test(FeatureSGPRInitBug)) 3257 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3258 } 3259 3260 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 3261 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3262 3263 return false; 3264 } 3265 3266 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3267 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3268 return TokError("directive only supported for amdgcn architecture"); 3269 3270 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3271 return TokError("directive only supported for amdhsa OS"); 3272 3273 StringRef KernelName; 3274 if (getParser().parseIdentifier(KernelName)) 3275 return true; 3276 3277 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3278 3279 StringSet<> Seen; 3280 3281 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3282 3283 SMRange VGPRRange; 3284 uint64_t NextFreeVGPR = 0; 3285 SMRange SGPRRange; 3286 uint64_t NextFreeSGPR = 0; 3287 unsigned UserSGPRCount = 0; 3288 bool ReserveVCC = true; 3289 bool ReserveFlatScr = true; 3290 bool ReserveXNACK = hasXNACK(); 3291 3292 while (true) { 3293 while (getLexer().is(AsmToken::EndOfStatement)) 3294 Lex(); 3295 3296 if (getLexer().isNot(AsmToken::Identifier)) 3297 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3298 3299 StringRef ID = getTok().getIdentifier(); 3300 SMRange IDRange = getTok().getLocRange(); 3301 Lex(); 3302 3303 if (ID == ".end_amdhsa_kernel") 3304 break; 3305 3306 if (Seen.find(ID) != Seen.end()) 3307 return TokError(".amdhsa_ directives cannot be repeated"); 3308 Seen.insert(ID); 3309 3310 SMLoc ValStart = getTok().getLoc(); 3311 int64_t IVal; 3312 if (getParser().parseAbsoluteExpression(IVal)) 3313 return true; 3314 SMLoc ValEnd = getTok().getLoc(); 3315 SMRange ValRange = SMRange(ValStart, ValEnd); 3316 3317 if (IVal < 0) 3318 return OutOfRangeError(ValRange); 3319 3320 uint64_t Val = IVal; 3321 3322 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3323 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3324 return OutOfRangeError(RANGE); \ 3325 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3326 3327 if (ID == ".amdhsa_group_segment_fixed_size") { 3328 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3329 return OutOfRangeError(ValRange); 3330 KD.group_segment_fixed_size = Val; 3331 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3332 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3333 return OutOfRangeError(ValRange); 3334 KD.private_segment_fixed_size = Val; 3335 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3336 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3337 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3338 Val, ValRange); 3339 UserSGPRCount += 4; 3340 } else if (ID == 
".amdhsa_user_sgpr_dispatch_ptr") { 3341 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3342 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3343 ValRange); 3344 UserSGPRCount += 2; 3345 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3346 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3347 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3348 ValRange); 3349 UserSGPRCount += 2; 3350 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3351 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3352 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3353 Val, ValRange); 3354 UserSGPRCount += 2; 3355 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3356 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3357 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3358 ValRange); 3359 UserSGPRCount += 2; 3360 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3361 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3362 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3363 ValRange); 3364 UserSGPRCount += 2; 3365 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3366 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3367 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3368 Val, ValRange); 3369 UserSGPRCount += 1; 3370 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3371 PARSE_BITS_ENTRY( 3372 KD.compute_pgm_rsrc2, 3373 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3374 ValRange); 3375 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3376 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3377 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3378 ValRange); 3379 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3380 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3381 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3382 ValRange); 3383 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3384 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3385 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3386 ValRange); 3387 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3388 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3389 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3390 ValRange); 3391 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3392 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3393 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3394 ValRange); 3395 } else if (ID == ".amdhsa_next_free_vgpr") { 3396 VGPRRange = ValRange; 3397 NextFreeVGPR = Val; 3398 } else if (ID == ".amdhsa_next_free_sgpr") { 3399 SGPRRange = ValRange; 3400 NextFreeSGPR = Val; 3401 } else if (ID == ".amdhsa_reserve_vcc") { 3402 if (!isUInt<1>(Val)) 3403 return OutOfRangeError(ValRange); 3404 ReserveVCC = Val; 3405 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3406 if (IVersion.Major < 7) 3407 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3408 IDRange); 3409 if (!isUInt<1>(Val)) 3410 return OutOfRangeError(ValRange); 3411 ReserveFlatScr = Val; 3412 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3413 if (IVersion.Major < 8) 3414 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3415 IDRange); 3416 if (!isUInt<1>(Val)) 3417 return OutOfRangeError(ValRange); 3418 ReserveXNACK = Val; 3419 } else if (ID == ".amdhsa_float_round_mode_32") { 3420 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3421 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3422 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3423 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3424 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3425 } else if (ID == 
".amdhsa_float_denorm_mode_32") { 3426 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3427 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3428 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3429 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3430 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3431 ValRange); 3432 } else if (ID == ".amdhsa_dx10_clamp") { 3433 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3434 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3435 } else if (ID == ".amdhsa_ieee_mode") { 3436 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3437 Val, ValRange); 3438 } else if (ID == ".amdhsa_fp16_overflow") { 3439 if (IVersion.Major < 9) 3440 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3441 IDRange); 3442 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3443 ValRange); 3444 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3445 if (IVersion.Major < 10) 3446 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3447 IDRange); 3448 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3449 ValRange); 3450 } else if (ID == ".amdhsa_memory_ordered") { 3451 if (IVersion.Major < 10) 3452 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3453 IDRange); 3454 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3455 ValRange); 3456 } else if (ID == ".amdhsa_forward_progress") { 3457 if (IVersion.Major < 10) 3458 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3459 IDRange); 3460 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3461 ValRange); 3462 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3463 PARSE_BITS_ENTRY( 3464 KD.compute_pgm_rsrc2, 3465 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3466 ValRange); 3467 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3468 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3469 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3470 Val, ValRange); 3471 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3472 PARSE_BITS_ENTRY( 3473 KD.compute_pgm_rsrc2, 3474 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3475 ValRange); 3476 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3477 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3478 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3479 Val, ValRange); 3480 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3481 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3482 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3483 Val, ValRange); 3484 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3485 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3486 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3487 Val, ValRange); 3488 } else if (ID == ".amdhsa_exception_int_div_zero") { 3489 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3490 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3491 Val, ValRange); 3492 } else { 3493 return getParser().Error(IDRange.Start, 3494 "unknown .amdhsa_kernel directive", IDRange); 3495 } 3496 3497 #undef PARSE_BITS_ENTRY 3498 } 3499 3500 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3501 return TokError(".amdhsa_next_free_vgpr directive is required"); 3502 3503 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3504 return TokError(".amdhsa_next_free_sgpr directive is required"); 3505 3506 unsigned VGPRBlocks; 3507 unsigned SGPRBlocks; 3508 if (calculateGPRBlocks(getFeatureBits(), 
ReserveVCC, ReserveFlatScr, 3509 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3510 SGPRRange, VGPRBlocks, SGPRBlocks)) 3511 return true; 3512 3513 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3514 VGPRBlocks)) 3515 return OutOfRangeError(VGPRRange); 3516 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3517 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3518 3519 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3520 SGPRBlocks)) 3521 return OutOfRangeError(SGPRRange); 3522 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3523 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3524 SGPRBlocks); 3525 3526 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3527 return TokError("too many user SGPRs enabled"); 3528 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3529 UserSGPRCount); 3530 3531 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3532 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3533 ReserveFlatScr, ReserveXNACK); 3534 return false; 3535 } 3536 3537 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3538 uint32_t Major; 3539 uint32_t Minor; 3540 3541 if (ParseDirectiveMajorMinor(Major, Minor)) 3542 return true; 3543 3544 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3545 return false; 3546 } 3547 3548 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3549 uint32_t Major; 3550 uint32_t Minor; 3551 uint32_t Stepping; 3552 StringRef VendorName; 3553 StringRef ArchName; 3554 3555 // If this directive has no arguments, then use the ISA version for the 3556 // targeted GPU. 3557 if (getLexer().is(AsmToken::EndOfStatement)) { 3558 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3559 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3560 ISA.Stepping, 3561 "AMD", "AMDGPU"); 3562 return false; 3563 } 3564 3565 if (ParseDirectiveMajorMinor(Major, Minor)) 3566 return true; 3567 3568 if (getLexer().isNot(AsmToken::Comma)) 3569 return TokError("stepping version number required, comma expected"); 3570 Lex(); 3571 3572 if (ParseAsAbsoluteExpression(Stepping)) 3573 return TokError("invalid stepping version"); 3574 3575 if (getLexer().isNot(AsmToken::Comma)) 3576 return TokError("vendor name required, comma expected"); 3577 Lex(); 3578 3579 if (getLexer().isNot(AsmToken::String)) 3580 return TokError("invalid vendor name"); 3581 3582 VendorName = getLexer().getTok().getStringContents(); 3583 Lex(); 3584 3585 if (getLexer().isNot(AsmToken::Comma)) 3586 return TokError("arch name required, comma expected"); 3587 Lex(); 3588 3589 if (getLexer().isNot(AsmToken::String)) 3590 return TokError("invalid arch name"); 3591 3592 ArchName = getLexer().getTok().getStringContents(); 3593 Lex(); 3594 3595 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3596 VendorName, ArchName); 3597 return false; 3598 } 3599 3600 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3601 amd_kernel_code_t &Header) { 3602 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3603 // assembly for backwards compatibility. 
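  // A line such as (typical .amd_kernel_code_t field syntax)
  //   max_scratch_backing_memory_byte_size = 0
  // is therefore consumed and silently ignored; all other fields are handed to
  // parseAmdKernelCodeField below as "name = value" assignments.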
3604 if (ID == "max_scratch_backing_memory_byte_size") { 3605 Parser.eatToEndOfStatement(); 3606 return false; 3607 } 3608 3609 SmallString<40> ErrStr; 3610 raw_svector_ostream Err(ErrStr); 3611 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3612 return TokError(Err.str()); 3613 } 3614 Lex(); 3615 3616 if (ID == "enable_wgp_mode") { 3617 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3618 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3619 } 3620 3621 if (ID == "enable_mem_ordered") { 3622 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3623 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3624 } 3625 3626 if (ID == "enable_fwd_progress") { 3627 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3628 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3629 } 3630 3631 return false; 3632 } 3633 3634 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3635 amd_kernel_code_t Header; 3636 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3637 3638 while (true) { 3639 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3640 // will set the current token to EndOfStatement. 3641 while(getLexer().is(AsmToken::EndOfStatement)) 3642 Lex(); 3643 3644 if (getLexer().isNot(AsmToken::Identifier)) 3645 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3646 3647 StringRef ID = getLexer().getTok().getIdentifier(); 3648 Lex(); 3649 3650 if (ID == ".end_amd_kernel_code_t") 3651 break; 3652 3653 if (ParseAMDKernelCodeTValue(ID, Header)) 3654 return true; 3655 } 3656 3657 getTargetStreamer().EmitAMDKernelCodeT(Header); 3658 3659 return false; 3660 } 3661 3662 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3663 if (getLexer().isNot(AsmToken::Identifier)) 3664 return TokError("expected symbol name"); 3665 3666 StringRef KernelName = Parser.getTok().getString(); 3667 3668 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3669 ELF::STT_AMDGPU_HSA_KERNEL); 3670 Lex(); 3671 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3672 KernelScope.initialize(getContext()); 3673 return false; 3674 } 3675 3676 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3677 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3678 return Error(getParser().getTok().getLoc(), 3679 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3680 "architectures"); 3681 } 3682 3683 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3684 3685 std::string ISAVersionStringFromSTI; 3686 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3687 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3688 3689 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3690 return Error(getParser().getTok().getLoc(), 3691 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3692 "arguments specified through the command line"); 3693 } 3694 3695 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3696 Lex(); 3697 3698 return false; 3699 } 3700 3701 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3702 const char *AssemblerDirectiveBegin; 3703 const char *AssemblerDirectiveEnd; 3704 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3705 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3706 ? 
std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3707 HSAMD::V3::AssemblerDirectiveEnd) 3708 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3709 HSAMD::AssemblerDirectiveEnd); 3710 3711 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3712 return Error(getParser().getTok().getLoc(), 3713 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3714 "not available on non-amdhsa OSes")).str()); 3715 } 3716 3717 std::string HSAMetadataString; 3718 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3719 HSAMetadataString)) 3720 return true; 3721 3722 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3723 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3724 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3725 } else { 3726 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3727 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3728 } 3729 3730 return false; 3731 } 3732 3733 /// Common code to parse out a block of text (typically YAML) between start and 3734 /// end directives. 3735 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3736 const char *AssemblerDirectiveEnd, 3737 std::string &CollectString) { 3738 3739 raw_string_ostream CollectStream(CollectString); 3740 3741 getLexer().setSkipSpace(false); 3742 3743 bool FoundEnd = false; 3744 while (!getLexer().is(AsmToken::Eof)) { 3745 while (getLexer().is(AsmToken::Space)) { 3746 CollectStream << getLexer().getTok().getString(); 3747 Lex(); 3748 } 3749 3750 if (getLexer().is(AsmToken::Identifier)) { 3751 StringRef ID = getLexer().getTok().getIdentifier(); 3752 if (ID == AssemblerDirectiveEnd) { 3753 Lex(); 3754 FoundEnd = true; 3755 break; 3756 } 3757 } 3758 3759 CollectStream << Parser.parseStringToEndOfStatement() 3760 << getContext().getAsmInfo()->getSeparatorString(); 3761 3762 Parser.eatToEndOfStatement(); 3763 } 3764 3765 getLexer().setSkipSpace(true); 3766 3767 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3768 return TokError(Twine("expected directive ") + 3769 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3770 } 3771 3772 CollectStream.flush(); 3773 return false; 3774 } 3775 3776 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3777 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3778 std::string String; 3779 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3780 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3781 return true; 3782 3783 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3784 if (!PALMetadata->setFromString(String)) 3785 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3786 return false; 3787 } 3788 3789 /// Parse the assembler directive for old linear-format PAL metadata. 
3790 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3791 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3792 return Error(getParser().getTok().getLoc(), 3793 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3794 "not available on non-amdpal OSes")).str()); 3795 } 3796 3797 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3798 PALMetadata->setLegacy(); 3799 for (;;) { 3800 uint32_t Key, Value; 3801 if (ParseAsAbsoluteExpression(Key)) { 3802 return TokError(Twine("invalid value in ") + 3803 Twine(PALMD::AssemblerDirective)); 3804 } 3805 if (getLexer().isNot(AsmToken::Comma)) { 3806 return TokError(Twine("expected an even number of values in ") + 3807 Twine(PALMD::AssemblerDirective)); 3808 } 3809 Lex(); 3810 if (ParseAsAbsoluteExpression(Value)) { 3811 return TokError(Twine("invalid value in ") + 3812 Twine(PALMD::AssemblerDirective)); 3813 } 3814 PALMetadata->setRegister(Key, Value); 3815 if (getLexer().isNot(AsmToken::Comma)) 3816 break; 3817 Lex(); 3818 } 3819 return false; 3820 } 3821 3822 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3823 StringRef IDVal = DirectiveID.getString(); 3824 3825 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3826 if (IDVal == ".amdgcn_target") 3827 return ParseDirectiveAMDGCNTarget(); 3828 3829 if (IDVal == ".amdhsa_kernel") 3830 return ParseDirectiveAMDHSAKernel(); 3831 3832 // TODO: Restructure/combine with PAL metadata directive. 3833 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3834 return ParseDirectiveHSAMetadata(); 3835 } else { 3836 if (IDVal == ".hsa_code_object_version") 3837 return ParseDirectiveHSACodeObjectVersion(); 3838 3839 if (IDVal == ".hsa_code_object_isa") 3840 return ParseDirectiveHSACodeObjectISA(); 3841 3842 if (IDVal == ".amd_kernel_code_t") 3843 return ParseDirectiveAMDKernelCodeT(); 3844 3845 if (IDVal == ".amdgpu_hsa_kernel") 3846 return ParseDirectiveAMDGPUHsaKernel(); 3847 3848 if (IDVal == ".amd_amdgpu_isa") 3849 return ParseDirectiveISAVersion(); 3850 3851 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3852 return ParseDirectiveHSAMetadata(); 3853 } 3854 3855 if (IDVal == PALMD::AssemblerDirectiveBegin) 3856 return ParseDirectivePALMetadataBegin(); 3857 3858 if (IDVal == PALMD::AssemblerDirective) 3859 return ParseDirectivePALMetadata(); 3860 3861 return true; 3862 } 3863 3864 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3865 unsigned RegNo) const { 3866 3867 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3868 R.isValid(); ++R) { 3869 if (*R == RegNo) 3870 return isGFX9() || isGFX10(); 3871 } 3872 3873 // GFX10 has 2 more SGPRs 104 and 105. 3874 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 3875 R.isValid(); ++R) { 3876 if (*R == RegNo) 3877 return hasSGPR104_SGPR105(); 3878 } 3879 3880 switch (RegNo) { 3881 case AMDGPU::TBA: 3882 case AMDGPU::TBA_LO: 3883 case AMDGPU::TBA_HI: 3884 case AMDGPU::TMA: 3885 case AMDGPU::TMA_LO: 3886 case AMDGPU::TMA_HI: 3887 return !isGFX9() && !isGFX10(); 3888 case AMDGPU::XNACK_MASK: 3889 case AMDGPU::XNACK_MASK_LO: 3890 case AMDGPU::XNACK_MASK_HI: 3891 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 3892 case AMDGPU::SGPR_NULL: 3893 return isGFX10(); 3894 default: 3895 break; 3896 } 3897 3898 if (isInlineValue(RegNo)) 3899 return !isCI() && !isSI() && !isVI(); 3900 3901 if (isCI()) 3902 return true; 3903 3904 if (isSI() || isGFX10()) { 3905 // No flat_scr on SI. 
3906 // On GFX10 flat scratch is not a valid register operand and can only be 3907 // accessed with s_setreg/s_getreg. 3908 switch (RegNo) { 3909 case AMDGPU::FLAT_SCR: 3910 case AMDGPU::FLAT_SCR_LO: 3911 case AMDGPU::FLAT_SCR_HI: 3912 return false; 3913 default: 3914 return true; 3915 } 3916 } 3917 3918 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3919 // SI/CI have. 3920 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3921 R.isValid(); ++R) { 3922 if (*R == RegNo) 3923 return hasSGPR102_SGPR103(); 3924 } 3925 3926 return true; 3927 } 3928 3929 OperandMatchResultTy 3930 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 3931 OperandMode Mode) { 3932 // Try to parse with a custom parser 3933 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3934 3935 // If we successfully parsed the operand or if there as an error parsing, 3936 // we are done. 3937 // 3938 // If we are parsing after we reach EndOfStatement then this means we 3939 // are appending default values to the Operands list. This is only done 3940 // by custom parser, so we shouldn't continue on to the generic parsing. 3941 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3942 getLexer().is(AsmToken::EndOfStatement)) 3943 return ResTy; 3944 3945 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 3946 unsigned Prefix = Operands.size(); 3947 SMLoc LBraceLoc = getTok().getLoc(); 3948 Parser.Lex(); // eat the '[' 3949 3950 for (;;) { 3951 ResTy = parseReg(Operands); 3952 if (ResTy != MatchOperand_Success) 3953 return ResTy; 3954 3955 if (getLexer().is(AsmToken::RBrac)) 3956 break; 3957 3958 if (getLexer().isNot(AsmToken::Comma)) 3959 return MatchOperand_ParseFail; 3960 Parser.Lex(); 3961 } 3962 3963 if (Operands.size() - Prefix > 1) { 3964 Operands.insert(Operands.begin() + Prefix, 3965 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 3966 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 3967 getTok().getLoc())); 3968 } 3969 3970 Parser.Lex(); // eat the ']' 3971 return MatchOperand_Success; 3972 } 3973 3974 return parseRegOrImm(Operands); 3975 } 3976 3977 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3978 // Clear any forced encodings from the previous instruction. 3979 setForcedEncodingSize(0); 3980 setForcedDPP(false); 3981 setForcedSDWA(false); 3982 3983 if (Name.endswith("_e64")) { 3984 setForcedEncodingSize(64); 3985 return Name.substr(0, Name.size() - 4); 3986 } else if (Name.endswith("_e32")) { 3987 setForcedEncodingSize(32); 3988 return Name.substr(0, Name.size() - 4); 3989 } else if (Name.endswith("_dpp")) { 3990 setForcedDPP(true); 3991 return Name.substr(0, Name.size() - 4); 3992 } else if (Name.endswith("_sdwa")) { 3993 setForcedSDWA(true); 3994 return Name.substr(0, Name.size() - 5); 3995 } 3996 return Name; 3997 } 3998 3999 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4000 StringRef Name, 4001 SMLoc NameLoc, OperandVector &Operands) { 4002 // Add the instruction mnemonic 4003 Name = parseMnemonicSuffix(Name); 4004 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4005 4006 bool IsMIMG = Name.startswith("image_"); 4007 4008 while (!getLexer().is(AsmToken::EndOfStatement)) { 4009 OperandMode Mode = OperandMode_Default; 4010 if (IsMIMG && isGFX10() && Operands.size() == 2) 4011 Mode = OperandMode_NSA; 4012 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4013 4014 // Eat the comma or space if there is one. 
4015 if (getLexer().is(AsmToken::Comma)) 4016 Parser.Lex(); 4017 4018 switch (Res) { 4019 case MatchOperand_Success: break; 4020 case MatchOperand_ParseFail: 4021 // FIXME: use real operand location rather than the current location. 4022 Error(getLexer().getLoc(), "failed parsing operand."); 4023 while (!getLexer().is(AsmToken::EndOfStatement)) { 4024 Parser.Lex(); 4025 } 4026 return true; 4027 case MatchOperand_NoMatch: 4028 // FIXME: use real operand location rather than the current location. 4029 Error(getLexer().getLoc(), "not a valid operand."); 4030 while (!getLexer().is(AsmToken::EndOfStatement)) { 4031 Parser.Lex(); 4032 } 4033 return true; 4034 } 4035 } 4036 4037 return false; 4038 } 4039 4040 //===----------------------------------------------------------------------===// 4041 // Utility functions 4042 //===----------------------------------------------------------------------===// 4043 4044 OperandMatchResultTy 4045 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4046 4047 if (!trySkipId(Prefix, AsmToken::Colon)) 4048 return MatchOperand_NoMatch; 4049 4050 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4051 } 4052 4053 OperandMatchResultTy 4054 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4055 AMDGPUOperand::ImmTy ImmTy, 4056 bool (*ConvertResult)(int64_t&)) { 4057 SMLoc S = getLoc(); 4058 int64_t Value = 0; 4059 4060 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4061 if (Res != MatchOperand_Success) 4062 return Res; 4063 4064 if (ConvertResult && !ConvertResult(Value)) { 4065 Error(S, "invalid " + StringRef(Prefix) + " value."); 4066 } 4067 4068 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4069 return MatchOperand_Success; 4070 } 4071 4072 OperandMatchResultTy 4073 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4074 OperandVector &Operands, 4075 AMDGPUOperand::ImmTy ImmTy, 4076 bool (*ConvertResult)(int64_t&)) { 4077 SMLoc S = getLoc(); 4078 if (!trySkipId(Prefix, AsmToken::Colon)) 4079 return MatchOperand_NoMatch; 4080 4081 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4082 return MatchOperand_ParseFail; 4083 4084 unsigned Val = 0; 4085 const unsigned MaxSize = 4; 4086 4087 // FIXME: How to verify the number of elements matches the number of src 4088 // operands? 4089 for (int I = 0; ; ++I) { 4090 int64_t Op; 4091 SMLoc Loc = getLoc(); 4092 if (!parseExpr(Op)) 4093 return MatchOperand_ParseFail; 4094 4095 if (Op != 0 && Op != 1) { 4096 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4097 return MatchOperand_ParseFail; 4098 } 4099 4100 Val |= (Op << I); 4101 4102 if (trySkipToken(AsmToken::RBrac)) 4103 break; 4104 4105 if (I + 1 == MaxSize) { 4106 Error(getLoc(), "expected a closing square bracket"); 4107 return MatchOperand_ParseFail; 4108 } 4109 4110 if (!skipToken(AsmToken::Comma, "expected a comma")) 4111 return MatchOperand_ParseFail; 4112 } 4113 4114 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4115 return MatchOperand_Success; 4116 } 4117 4118 OperandMatchResultTy 4119 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4120 AMDGPUOperand::ImmTy ImmTy) { 4121 int64_t Bit = 0; 4122 SMLoc S = Parser.getTok().getLoc(); 4123 4124 // We are at the end of the statement, and this is a default argument, so 4125 // use a default value. 
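  // For example, with Name == "glc" the accepted spellings are "glc" (Bit = 1)
  // and "noglc" (Bit = 0); if neither is present, the default of 0 is used.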
4126 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4127 switch(getLexer().getKind()) { 4128 case AsmToken::Identifier: { 4129 StringRef Tok = Parser.getTok().getString(); 4130 if (Tok == Name) { 4131 if (Tok == "r128" && isGFX9()) 4132 Error(S, "r128 modifier is not supported on this GPU"); 4133 if (Tok == "a16" && !isGFX9()) 4134 Error(S, "a16 modifier is not supported on this GPU"); 4135 Bit = 1; 4136 Parser.Lex(); 4137 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4138 Bit = 0; 4139 Parser.Lex(); 4140 } else { 4141 return MatchOperand_NoMatch; 4142 } 4143 break; 4144 } 4145 default: 4146 return MatchOperand_NoMatch; 4147 } 4148 } 4149 4150 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4151 return MatchOperand_ParseFail; 4152 4153 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4154 return MatchOperand_Success; 4155 } 4156 4157 static void addOptionalImmOperand( 4158 MCInst& Inst, const OperandVector& Operands, 4159 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4160 AMDGPUOperand::ImmTy ImmT, 4161 int64_t Default = 0) { 4162 auto i = OptionalIdx.find(ImmT); 4163 if (i != OptionalIdx.end()) { 4164 unsigned Idx = i->second; 4165 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4166 } else { 4167 Inst.addOperand(MCOperand::createImm(Default)); 4168 } 4169 } 4170 4171 OperandMatchResultTy 4172 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4173 if (getLexer().isNot(AsmToken::Identifier)) { 4174 return MatchOperand_NoMatch; 4175 } 4176 StringRef Tok = Parser.getTok().getString(); 4177 if (Tok != Prefix) { 4178 return MatchOperand_NoMatch; 4179 } 4180 4181 Parser.Lex(); 4182 if (getLexer().isNot(AsmToken::Colon)) { 4183 return MatchOperand_ParseFail; 4184 } 4185 4186 Parser.Lex(); 4187 if (getLexer().isNot(AsmToken::Identifier)) { 4188 return MatchOperand_ParseFail; 4189 } 4190 4191 Value = Parser.getTok().getString(); 4192 return MatchOperand_Success; 4193 } 4194 4195 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4196 // values to live in a joint format operand in the MCInst encoding. 4197 OperandMatchResultTy 4198 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4199 SMLoc S = Parser.getTok().getLoc(); 4200 int64_t Dfmt = 0, Nfmt = 0; 4201 // dfmt and nfmt can appear in either order, and each is optional. 
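  // For example, "dfmt:4, nfmt:2" and "nfmt:2, dfmt:4" both yield
  // Format = 4 | (2 << 4) = 0x24; an omitted field contributes 0.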
4202 bool GotDfmt = false, GotNfmt = false; 4203 while (!GotDfmt || !GotNfmt) { 4204 if (!GotDfmt) { 4205 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4206 if (Res != MatchOperand_NoMatch) { 4207 if (Res != MatchOperand_Success) 4208 return Res; 4209 if (Dfmt >= 16) { 4210 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4211 return MatchOperand_ParseFail; 4212 } 4213 GotDfmt = true; 4214 Parser.Lex(); 4215 continue; 4216 } 4217 } 4218 if (!GotNfmt) { 4219 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4220 if (Res != MatchOperand_NoMatch) { 4221 if (Res != MatchOperand_Success) 4222 return Res; 4223 if (Nfmt >= 8) { 4224 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4225 return MatchOperand_ParseFail; 4226 } 4227 GotNfmt = true; 4228 Parser.Lex(); 4229 continue; 4230 } 4231 } 4232 break; 4233 } 4234 if (!GotDfmt && !GotNfmt) 4235 return MatchOperand_NoMatch; 4236 auto Format = Dfmt | Nfmt << 4; 4237 Operands.push_back( 4238 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4239 return MatchOperand_Success; 4240 } 4241 4242 //===----------------------------------------------------------------------===// 4243 // ds 4244 //===----------------------------------------------------------------------===// 4245 4246 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4247 const OperandVector &Operands) { 4248 OptionalImmIndexMap OptionalIdx; 4249 4250 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4251 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4252 4253 // Add the register arguments 4254 if (Op.isReg()) { 4255 Op.addRegOperands(Inst, 1); 4256 continue; 4257 } 4258 4259 // Handle optional arguments 4260 OptionalIdx[Op.getImmTy()] = i; 4261 } 4262 4263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4264 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4266 4267 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4268 } 4269 4270 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4271 bool IsGdsHardcoded) { 4272 OptionalImmIndexMap OptionalIdx; 4273 4274 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4275 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4276 4277 // Add the register arguments 4278 if (Op.isReg()) { 4279 Op.addRegOperands(Inst, 1); 4280 continue; 4281 } 4282 4283 if (Op.isToken() && Op.getToken() == "gds") { 4284 IsGdsHardcoded = true; 4285 continue; 4286 } 4287 4288 // Handle optional arguments 4289 OptionalIdx[Op.getImmTy()] = i; 4290 } 4291 4292 AMDGPUOperand::ImmTy OffsetType = 4293 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4294 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4295 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4296 AMDGPUOperand::ImmTyOffset; 4297 4298 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4299 4300 if (!IsGdsHardcoded) { 4301 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4302 } 4303 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4304 } 4305 4306 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4307 OptionalImmIndexMap OptionalIdx; 4308 4309 unsigned OperandIdx[4]; 4310 unsigned EnMask = 0; 4311 int SrcIdx = 0; 4312 4313 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4314 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4315 4316 // Add the register arguments 4317 if (Op.isReg()) { 4318 assert(SrcIdx < 4); 4319 OperandIdx[SrcIdx] = Inst.size(); 4320 Op.addRegOperands(Inst, 1); 4321 ++SrcIdx; 4322 continue; 4323 } 4324 4325 if (Op.isOff()) { 4326 assert(SrcIdx < 4); 4327 OperandIdx[SrcIdx] = Inst.size(); 4328 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4329 ++SrcIdx; 4330 continue; 4331 } 4332 4333 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4334 Op.addImmOperands(Inst, 1); 4335 continue; 4336 } 4337 4338 if (Op.isToken() && Op.getToken() == "done") 4339 continue; 4340 4341 // Handle optional arguments 4342 OptionalIdx[Op.getImmTy()] = i; 4343 } 4344 4345 assert(SrcIdx == 4); 4346 4347 bool Compr = false; 4348 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4349 Compr = true; 4350 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4351 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4352 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4353 } 4354 4355 for (auto i = 0; i < SrcIdx; ++i) { 4356 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4357 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4358 } 4359 } 4360 4361 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4363 4364 Inst.addOperand(MCOperand::createImm(EnMask)); 4365 } 4366 4367 //===----------------------------------------------------------------------===// 4368 // s_waitcnt 4369 //===----------------------------------------------------------------------===// 4370 4371 static bool 4372 encodeCnt( 4373 const AMDGPU::IsaVersion ISA, 4374 int64_t &IntVal, 4375 int64_t CntVal, 4376 bool Saturate, 4377 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4378 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4379 { 4380 bool Failed = false; 4381 4382 IntVal = encode(ISA, IntVal, CntVal); 4383 if (CntVal != decode(ISA, IntVal)) { 4384 if (Saturate) { 4385 IntVal = encode(ISA, IntVal, -1); 4386 } else { 4387 Failed = true; 4388 } 4389 } 4390 return Failed; 4391 } 4392 4393 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4394 4395 SMLoc CntLoc = getLoc(); 4396 StringRef CntName = getTokenStr(); 4397 4398 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4399 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4400 return false; 4401 4402 int64_t CntVal; 4403 SMLoc ValLoc = getLoc(); 4404 if (!parseExpr(CntVal)) 4405 return false; 4406 4407 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4408 4409 bool Failed = true; 4410 bool Sat = CntName.endswith("_sat"); 4411 4412 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4413 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4414 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4415 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4416 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4417 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4418 } else { 4419 Error(CntLoc, "invalid counter name " + CntName); 4420 return false; 4421 } 4422 4423 if (Failed) { 4424 Error(ValLoc, "too large value for " + CntName); 4425 return false; 4426 } 4427 4428 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4429 return false; 4430 4431 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4432 if (isToken(AsmToken::EndOfStatement)) { 4433 Error(getLoc(), "expected a counter name"); 4434 return false; 4435 } 4436 } 4437 4438 return true; 4439 } 4440 4441 OperandMatchResultTy 4442 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4443 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4444 int64_t Waitcnt = getWaitcntBitMask(ISA); 4445 SMLoc S = getLoc(); 4446 4447 // If parse failed, do not return error code 4448 // to avoid excessive error messages. 
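  // Two operand forms are handled here: a list of named counters, e.g.
  //   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  // (optionally separated by '&' or ','), or a plain integer expression that
  // gives the encoded wait mask directly.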
4449 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4450 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4451 } else { 4452 parseExpr(Waitcnt); 4453 } 4454 4455 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4456 return MatchOperand_Success; 4457 } 4458 4459 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 4460 int64_t &Width) { 4461 using namespace llvm::AMDGPU::Hwreg; 4462 4463 if (Parser.getTok().getString() != "hwreg") 4464 return true; 4465 Parser.Lex(); 4466 4467 if (getLexer().isNot(AsmToken::LParen)) 4468 return true; 4469 Parser.Lex(); 4470 4471 if (getLexer().is(AsmToken::Identifier)) { 4472 HwReg.IsSymbolic = true; 4473 HwReg.Id = ID_UNKNOWN_; 4474 const StringRef tok = Parser.getTok().getString(); 4475 int Last = ID_SYMBOLIC_LAST_; 4476 if (isSI() || isCI() || isVI()) 4477 Last = ID_SYMBOLIC_FIRST_GFX9_; 4478 else if (isGFX9()) 4479 Last = ID_SYMBOLIC_FIRST_GFX10_; 4480 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 4481 if (tok == IdSymbolic[i]) { 4482 HwReg.Id = i; 4483 break; 4484 } 4485 } 4486 Parser.Lex(); 4487 } else { 4488 HwReg.IsSymbolic = false; 4489 if (getLexer().isNot(AsmToken::Integer)) 4490 return true; 4491 if (getParser().parseAbsoluteExpression(HwReg.Id)) 4492 return true; 4493 } 4494 4495 if (getLexer().is(AsmToken::RParen)) { 4496 Parser.Lex(); 4497 return false; 4498 } 4499 4500 // optional params 4501 if (getLexer().isNot(AsmToken::Comma)) 4502 return true; 4503 Parser.Lex(); 4504 4505 if (getLexer().isNot(AsmToken::Integer)) 4506 return true; 4507 if (getParser().parseAbsoluteExpression(Offset)) 4508 return true; 4509 4510 if (getLexer().isNot(AsmToken::Comma)) 4511 return true; 4512 Parser.Lex(); 4513 4514 if (getLexer().isNot(AsmToken::Integer)) 4515 return true; 4516 if (getParser().parseAbsoluteExpression(Width)) 4517 return true; 4518 4519 if (getLexer().isNot(AsmToken::RParen)) 4520 return true; 4521 Parser.Lex(); 4522 4523 return false; 4524 } 4525 4526 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4527 using namespace llvm::AMDGPU::Hwreg; 4528 4529 int64_t Imm16Val = 0; 4530 SMLoc S = Parser.getTok().getLoc(); 4531 4532 switch(getLexer().getKind()) { 4533 default: return MatchOperand_NoMatch; 4534 case AsmToken::Integer: 4535 // The operand can be an integer value. 4536 if (getParser().parseAbsoluteExpression(Imm16Val)) 4537 return MatchOperand_NoMatch; 4538 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4539 Error(S, "invalid immediate: only 16-bit values are legal"); 4540 // Do not return error code, but create an imm operand anyway and proceed 4541 // to the next operand, if any. That avoids unneccessary error messages. 
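    // The symbolic form handled below is
    //   hwreg(<name or 6-bit id>[, <5-bit bit offset>, <1..32 bit width>]),
    // e.g. (a sketch of typical syntax):
    //   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 32)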
4542 } 4543 break; 4544 4545 case AsmToken::Identifier: { 4546 OperandInfoTy HwReg(ID_UNKNOWN_); 4547 int64_t Offset = OFFSET_DEFAULT_; 4548 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4549 if (parseHwregConstruct(HwReg, Offset, Width)) 4550 return MatchOperand_ParseFail; 4551 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4552 if (HwReg.IsSymbolic) 4553 Error(S, "invalid symbolic name of hardware register"); 4554 else 4555 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4556 } 4557 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4558 Error(S, "invalid bit offset: only 5-bit values are legal"); 4559 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4560 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4561 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4562 } 4563 break; 4564 } 4565 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4566 return MatchOperand_Success; 4567 } 4568 4569 bool AMDGPUOperand::isSWaitCnt() const { 4570 return isImm(); 4571 } 4572 4573 bool AMDGPUOperand::isHwreg() const { 4574 return isImmTy(ImmTyHwreg); 4575 } 4576 4577 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4578 using namespace llvm::AMDGPU::SendMsg; 4579 4580 if (Parser.getTok().getString() != "sendmsg") 4581 return true; 4582 Parser.Lex(); 4583 4584 if (getLexer().isNot(AsmToken::LParen)) 4585 return true; 4586 Parser.Lex(); 4587 4588 if (getLexer().is(AsmToken::Identifier)) { 4589 Msg.IsSymbolic = true; 4590 Msg.Id = ID_UNKNOWN_; 4591 const std::string tok = Parser.getTok().getString(); 4592 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4593 switch(i) { 4594 default: continue; // Omit gaps. 4595 case ID_GS_ALLOC_REQ: 4596 if (isSI() || isCI() || isVI()) 4597 continue; 4598 break; 4599 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: 4600 case ID_SYSMSG: break; 4601 } 4602 if (tok == IdSymbolic[i]) { 4603 Msg.Id = i; 4604 break; 4605 } 4606 } 4607 Parser.Lex(); 4608 } else { 4609 Msg.IsSymbolic = false; 4610 if (getLexer().isNot(AsmToken::Integer)) 4611 return true; 4612 if (getParser().parseAbsoluteExpression(Msg.Id)) 4613 return true; 4614 if (getLexer().is(AsmToken::Integer)) 4615 if (getParser().parseAbsoluteExpression(Msg.Id)) 4616 Msg.Id = ID_UNKNOWN_; 4617 } 4618 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4619 return false; 4620 4621 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4622 if (getLexer().isNot(AsmToken::RParen)) 4623 return true; 4624 Parser.Lex(); 4625 return false; 4626 } 4627 4628 if (getLexer().isNot(AsmToken::Comma)) 4629 return true; 4630 Parser.Lex(); 4631 4632 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4633 Operation.Id = ID_UNKNOWN_; 4634 if (getLexer().is(AsmToken::Identifier)) { 4635 Operation.IsSymbolic = true; 4636 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4637 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4638 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4639 const StringRef Tok = Parser.getTok().getString(); 4640 for (int i = F; i < L; ++i) { 4641 if (Tok == S[i]) { 4642 Operation.Id = i; 4643 break; 4644 } 4645 } 4646 Parser.Lex(); 4647 } else { 4648 Operation.IsSymbolic = false; 4649 if (getLexer().isNot(AsmToken::Integer)) 4650 return true; 4651 if (getParser().parseAbsoluteExpression(Operation.Id)) 4652 return true; 4653 } 4654 4655 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4656 // Stream id is optional. 4657 if (getLexer().is(AsmToken::RParen)) { 4658 Parser.Lex(); 4659 return false; 4660 } 4661 4662 if (getLexer().isNot(AsmToken::Comma)) 4663 return true; 4664 Parser.Lex(); 4665 4666 if (getLexer().isNot(AsmToken::Integer)) 4667 return true; 4668 if (getParser().parseAbsoluteExpression(StreamId)) 4669 return true; 4670 } 4671 4672 if (getLexer().isNot(AsmToken::RParen)) 4673 return true; 4674 Parser.Lex(); 4675 return false; 4676 } 4677 4678 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4679 if (getLexer().getKind() != AsmToken::Identifier) 4680 return MatchOperand_NoMatch; 4681 4682 StringRef Str = Parser.getTok().getString(); 4683 int Slot = StringSwitch<int>(Str) 4684 .Case("p10", 0) 4685 .Case("p20", 1) 4686 .Case("p0", 2) 4687 .Default(-1); 4688 4689 SMLoc S = Parser.getTok().getLoc(); 4690 if (Slot == -1) 4691 return MatchOperand_ParseFail; 4692 4693 Parser.Lex(); 4694 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4695 AMDGPUOperand::ImmTyInterpSlot)); 4696 return MatchOperand_Success; 4697 } 4698 4699 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4700 if (getLexer().getKind() != AsmToken::Identifier) 4701 return MatchOperand_NoMatch; 4702 4703 StringRef Str = Parser.getTok().getString(); 4704 if (!Str.startswith("attr")) 4705 return MatchOperand_NoMatch; 4706 4707 StringRef Chan = Str.take_back(2); 4708 int AttrChan = StringSwitch<int>(Chan) 4709 .Case(".x", 0) 4710 .Case(".y", 1) 4711 .Case(".z", 2) 4712 .Case(".w", 3) 4713 .Default(-1); 4714 if (AttrChan == -1) 4715 return MatchOperand_ParseFail; 4716 4717 Str = Str.drop_back(2).drop_front(4); 4718 4719 uint8_t Attr; 4720 if (Str.getAsInteger(10, Attr)) 4721 return MatchOperand_ParseFail; 4722 4723 SMLoc S = Parser.getTok().getLoc(); 4724 Parser.Lex(); 4725 if (Attr > 63) { 4726 Error(S, "out of bounds attr"); 4727 return MatchOperand_Success; 4728 } 4729 4730 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4731 4732 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4733 AMDGPUOperand::ImmTyInterpAttr)); 4734 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4735 AMDGPUOperand::ImmTyAttrChan)); 4736 return MatchOperand_Success; 4737 } 4738 4739 void AMDGPUAsmParser::errorExpTgt() { 4740 Error(Parser.getTok().getLoc(), "invalid exp target"); 4741 } 4742 4743 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4744 uint8_t &Val) { 4745 if (Str == "null") { 4746 Val = 9; 4747 return MatchOperand_Success; 4748 } 4749 4750 if (Str.startswith("mrt")) { 4751 Str = Str.drop_front(3); 4752 if (Str == "z") { // == mrtz 4753 Val = 8; 4754 return MatchOperand_Success; 4755 } 4756 4757 if (Str.getAsInteger(10, Val)) 4758 return MatchOperand_ParseFail; 4759 4760 if (Val > 7) 4761 errorExpTgt(); 4762 4763 return MatchOperand_Success; 4764 } 4765 4766 if (Str.startswith("pos")) { 4767 Str = Str.drop_front(3); 4768 if (Str.getAsInteger(10, Val)) 4769 return MatchOperand_ParseFail; 4770 4771 if (Val > 4 
|| (Val == 4 && !isGFX10())) 4772 errorExpTgt(); 4773 4774 Val += 12; 4775 return MatchOperand_Success; 4776 } 4777 4778 if (isGFX10() && Str == "prim") { 4779 Val = 20; 4780 return MatchOperand_Success; 4781 } 4782 4783 if (Str.startswith("param")) { 4784 Str = Str.drop_front(5); 4785 if (Str.getAsInteger(10, Val)) 4786 return MatchOperand_ParseFail; 4787 4788 if (Val >= 32) 4789 errorExpTgt(); 4790 4791 Val += 32; 4792 return MatchOperand_Success; 4793 } 4794 4795 if (Str.startswith("invalid_target_")) { 4796 Str = Str.drop_front(15); 4797 if (Str.getAsInteger(10, Val)) 4798 return MatchOperand_ParseFail; 4799 4800 errorExpTgt(); 4801 return MatchOperand_Success; 4802 } 4803 4804 return MatchOperand_NoMatch; 4805 } 4806 4807 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4808 uint8_t Val; 4809 StringRef Str = Parser.getTok().getString(); 4810 4811 auto Res = parseExpTgtImpl(Str, Val); 4812 if (Res != MatchOperand_Success) 4813 return Res; 4814 4815 SMLoc S = Parser.getTok().getLoc(); 4816 Parser.Lex(); 4817 4818 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4819 AMDGPUOperand::ImmTyExpTgt)); 4820 return MatchOperand_Success; 4821 } 4822 4823 OperandMatchResultTy 4824 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4825 using namespace llvm::AMDGPU::SendMsg; 4826 4827 int64_t Imm16Val = 0; 4828 SMLoc S = Parser.getTok().getLoc(); 4829 4830 switch(getLexer().getKind()) { 4831 default: 4832 return MatchOperand_NoMatch; 4833 case AsmToken::Integer: 4834 // The operand can be an integer value. 4835 if (getParser().parseAbsoluteExpression(Imm16Val)) 4836 return MatchOperand_NoMatch; 4837 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4838 Error(S, "invalid immediate: only 16-bit values are legal"); 4839 // Do not return error code, but create an imm operand anyway and proceed 4840 // to the next operand, if any. That avoids unneccessary error messages. 4841 } 4842 break; 4843 case AsmToken::Identifier: { 4844 OperandInfoTy Msg(ID_UNKNOWN_); 4845 OperandInfoTy Operation(OP_UNKNOWN_); 4846 int64_t StreamId = STREAM_ID_DEFAULT_; 4847 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4848 return MatchOperand_ParseFail; 4849 do { 4850 // Validate and encode message ID. 4851 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4852 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI()) 4853 || Msg.Id == ID_SYSMSG)) { 4854 if (Msg.IsSymbolic) 4855 Error(S, "invalid/unsupported symbolic name of message"); 4856 else 4857 Error(S, "invalid/unsupported code of message"); 4858 break; 4859 } 4860 Imm16Val = (Msg.Id << ID_SHIFT_); 4861 // Validate and encode operation ID. 4862 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4863 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4864 if (Operation.IsSymbolic) 4865 Error(S, "invalid symbolic name of GS_OP"); 4866 else 4867 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4868 break; 4869 } 4870 if (Operation.Id == OP_GS_NOP 4871 && Msg.Id != ID_GS_DONE) { 4872 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4873 break; 4874 } 4875 Imm16Val |= (Operation.Id << OP_SHIFT_); 4876 } 4877 if (Msg.Id == ID_SYSMSG) { 4878 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4879 if (Operation.IsSymbolic) 4880 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4881 else 4882 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4883 break; 4884 } 4885 Imm16Val |= (Operation.Id << OP_SHIFT_); 4886 } 4887 // Validate and encode stream ID. 
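      // The stream id is the optional third argument of the construct, e.g. in
      //   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 1)
      // (symbolic names as defined in AMDGPUAsmUtils) the trailing 1 is the
      // value checked here.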
4888 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4889 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4890 Error(S, "invalid stream id: only 2-bit values are legal"); 4891 break; 4892 } 4893 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4894 } 4895 } while (false); 4896 } 4897 break; 4898 } 4899 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4900 return MatchOperand_Success; 4901 } 4902 4903 bool AMDGPUOperand::isSendMsg() const { 4904 return isImmTy(ImmTySendMsg); 4905 } 4906 4907 //===----------------------------------------------------------------------===// 4908 // parser helpers 4909 //===----------------------------------------------------------------------===// 4910 4911 bool 4912 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 4913 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 4914 } 4915 4916 bool 4917 AMDGPUAsmParser::isId(const StringRef Id) const { 4918 return isId(getToken(), Id); 4919 } 4920 4921 bool 4922 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 4923 return getTokenKind() == Kind; 4924 } 4925 4926 bool 4927 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4928 if (isId(Id)) { 4929 lex(); 4930 return true; 4931 } 4932 return false; 4933 } 4934 4935 bool 4936 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 4937 if (isId(Id) && peekToken().is(Kind)) { 4938 lex(); 4939 lex(); 4940 return true; 4941 } 4942 return false; 4943 } 4944 4945 bool 4946 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4947 if (isToken(Kind)) { 4948 lex(); 4949 return true; 4950 } 4951 return false; 4952 } 4953 4954 bool 4955 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4956 const StringRef ErrMsg) { 4957 if (!trySkipToken(Kind)) { 4958 Error(getLoc(), ErrMsg); 4959 return false; 4960 } 4961 return true; 4962 } 4963 4964 bool 4965 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4966 return !getParser().parseAbsoluteExpression(Imm); 4967 } 4968 4969 bool 4970 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4971 if (isToken(AsmToken::String)) { 4972 Val = getToken().getStringContents(); 4973 lex(); 4974 return true; 4975 } else { 4976 Error(getLoc(), ErrMsg); 4977 return false; 4978 } 4979 } 4980 4981 AsmToken 4982 AMDGPUAsmParser::getToken() const { 4983 return Parser.getTok(); 4984 } 4985 4986 AsmToken 4987 AMDGPUAsmParser::peekToken() { 4988 return getLexer().peekTok(); 4989 } 4990 4991 void 4992 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 4993 auto TokCount = getLexer().peekTokens(Tokens); 4994 4995 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 4996 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 4997 } 4998 4999 AsmToken::TokenKind 5000 AMDGPUAsmParser::getTokenKind() const { 5001 return getLexer().getKind(); 5002 } 5003 5004 SMLoc 5005 AMDGPUAsmParser::getLoc() const { 5006 return getToken().getLoc(); 5007 } 5008 5009 StringRef 5010 AMDGPUAsmParser::getTokenStr() const { 5011 return getToken().getString(); 5012 } 5013 5014 void 5015 AMDGPUAsmParser::lex() { 5016 Parser.Lex(); 5017 } 5018 5019 //===----------------------------------------------------------------------===// 5020 // swizzle 5021 //===----------------------------------------------------------------------===// 5022 5023 LLVM_READNONE 5024 static unsigned 5025 encodeBitmaskPerm(const unsigned AndMask, 5026 const unsigned OrMask, 5027 const unsigned XorMask) { 5028 using 
namespace llvm::AMDGPU::Swizzle; 5029 5030 return BITMASK_PERM_ENC | 5031 (AndMask << BITMASK_AND_SHIFT) | 5032 (OrMask << BITMASK_OR_SHIFT) | 5033 (XorMask << BITMASK_XOR_SHIFT); 5034 } 5035 5036 bool 5037 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5038 const unsigned MinVal, 5039 const unsigned MaxVal, 5040 const StringRef ErrMsg) { 5041 for (unsigned i = 0; i < OpNum; ++i) { 5042 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5043 return false; 5044 } 5045 SMLoc ExprLoc = Parser.getTok().getLoc(); 5046 if (!parseExpr(Op[i])) { 5047 return false; 5048 } 5049 if (Op[i] < MinVal || Op[i] > MaxVal) { 5050 Error(ExprLoc, ErrMsg); 5051 return false; 5052 } 5053 } 5054 5055 return true; 5056 } 5057 5058 bool 5059 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5060 using namespace llvm::AMDGPU::Swizzle; 5061 5062 int64_t Lane[LANE_NUM]; 5063 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5064 "expected a 2-bit lane id")) { 5065 Imm = QUAD_PERM_ENC; 5066 for (unsigned I = 0; I < LANE_NUM; ++I) { 5067 Imm |= Lane[I] << (LANE_SHIFT * I); 5068 } 5069 return true; 5070 } 5071 return false; 5072 } 5073 5074 bool 5075 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5076 using namespace llvm::AMDGPU::Swizzle; 5077 5078 SMLoc S = Parser.getTok().getLoc(); 5079 int64_t GroupSize; 5080 int64_t LaneIdx; 5081 5082 if (!parseSwizzleOperands(1, &GroupSize, 5083 2, 32, 5084 "group size must be in the interval [2,32]")) { 5085 return false; 5086 } 5087 if (!isPowerOf2_64(GroupSize)) { 5088 Error(S, "group size must be a power of two"); 5089 return false; 5090 } 5091 if (parseSwizzleOperands(1, &LaneIdx, 5092 0, GroupSize - 1, 5093 "lane id must be in the interval [0,group size - 1]")) { 5094 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5095 return true; 5096 } 5097 return false; 5098 } 5099 5100 bool 5101 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5102 using namespace llvm::AMDGPU::Swizzle; 5103 5104 SMLoc S = Parser.getTok().getLoc(); 5105 int64_t GroupSize; 5106 5107 if (!parseSwizzleOperands(1, &GroupSize, 5108 2, 32, "group size must be in the interval [2,32]")) { 5109 return false; 5110 } 5111 if (!isPowerOf2_64(GroupSize)) { 5112 Error(S, "group size must be a power of two"); 5113 return false; 5114 } 5115 5116 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5117 return true; 5118 } 5119 5120 bool 5121 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5122 using namespace llvm::AMDGPU::Swizzle; 5123 5124 SMLoc S = Parser.getTok().getLoc(); 5125 int64_t GroupSize; 5126 5127 if (!parseSwizzleOperands(1, &GroupSize, 5128 1, 16, "group size must be in the interval [1,16]")) { 5129 return false; 5130 } 5131 if (!isPowerOf2_64(GroupSize)) { 5132 Error(S, "group size must be a power of two"); 5133 return false; 5134 } 5135 5136 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5137 return true; 5138 } 5139 5140 bool 5141 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5142 using namespace llvm::AMDGPU::Swizzle; 5143 5144 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5145 return false; 5146 } 5147 5148 StringRef Ctl; 5149 SMLoc StrLoc = Parser.getTok().getLoc(); 5150 if (!parseString(Ctl)) { 5151 return false; 5152 } 5153 if (Ctl.size() != BITMASK_WIDTH) { 5154 Error(StrLoc, "expected a 5-character mask"); 5155 return false; 5156 } 5157 5158 unsigned AndMask = 0; 5159 unsigned OrMask = 0; 5160 unsigned XorMask = 0; 5161 5162 for (size_t i = 0; i < Ctl.size(); ++i) { 5163 unsigned Mask = 1 << 
(BITMASK_WIDTH - 1 - i); 5164 switch(Ctl[i]) { 5165 default: 5166 Error(StrLoc, "invalid mask"); 5167 return false; 5168 case '0': 5169 break; 5170 case '1': 5171 OrMask |= Mask; 5172 break; 5173 case 'p': 5174 AndMask |= Mask; 5175 break; 5176 case 'i': 5177 AndMask |= Mask; 5178 XorMask |= Mask; 5179 break; 5180 } 5181 } 5182 5183 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5184 return true; 5185 } 5186 5187 bool 5188 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5189 5190 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5191 5192 if (!parseExpr(Imm)) { 5193 return false; 5194 } 5195 if (!isUInt<16>(Imm)) { 5196 Error(OffsetLoc, "expected a 16-bit offset"); 5197 return false; 5198 } 5199 return true; 5200 } 5201 5202 bool 5203 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5204 using namespace llvm::AMDGPU::Swizzle; 5205 5206 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5207 5208 SMLoc ModeLoc = Parser.getTok().getLoc(); 5209 bool Ok = false; 5210 5211 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5212 Ok = parseSwizzleQuadPerm(Imm); 5213 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5214 Ok = parseSwizzleBitmaskPerm(Imm); 5215 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5216 Ok = parseSwizzleBroadcast(Imm); 5217 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5218 Ok = parseSwizzleSwap(Imm); 5219 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5220 Ok = parseSwizzleReverse(Imm); 5221 } else { 5222 Error(ModeLoc, "expected a swizzle mode"); 5223 } 5224 5225 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5226 } 5227 5228 return false; 5229 } 5230 5231 OperandMatchResultTy 5232 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5233 SMLoc S = Parser.getTok().getLoc(); 5234 int64_t Imm = 0; 5235 5236 if (trySkipId("offset")) { 5237 5238 bool Ok = false; 5239 if (skipToken(AsmToken::Colon, "expected a colon")) { 5240 if (trySkipId("swizzle")) { 5241 Ok = parseSwizzleMacro(Imm); 5242 } else { 5243 Ok = parseSwizzleOffset(Imm); 5244 } 5245 } 5246 5247 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5248 5249 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5250 } else { 5251 // Swizzle "offset" operand is optional. 5252 // If it is omitted, try parsing other optional operands. 5253 return parseOptionalOpr(Operands); 5254 } 5255 } 5256 5257 bool 5258 AMDGPUOperand::isSwizzle() const { 5259 return isImmTy(ImmTySwizzle); 5260 } 5261 5262 //===----------------------------------------------------------------------===// 5263 // VGPR Index Mode 5264 //===----------------------------------------------------------------------===// 5265 5266 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5267 5268 using namespace llvm::AMDGPU::VGPRIndexMode; 5269 5270 if (trySkipToken(AsmToken::RParen)) { 5271 return OFF; 5272 } 5273 5274 int64_t Imm = 0; 5275 5276 while (true) { 5277 unsigned Mode = 0; 5278 SMLoc S = Parser.getTok().getLoc(); 5279 5280 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5281 if (trySkipId(IdSymbolic[ModeId])) { 5282 Mode = 1 << ModeId; 5283 break; 5284 } 5285 } 5286 5287 if (Mode == 0) { 5288 Error(S, (Imm == 0)? 
5289 "expected a VGPR index mode or a closing parenthesis" : 5290 "expected a VGPR index mode"); 5291 break; 5292 } 5293 5294 if (Imm & Mode) { 5295 Error(S, "duplicate VGPR index mode"); 5296 break; 5297 } 5298 Imm |= Mode; 5299 5300 if (trySkipToken(AsmToken::RParen)) 5301 break; 5302 if (!skipToken(AsmToken::Comma, 5303 "expected a comma or a closing parenthesis")) 5304 break; 5305 } 5306 5307 return Imm; 5308 } 5309 5310 OperandMatchResultTy 5311 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5312 5313 int64_t Imm = 0; 5314 SMLoc S = Parser.getTok().getLoc(); 5315 5316 if (getLexer().getKind() == AsmToken::Identifier && 5317 Parser.getTok().getString() == "gpr_idx" && 5318 getLexer().peekTok().is(AsmToken::LParen)) { 5319 5320 Parser.Lex(); 5321 Parser.Lex(); 5322 5323 // If parse failed, trigger an error but do not return error code 5324 // to avoid excessive error messages. 5325 Imm = parseGPRIdxMacro(); 5326 5327 } else { 5328 if (getParser().parseAbsoluteExpression(Imm)) 5329 return MatchOperand_NoMatch; 5330 if (Imm < 0 || !isUInt<4>(Imm)) { 5331 Error(S, "invalid immediate: only 4-bit values are legal"); 5332 } 5333 } 5334 5335 Operands.push_back( 5336 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5337 return MatchOperand_Success; 5338 } 5339 5340 bool AMDGPUOperand::isGPRIdxMode() const { 5341 return isImmTy(ImmTyGprIdxMode); 5342 } 5343 5344 //===----------------------------------------------------------------------===// 5345 // sopp branch targets 5346 //===----------------------------------------------------------------------===// 5347 5348 OperandMatchResultTy 5349 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5350 SMLoc S = Parser.getTok().getLoc(); 5351 5352 switch (getLexer().getKind()) { 5353 default: return MatchOperand_ParseFail; 5354 case AsmToken::Integer: { 5355 int64_t Imm; 5356 if (getParser().parseAbsoluteExpression(Imm)) 5357 return MatchOperand_ParseFail; 5358 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5359 return MatchOperand_Success; 5360 } 5361 5362 case AsmToken::Identifier: 5363 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5364 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5365 Parser.getTok().getString()), getContext()), S)); 5366 Parser.Lex(); 5367 return MatchOperand_Success; 5368 } 5369 } 5370 5371 //===----------------------------------------------------------------------===// 5372 // mubuf 5373 //===----------------------------------------------------------------------===// 5374 5375 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5376 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5377 } 5378 5379 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5380 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5381 } 5382 5383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5384 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5385 } 5386 5387 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5388 const OperandVector &Operands, 5389 bool IsAtomic, 5390 bool IsAtomicReturn, 5391 bool IsLds) { 5392 bool IsLdsOpcode = IsLds; 5393 bool HasLdsModifier = false; 5394 OptionalImmIndexMap OptionalIdx; 5395 assert(IsAtomicReturn ? 
IsAtomic : true); 5396 unsigned FirstOperandIdx = 1; 5397 5398 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5399 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5400 5401 // Add the register arguments 5402 if (Op.isReg()) { 5403 Op.addRegOperands(Inst, 1); 5404 // Insert a tied src for atomic return dst. 5405 // This cannot be postponed as subsequent calls to 5406 // addImmOperands rely on correct number of MC operands. 5407 if (IsAtomicReturn && i == FirstOperandIdx) 5408 Op.addRegOperands(Inst, 1); 5409 continue; 5410 } 5411 5412 // Handle the case where soffset is an immediate 5413 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5414 Op.addImmOperands(Inst, 1); 5415 continue; 5416 } 5417 5418 HasLdsModifier |= Op.isLDS(); 5419 5420 // Handle tokens like 'offen' which are sometimes hard-coded into the 5421 // asm string. There are no MCInst operands for these. 5422 if (Op.isToken()) { 5423 continue; 5424 } 5425 assert(Op.isImm()); 5426 5427 // Handle optional arguments 5428 OptionalIdx[Op.getImmTy()] = i; 5429 } 5430 5431 // This is a workaround for an llvm quirk which may result in an 5432 // incorrect instruction selection. Lds and non-lds versions of 5433 // MUBUF instructions are identical except that lds versions 5434 // have mandatory 'lds' modifier. However this modifier follows 5435 // optional modifiers and llvm asm matcher regards this 'lds' 5436 // modifier as an optional one. As a result, an lds version 5437 // of opcode may be selected even if it has no 'lds' modifier. 5438 if (IsLdsOpcode && !HasLdsModifier) { 5439 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5440 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5441 Inst.setOpcode(NoLdsOpcode); 5442 IsLdsOpcode = false; 5443 } 5444 } 5445 5446 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5447 if (!IsAtomic) { // glc is hard-coded. 5448 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5449 } 5450 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5451 5452 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5453 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5454 } 5455 5456 if (isGFX10()) 5457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5458 } 5459 5460 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5461 OptionalImmIndexMap OptionalIdx; 5462 5463 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5464 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5465 5466 // Add the register arguments 5467 if (Op.isReg()) { 5468 Op.addRegOperands(Inst, 1); 5469 continue; 5470 } 5471 5472 // Handle the case where soffset is an immediate 5473 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5474 Op.addImmOperands(Inst, 1); 5475 continue; 5476 } 5477 5478 // Handle tokens like 'offen' which are sometimes hard-coded into the 5479 // asm string. There are no MCInst operands for these. 
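    // For example, in (a sketch of typical syntax)
    //   tbuffer_load_format_x v0, v1, s[0:3], dfmt:4, nfmt:2, s2 idxen offset:8
    // "idxen" is such a token, while dfmt/nfmt arrive as the joint
    // ImmTyFORMAT immediate added below.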
5480 if (Op.isToken()) { 5481 continue; 5482 } 5483 assert(Op.isImm()); 5484 5485 // Handle optional arguments 5486 OptionalIdx[Op.getImmTy()] = i; 5487 } 5488 5489 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5490 AMDGPUOperand::ImmTyOffset); 5491 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5492 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5494 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5495 5496 if (isGFX10()) 5497 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5498 } 5499 5500 //===----------------------------------------------------------------------===// 5501 // mimg 5502 //===----------------------------------------------------------------------===// 5503 5504 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5505 bool IsAtomic) { 5506 unsigned I = 1; 5507 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5508 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5509 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5510 } 5511 5512 if (IsAtomic) { 5513 // Add src, same as dst 5514 assert(Desc.getNumDefs() == 1); 5515 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5516 } 5517 5518 OptionalImmIndexMap OptionalIdx; 5519 5520 for (unsigned E = Operands.size(); I != E; ++I) { 5521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5522 5523 // Add the register arguments 5524 if (Op.isReg()) { 5525 Op.addRegOperands(Inst, 1); 5526 } else if (Op.isImmModifier()) { 5527 OptionalIdx[Op.getImmTy()] = I; 5528 } else if (!Op.isToken()) { 5529 llvm_unreachable("unexpected operand type"); 5530 } 5531 } 5532 5533 bool IsGFX10 = isGFX10(); 5534 5535 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5536 if (IsGFX10) 5537 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5538 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5539 if (IsGFX10) 5540 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5541 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5542 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5543 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5544 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5545 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5546 if (!IsGFX10) 5547 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5549 } 5550 5551 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5552 cvtMIMG(Inst, Operands, true); 5553 } 5554 5555 //===----------------------------------------------------------------------===// 5556 // smrd 5557 //===----------------------------------------------------------------------===// 5558 5559 bool AMDGPUOperand::isSMRDOffset8() const { 5560 return isImm() && isUInt<8>(getImm()); 5561 } 5562 5563 bool AMDGPUOperand::isSMRDOffset20() const { 5564 return isImm() && isUInt<20>(getImm()); 5565 } 5566 5567 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5568 // 32-bit literals are only supported on CI and we only want to use them 5569 // when the offset is > 8-bits. 
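// For example (illustrative), an offset such as 0x1000 does not fit in 8 bits, so on
// CI it can only be encoded as a 32-bit literal; offsets that do fit in 8 bits keep
// the short encoding and this predicate returns false for them.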
5570 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5571 } 5572 5573 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5574 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5575 } 5576 5577 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5578 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5579 } 5580 5581 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5582 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5583 } 5584 5585 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5586 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5587 } 5588 5589 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5590 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5591 } 5592 5593 //===----------------------------------------------------------------------===// 5594 // vop3 5595 //===----------------------------------------------------------------------===// 5596 5597 static bool ConvertOmodMul(int64_t &Mul) { 5598 if (Mul != 1 && Mul != 2 && Mul != 4) 5599 return false; 5600 5601 Mul >>= 1; 5602 return true; 5603 } 5604 5605 static bool ConvertOmodDiv(int64_t &Div) { 5606 if (Div == 1) { 5607 Div = 0; 5608 return true; 5609 } 5610 5611 if (Div == 2) { 5612 Div = 3; 5613 return true; 5614 } 5615 5616 return false; 5617 } 5618 5619 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5620 if (BoundCtrl == 0) { 5621 BoundCtrl = 1; 5622 return true; 5623 } 5624 5625 if (BoundCtrl == -1) { 5626 BoundCtrl = 0; 5627 return true; 5628 } 5629 5630 return false; 5631 } 5632 5633 // Note: the order in this table matches the order of operands in AsmString. 
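// Each entry is {Name, ImmTy, IsBit, ConvertResult}. Entries with IsBit set are plain
// flags parsed by parseNamedBit (e.g. 'glc'); the others are parsed as 'name:value'
// pairs, and ConvertResult, when non-null, remaps the parsed value to its encoding
// (e.g. 'mul:2' becomes omod encoding 1 via ConvertOmodMul). See parseOptionalOpr below.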
5634 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5635 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5636 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5637 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5638 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5639 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5640 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5641 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5642 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5643 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5644 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5645 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5646 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5647 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5648 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5649 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5650 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5651 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5652 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5653 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5654 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5655 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5656 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5657 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5658 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5659 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5660 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5661 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5662 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5663 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5664 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5665 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5666 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5667 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5668 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5669 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5670 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5671 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5672 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5673 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5674 }; 5675 5676 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5677 unsigned size = Operands.size(); 5678 assert(size > 0); 5679 5680 OperandMatchResultTy res = parseOptionalOpr(Operands); 5681 5682 // This is a hack to enable hardcoded mandatory operands which follow 5683 // optional operands. 5684 // 5685 // Current design assumes that all operands after the first optional operand 5686 // are also optional. However implementation of some instructions violates 5687 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5688 // 5689 // To alleviate this problem, we have to (implicitly) parse extra operands 5690 // to make sure autogenerated parser of custom operands never hit hardcoded 5691 // mandatory operands. 5692 5693 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5694 5695 // We have parsed the first optional operand. 5696 // Parse as many operands as necessary to skip all mandatory operands. 
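// Illustrative case: a flat/global atomic with return has a hard-coded 'glc' that
// follows the optional offset; the lookahead below keeps parsing extra operands so
// the matcher still reaches that mandatory token.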
5697 5698 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5699 if (res != MatchOperand_Success || 5700 getLexer().is(AsmToken::EndOfStatement)) break; 5701 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5702 res = parseOptionalOpr(Operands); 5703 } 5704 } 5705 5706 return res; 5707 } 5708 5709 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5710 OperandMatchResultTy res; 5711 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5712 // try to parse any optional operand here 5713 if (Op.IsBit) { 5714 res = parseNamedBit(Op.Name, Operands, Op.Type); 5715 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5716 res = parseOModOperand(Operands); 5717 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5718 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5719 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5720 res = parseSDWASel(Operands, Op.Name, Op.Type); 5721 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5722 res = parseSDWADstUnused(Operands); 5723 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5724 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5725 Op.Type == AMDGPUOperand::ImmTyNegLo || 5726 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5727 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5728 Op.ConvertResult); 5729 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5730 res = parseDim(Operands); 5731 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 5732 res = parseDfmtNfmt(Operands); 5733 } else { 5734 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5735 } 5736 if (res != MatchOperand_NoMatch) { 5737 return res; 5738 } 5739 } 5740 return MatchOperand_NoMatch; 5741 } 5742 5743 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5744 StringRef Name = Parser.getTok().getString(); 5745 if (Name == "mul") { 5746 return parseIntWithPrefix("mul", Operands, 5747 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5748 } 5749 5750 if (Name == "div") { 5751 return parseIntWithPrefix("div", Operands, 5752 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5753 } 5754 5755 return MatchOperand_NoMatch; 5756 } 5757 5758 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5759 cvtVOP3P(Inst, Operands); 5760 5761 int Opc = Inst.getOpcode(); 5762 5763 int SrcNum; 5764 const int Ops[] = { AMDGPU::OpName::src0, 5765 AMDGPU::OpName::src1, 5766 AMDGPU::OpName::src2 }; 5767 for (SrcNum = 0; 5768 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5769 ++SrcNum); 5770 assert(SrcNum > 0); 5771 5772 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5773 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5774 5775 if ((OpSel & (1 << SrcNum)) != 0) { 5776 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5777 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5778 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5779 } 5780 } 5781 5782 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5783 // 1. This operand is input modifiers 5784 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5785 // 2. This is not last operand 5786 && Desc.NumOperands > (OpNum + 1) 5787 // 3. Next operand is register class 5788 && Desc.OpInfo[OpNum + 1].RegClass != -1 5789 // 4. 
Next register is not tied to any other operand 5790 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5791 } 5792 5793 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5794 { 5795 OptionalImmIndexMap OptionalIdx; 5796 unsigned Opc = Inst.getOpcode(); 5797 5798 unsigned I = 1; 5799 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5800 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5801 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5802 } 5803 5804 for (unsigned E = Operands.size(); I != E; ++I) { 5805 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5806 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5807 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5808 } else if (Op.isInterpSlot() || 5809 Op.isInterpAttr() || 5810 Op.isAttrChan()) { 5811 Inst.addOperand(MCOperand::createImm(Op.getImm())); 5812 } else if (Op.isImmModifier()) { 5813 OptionalIdx[Op.getImmTy()] = I; 5814 } else { 5815 llvm_unreachable("unhandled operand type"); 5816 } 5817 } 5818 5819 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5821 } 5822 5823 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5824 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5825 } 5826 5827 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5828 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5829 } 5830 } 5831 5832 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5833 OptionalImmIndexMap &OptionalIdx) { 5834 unsigned Opc = Inst.getOpcode(); 5835 5836 unsigned I = 1; 5837 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5838 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5839 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5840 } 5841 5842 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5843 // This instruction has src modifiers 5844 for (unsigned E = Operands.size(); I != E; ++I) { 5845 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5846 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5847 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5848 } else if (Op.isImmModifier()) { 5849 OptionalIdx[Op.getImmTy()] = I; 5850 } else if (Op.isRegOrImm()) { 5851 Op.addRegOrImmOperands(Inst, 1); 5852 } else { 5853 llvm_unreachable("unhandled operand type"); 5854 } 5855 } 5856 } else { 5857 // No src modifiers 5858 for (unsigned E = Operands.size(); I != E; ++I) { 5859 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5860 if (Op.isMod()) { 5861 OptionalIdx[Op.getImmTy()] = I; 5862 } else { 5863 Op.addRegOrImmOperands(Inst, 1); 5864 } 5865 } 5866 } 5867 5868 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5869 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5870 } 5871 5872 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5874 } 5875 5876 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 5877 // it has src2 register operand that is tied to dst operand 5878 // we don't allow modifiers for this operand in assembler so src2_modifiers 5879 // should be 0. 
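// For example (illustrative), 'v_mac_f32_e64 v0, v1, v2' is written without an
// explicit src2, so the code below inserts a zero src2_modifiers operand and a copy
// of the dst register (v0) to satisfy the tied-operand constraint.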
5880 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 5881 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 5882 Opc == AMDGPU::V_MAC_F32_e64_vi || 5883 Opc == AMDGPU::V_MAC_F16_e64_vi || 5884 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 5885 Opc == AMDGPU::V_FMAC_F32_e64_vi || 5886 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 5887 auto it = Inst.begin(); 5888 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5889 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5890 ++it; 5891 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5892 } 5893 } 5894 5895 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5896 OptionalImmIndexMap OptionalIdx; 5897 cvtVOP3(Inst, Operands, OptionalIdx); 5898 } 5899 5900 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5901 const OperandVector &Operands) { 5902 OptionalImmIndexMap OptIdx; 5903 const int Opc = Inst.getOpcode(); 5904 const MCInstrDesc &Desc = MII.get(Opc); 5905 5906 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5907 5908 cvtVOP3(Inst, Operands, OptIdx); 5909 5910 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5911 assert(!IsPacked); 5912 Inst.addOperand(Inst.getOperand(0)); 5913 } 5914 5915 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5916 // instruction, and then figure out where to actually put the modifiers 5917 5918 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5919 5920 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5921 if (OpSelHiIdx != -1) { 5922 int DefaultVal = IsPacked ? -1 : 0; 5923 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5924 DefaultVal); 5925 } 5926 5927 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5928 if (NegLoIdx != -1) { 5929 assert(IsPacked); 5930 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5931 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5932 } 5933 5934 const int Ops[] = { AMDGPU::OpName::src0, 5935 AMDGPU::OpName::src1, 5936 AMDGPU::OpName::src2 }; 5937 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5938 AMDGPU::OpName::src1_modifiers, 5939 AMDGPU::OpName::src2_modifiers }; 5940 5941 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5942 5943 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5944 unsigned OpSelHi = 0; 5945 unsigned NegLo = 0; 5946 unsigned NegHi = 0; 5947 5948 if (OpSelHiIdx != -1) { 5949 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5950 } 5951 5952 if (NegLoIdx != -1) { 5953 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5954 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5955 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5956 } 5957 5958 for (int J = 0; J < 3; ++J) { 5959 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5960 if (OpIdx == -1) 5961 break; 5962 5963 uint32_t ModVal = 0; 5964 5965 if ((OpSel & (1 << J)) != 0) 5966 ModVal |= SISrcMods::OP_SEL_0; 5967 5968 if ((OpSelHi & (1 << J)) != 0) 5969 ModVal |= SISrcMods::OP_SEL_1; 5970 5971 if ((NegLo & (1 << J)) != 0) 5972 ModVal |= SISrcMods::NEG; 5973 5974 if ((NegHi & (1 << J)) != 0) 5975 ModVal |= SISrcMods::NEG_HI; 5976 5977 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5978 5979 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5980 } 5981 } 5982 5983 //===----------------------------------------------------------------------===// 5984 // dpp 5985 
//===----------------------------------------------------------------------===// 5986 5987 bool AMDGPUOperand::isDPPCtrl() const { 5988 using namespace AMDGPU::DPP; 5989 5990 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5991 if (result) { 5992 int64_t Imm = getImm(); 5993 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5994 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5995 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5996 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5997 (Imm == DppCtrl::WAVE_SHL1) || 5998 (Imm == DppCtrl::WAVE_ROL1) || 5999 (Imm == DppCtrl::WAVE_SHR1) || 6000 (Imm == DppCtrl::WAVE_ROR1) || 6001 (Imm == DppCtrl::ROW_MIRROR) || 6002 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6003 (Imm == DppCtrl::BCAST15) || 6004 (Imm == DppCtrl::BCAST31); 6005 } 6006 return false; 6007 } 6008 6009 bool AMDGPUOperand::isS16Imm() const { 6010 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6011 } 6012 6013 bool AMDGPUOperand::isU16Imm() const { 6014 return isImm() && isUInt<16>(getImm()); 6015 } 6016 6017 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6018 if (!isGFX10()) 6019 return MatchOperand_NoMatch; 6020 6021 SMLoc S = Parser.getTok().getLoc(); 6022 6023 if (getLexer().isNot(AsmToken::Identifier)) 6024 return MatchOperand_NoMatch; 6025 if (getLexer().getTok().getString() != "dim") 6026 return MatchOperand_NoMatch; 6027 6028 Parser.Lex(); 6029 if (getLexer().isNot(AsmToken::Colon)) 6030 return MatchOperand_ParseFail; 6031 6032 Parser.Lex(); 6033 6034 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6035 // integer. 6036 std::string Token; 6037 if (getLexer().is(AsmToken::Integer)) { 6038 SMLoc Loc = getLexer().getTok().getEndLoc(); 6039 Token = getLexer().getTok().getString(); 6040 Parser.Lex(); 6041 if (getLexer().getTok().getLoc() != Loc) 6042 return MatchOperand_ParseFail; 6043 } 6044 if (getLexer().isNot(AsmToken::Identifier)) 6045 return MatchOperand_ParseFail; 6046 Token += getLexer().getTok().getString(); 6047 6048 StringRef DimId = Token; 6049 if (DimId.startswith("SQ_RSRC_IMG_")) 6050 DimId = DimId.substr(12); 6051 6052 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6053 if (!DimInfo) 6054 return MatchOperand_ParseFail; 6055 6056 Parser.Lex(); 6057 6058 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6059 AMDGPUOperand::ImmTyDim)); 6060 return MatchOperand_Success; 6061 } 6062 6063 OperandMatchResultTy 6064 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6065 using namespace AMDGPU::DPP; 6066 6067 SMLoc S = Parser.getTok().getLoc(); 6068 StringRef Prefix; 6069 int64_t Int; 6070 6071 if (getLexer().getKind() == AsmToken::Identifier) { 6072 Prefix = Parser.getTok().getString(); 6073 } else { 6074 return MatchOperand_NoMatch; 6075 } 6076 6077 if (Prefix == "row_mirror") { 6078 Int = DppCtrl::ROW_MIRROR; 6079 Parser.Lex(); 6080 } else if (Prefix == "row_half_mirror") { 6081 Int = DppCtrl::ROW_HALF_MIRROR; 6082 Parser.Lex(); 6083 } else { 6084 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6085 if (Prefix != "quad_perm" 6086 && Prefix != "row_shl" 6087 && Prefix != "row_shr" 6088 && Prefix != "row_ror" 6089 && Prefix != "wave_shl" 6090 && Prefix != "wave_rol" 6091 && Prefix != "wave_shr" 6092 && Prefix != "wave_ror" 6093 && Prefix != "row_bcast") { 6094 return MatchOperand_NoMatch; 6095 } 6096 6097 Parser.Lex(); 6098 
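// The prefix (e.g. quad_perm or row_shl) has been consumed; a ':' must follow, then
// either a bracketed lane list or a single value, e.g. (illustrative)
// quad_perm:[0,1,2,3] or row_shl:1.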
if (getLexer().isNot(AsmToken::Colon)) 6099 return MatchOperand_ParseFail; 6100 6101 if (Prefix == "quad_perm") { 6102 // quad_perm:[%d,%d,%d,%d] 6103 Parser.Lex(); 6104 if (getLexer().isNot(AsmToken::LBrac)) 6105 return MatchOperand_ParseFail; 6106 Parser.Lex(); 6107 6108 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6109 return MatchOperand_ParseFail; 6110 6111 for (int i = 0; i < 3; ++i) { 6112 if (getLexer().isNot(AsmToken::Comma)) 6113 return MatchOperand_ParseFail; 6114 Parser.Lex(); 6115 6116 int64_t Temp; 6117 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6118 return MatchOperand_ParseFail; 6119 const int shift = i*2 + 2; 6120 Int += (Temp << shift); 6121 } 6122 6123 if (getLexer().isNot(AsmToken::RBrac)) 6124 return MatchOperand_ParseFail; 6125 Parser.Lex(); 6126 } else { 6127 // sel:%d 6128 Parser.Lex(); 6129 if (getParser().parseAbsoluteExpression(Int)) 6130 return MatchOperand_ParseFail; 6131 6132 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6133 Int |= DppCtrl::ROW_SHL0; 6134 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6135 Int |= DppCtrl::ROW_SHR0; 6136 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6137 Int |= DppCtrl::ROW_ROR0; 6138 } else if (Prefix == "wave_shl" && 1 == Int) { 6139 Int = DppCtrl::WAVE_SHL1; 6140 } else if (Prefix == "wave_rol" && 1 == Int) { 6141 Int = DppCtrl::WAVE_ROL1; 6142 } else if (Prefix == "wave_shr" && 1 == Int) { 6143 Int = DppCtrl::WAVE_SHR1; 6144 } else if (Prefix == "wave_ror" && 1 == Int) { 6145 Int = DppCtrl::WAVE_ROR1; 6146 } else if (Prefix == "row_bcast") { 6147 if (Int == 15) { 6148 Int = DppCtrl::BCAST15; 6149 } else if (Int == 31) { 6150 Int = DppCtrl::BCAST31; 6151 } else { 6152 return MatchOperand_ParseFail; 6153 } 6154 } else { 6155 return MatchOperand_ParseFail; 6156 } 6157 } 6158 } 6159 6160 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6161 return MatchOperand_Success; 6162 } 6163 6164 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6165 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6166 } 6167 6168 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6169 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6170 } 6171 6172 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6173 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6174 } 6175 6176 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6177 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6178 } 6179 6180 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { 6181 OptionalImmIndexMap OptionalIdx; 6182 6183 unsigned I = 1; 6184 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6185 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6186 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6187 } 6188 6189 for (unsigned E = Operands.size(); I != E; ++I) { 6190 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6191 MCOI::TIED_TO); 6192 if (TiedTo != -1) { 6193 assert((unsigned)TiedTo < Inst.getNumOperands()); 6194 // handle tied old or src2 for MAC instructions 6195 Inst.addOperand(Inst.getOperand(TiedTo)); 6196 } 6197 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6198 // Add the register arguments 6199 if (Op.isReg() && Op.getReg() == AMDGPU::VCC) { 6200 // VOP2b (v_add_u32, v_sub_u32 ...) 
dpp use "vcc" token. 6201 // Skip it. 6202 continue; 6203 } 6204 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6205 Op.addRegWithFPInputModsOperands(Inst, 2); 6206 } else if (Op.isDPPCtrl()) { 6207 Op.addImmOperands(Inst, 1); 6208 } else if (Op.isImm()) { 6209 // Handle optional arguments 6210 OptionalIdx[Op.getImmTy()] = I; 6211 } else { 6212 llvm_unreachable("Invalid operand type"); 6213 } 6214 } 6215 6216 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6217 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6219 } 6220 6221 //===----------------------------------------------------------------------===// 6222 // sdwa 6223 //===----------------------------------------------------------------------===// 6224 6225 OperandMatchResultTy 6226 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6227 AMDGPUOperand::ImmTy Type) { 6228 using namespace llvm::AMDGPU::SDWA; 6229 6230 SMLoc S = Parser.getTok().getLoc(); 6231 StringRef Value; 6232 OperandMatchResultTy res; 6233 6234 res = parseStringWithPrefix(Prefix, Value); 6235 if (res != MatchOperand_Success) { 6236 return res; 6237 } 6238 6239 int64_t Int; 6240 Int = StringSwitch<int64_t>(Value) 6241 .Case("BYTE_0", SdwaSel::BYTE_0) 6242 .Case("BYTE_1", SdwaSel::BYTE_1) 6243 .Case("BYTE_2", SdwaSel::BYTE_2) 6244 .Case("BYTE_3", SdwaSel::BYTE_3) 6245 .Case("WORD_0", SdwaSel::WORD_0) 6246 .Case("WORD_1", SdwaSel::WORD_1) 6247 .Case("DWORD", SdwaSel::DWORD) 6248 .Default(0xffffffff); 6249 Parser.Lex(); // eat last token 6250 6251 if (Int == 0xffffffff) { 6252 return MatchOperand_ParseFail; 6253 } 6254 6255 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6256 return MatchOperand_Success; 6257 } 6258 6259 OperandMatchResultTy 6260 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6261 using namespace llvm::AMDGPU::SDWA; 6262 6263 SMLoc S = Parser.getTok().getLoc(); 6264 StringRef Value; 6265 OperandMatchResultTy res; 6266 6267 res = parseStringWithPrefix("dst_unused", Value); 6268 if (res != MatchOperand_Success) { 6269 return res; 6270 } 6271 6272 int64_t Int; 6273 Int = StringSwitch<int64_t>(Value) 6274 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6275 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6276 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6277 .Default(0xffffffff); 6278 Parser.Lex(); // eat last token 6279 6280 if (Int == 0xffffffff) { 6281 return MatchOperand_ParseFail; 6282 } 6283 6284 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6285 return MatchOperand_Success; 6286 } 6287 6288 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6289 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6290 } 6291 6292 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6293 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6294 } 6295 6296 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6297 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6298 } 6299 6300 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6301 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6302 } 6303 6304 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6305 uint64_t BasicInstType, bool skipVcc) { 6306 using namespace llvm::AMDGPU::SDWA; 6307 6308 
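// Sketch of the conversion (illustrative syntax): for something like
//   v_mul_f32_sdwa v1, v2, v3 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
// the register/immediate sources are added in order below, while named modifiers
// (dst_sel, dst_unused, src0_sel, ...) are collected into OptionalIdx and any omitted
// ones are appended afterwards with their defaults (DWORD / UNUSED_PRESERVE).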
OptionalImmIndexMap OptionalIdx; 6309 bool skippedVcc = false; 6310 6311 unsigned I = 1; 6312 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6313 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6314 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6315 } 6316 6317 for (unsigned E = Operands.size(); I != E; ++I) { 6318 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6319 if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) { 6320 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6321 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6322 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6323 // Skip VCC only if we didn't skip it on previous iteration. 6324 if (BasicInstType == SIInstrFlags::VOP2 && 6325 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6326 skippedVcc = true; 6327 continue; 6328 } else if (BasicInstType == SIInstrFlags::VOPC && 6329 Inst.getNumOperands() == 0) { 6330 skippedVcc = true; 6331 continue; 6332 } 6333 } 6334 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6335 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6336 } else if (Op.isImm()) { 6337 // Handle optional arguments 6338 OptionalIdx[Op.getImmTy()] = I; 6339 } else { 6340 llvm_unreachable("Invalid operand type"); 6341 } 6342 skippedVcc = false; 6343 } 6344 6345 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6346 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6347 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6348 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6349 switch (BasicInstType) { 6350 case SIInstrFlags::VOP1: 6351 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6352 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6353 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6354 } 6355 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6357 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6358 break; 6359 6360 case SIInstrFlags::VOP2: 6361 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6362 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6363 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6364 } 6365 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6366 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6368 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6369 break; 6370 6371 case SIInstrFlags::VOPC: 6372 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6373 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6376 break; 6377 6378 default: 6379 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed");
6380   }
6381 }
6382
6383   // Special case v_mac_{f16, f32}:
6384   // they have a src2 register operand that is tied to the dst operand.
6385   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6386       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6387     auto it = Inst.begin();
6388     std::advance(
6389         it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6390     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6391   }
6392 }
6393
6394 /// Force static initialization.
6395 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6396   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6397   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6398 }
6399
6400 #define GET_REGISTER_MATCHER
6401 #define GET_MATCHER_IMPLEMENTATION
6402 #define GET_MNEMONIC_SPELL_CHECKER
6403 #include "AMDGPUGenAsmMatcher.inc"
6404
6405 // This function should be defined after the auto-generated include so that the
6406 // MatchClassKind enum is defined.
6407 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6408                                                      unsigned Kind) {
6409   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6410   // But MatchInstructionImpl() expects to see a token and fails to validate the
6411   // operand. This method checks whether we were given an immediate operand when
6412   // the matcher expects the corresponding token.
6413   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6414   switch (Kind) {
6415   case MCK_addr64:
6416     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6417   case MCK_gds:
6418     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6419   case MCK_lds:
6420     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6421   case MCK_glc:
6422     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6423   case MCK_idxen:
6424     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6425   case MCK_offen:
6426     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6427   case MCK_SSrcB32:
6428     // When operands have expression values, they will return true for isToken,
6429     // because it is not possible to distinguish between a token and an
6430     // expression at parse time. MatchInstructionImpl() will always try to
6431     // match an operand as a token when isToken returns true; if the name of
6432     // the expression is not a valid token, the match fails, so we need to
6433     // handle it here.
6434     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6435   case MCK_SSrcF32:
6436     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6437   case MCK_SoppBrTarget:
6438     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6439   case MCK_VReg32OrOff:
6440     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6441   case MCK_InterpSlot:
6442     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6443   case MCK_Attr:
6444     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6445   case MCK_AttrChan:
6446     return Operand.isAttrChan() ?
Match_Success : Match_InvalidOperand;
6447   default:
6448     return Match_InvalidOperand;
6449   }
6450 }
6451
6452 //===----------------------------------------------------------------------===//
6453 // endpgm
6454 //===----------------------------------------------------------------------===//
6455
6456 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6457   SMLoc S = Parser.getTok().getLoc();
6458   int64_t Imm = 0;
6459
6460   if (!parseExpr(Imm)) {
6461     // The operand is optional; if it is not present, default to 0.
6462     Imm = 0;
6463   }
6464
6465   if (!isUInt<16>(Imm)) {
6466     Error(S, "expected a 16-bit value");
6467     return MatchOperand_ParseFail;
6468   }
6469
6470   Operands.push_back(
6471       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6472   return MatchOperand_Success;
6473 }
6474
6475 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6476