1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
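    // For example, a bare word such as 'gds' may have been lexed as a symbol
    // reference; isSymbolRefExpr() below lets such an operand still match a
    // token, with getToken() returning the symbol's name.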
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool 
isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 
426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcB16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool 
isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMEMOffset() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 void setImm(int64_t Val) { 693 assert(isImm()); 694 Imm.Val = Val; 695 } 696 697 ImmTy getImmTy() const { 698 assert(isImm()); 699 return Imm.Type; 700 } 701 702 unsigned getReg() const override { 703 assert(isRegKind()); 704 return Reg.RegNo; 705 } 706 707 SMLoc getStartLoc() const override { 708 return StartLoc; 709 } 710 711 SMLoc getEndLoc() const override { 712 return EndLoc; 713 } 714 715 SMRange getLocRange() const { 716 return SMRange(StartLoc, EndLoc); 717 } 718 719 Modifiers getModifiers() const { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 return isRegKind() ? 
Reg.Mods : Imm.Mods; 722 } 723 724 void setModifiers(Modifiers Mods) { 725 assert(isRegKind() || isImmTy(ImmTyNone)); 726 if (isRegKind()) 727 Reg.Mods = Mods; 728 else 729 Imm.Mods = Mods; 730 } 731 732 bool hasModifiers() const { 733 return getModifiers().hasModifiers(); 734 } 735 736 bool hasFPModifiers() const { 737 return getModifiers().hasFPModifiers(); 738 } 739 740 bool hasIntModifiers() const { 741 return getModifiers().hasIntModifiers(); 742 } 743 744 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 745 746 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 747 748 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 749 750 template <unsigned Bitwidth> 751 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 752 753 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 754 addKImmFPOperands<16>(Inst, N); 755 } 756 757 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 758 addKImmFPOperands<32>(Inst, N); 759 } 760 761 void addRegOperands(MCInst &Inst, unsigned N) const; 762 763 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 764 addRegOperands(Inst, N); 765 } 766 767 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 768 if (isRegKind()) 769 addRegOperands(Inst, N); 770 else if (isExpr()) 771 Inst.addOperand(MCOperand::createExpr(Expr)); 772 else 773 addImmOperands(Inst, N); 774 } 775 776 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 777 Modifiers Mods = getModifiers(); 778 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 779 if (isRegKind()) { 780 addRegOperands(Inst, N); 781 } else { 782 addImmOperands(Inst, N, false); 783 } 784 } 785 786 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasIntModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 792 assert(!hasFPModifiers()); 793 addRegOrImmWithInputModsOperands(Inst, N); 794 } 795 796 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 797 Modifiers Mods = getModifiers(); 798 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 799 assert(isRegKind()); 800 addRegOperands(Inst, N); 801 } 802 803 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasIntModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 809 assert(!hasFPModifiers()); 810 addRegWithInputModsOperands(Inst, N); 811 } 812 813 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 814 if (isImm()) 815 addImmOperands(Inst, N); 816 else { 817 assert(isExpr()); 818 Inst.addOperand(MCOperand::createExpr(Expr)); 819 } 820 } 821 822 static void printImmTy(raw_ostream& OS, ImmTy Type) { 823 switch (Type) { 824 case ImmTyNone: OS << "None"; break; 825 case ImmTyGDS: OS << "GDS"; break; 826 case ImmTyLDS: OS << "LDS"; break; 827 case ImmTyOffen: OS << "Offen"; break; 828 case ImmTyIdxen: OS << "Idxen"; break; 829 case ImmTyAddr64: OS << "Addr64"; break; 830 case ImmTyOffset: OS << "Offset"; break; 831 case ImmTyInstOffset: OS << "InstOffset"; break; 832 case ImmTyOffset0: OS << "Offset0"; break; 833 case ImmTyOffset1: OS << "Offset1"; break; 834 case ImmTyDLC: OS << "DLC"; break; 835 case ImmTyGLC: OS << "GLC"; break; 836 case ImmTySLC: OS << "SLC"; break; 837 case ImmTySWZ: OS << "SWZ"; break; 838 case ImmTyTFE: OS << "TFE"; break; 839 case 
ImmTyD16: OS << "D16"; break; 840 case ImmTyFORMAT: OS << "FORMAT"; break; 841 case ImmTyClampSI: OS << "ClampSI"; break; 842 case ImmTyOModSI: OS << "OModSI"; break; 843 case ImmTyDPP8: OS << "DPP8"; break; 844 case ImmTyDppCtrl: OS << "DppCtrl"; break; 845 case ImmTyDppRowMask: OS << "DppRowMask"; break; 846 case ImmTyDppBankMask: OS << "DppBankMask"; break; 847 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 848 case ImmTyDppFi: OS << "FI"; break; 849 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 850 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 851 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 852 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 853 case ImmTyDMask: OS << "DMask"; break; 854 case ImmTyDim: OS << "Dim"; break; 855 case ImmTyUNorm: OS << "UNorm"; break; 856 case ImmTyDA: OS << "DA"; break; 857 case ImmTyR128A16: OS << "R128A16"; break; 858 case ImmTyA16: OS << "A16"; break; 859 case ImmTyLWE: OS << "LWE"; break; 860 case ImmTyOff: OS << "Off"; break; 861 case ImmTyExpTgt: OS << "ExpTgt"; break; 862 case ImmTyExpCompr: OS << "ExpCompr"; break; 863 case ImmTyExpVM: OS << "ExpVM"; break; 864 case ImmTyHwreg: OS << "Hwreg"; break; 865 case ImmTySendMsg: OS << "SendMsg"; break; 866 case ImmTyInterpSlot: OS << "InterpSlot"; break; 867 case ImmTyInterpAttr: OS << "InterpAttr"; break; 868 case ImmTyAttrChan: OS << "AttrChan"; break; 869 case ImmTyOpSel: OS << "OpSel"; break; 870 case ImmTyOpSelHi: OS << "OpSelHi"; break; 871 case ImmTyNegLo: OS << "NegLo"; break; 872 case ImmTyNegHi: OS << "NegHi"; break; 873 case ImmTySwizzle: OS << "Swizzle"; break; 874 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 875 case ImmTyHigh: OS << "High"; break; 876 case ImmTyBLGP: OS << "BLGP"; break; 877 case ImmTyCBSZ: OS << "CBSZ"; break; 878 case ImmTyABID: OS << "ABID"; break; 879 case ImmTyEndpgm: OS << "Endpgm"; break; 880 } 881 } 882 883 void print(raw_ostream &OS) const override { 884 switch (Kind) { 885 case Register: 886 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 887 break; 888 case Immediate: 889 OS << '<' << getImm(); 890 if (getImmTy() != ImmTyNone) { 891 OS << " type: "; printImmTy(OS, getImmTy()); 892 } 893 OS << " mods: " << Imm.Mods << '>'; 894 break; 895 case Token: 896 OS << '\'' << getToken() << '\''; 897 break; 898 case Expression: 899 OS << "<expr " << *Expr << '>'; 900 break; 901 } 902 } 903 904 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 905 int64_t Val, SMLoc Loc, 906 ImmTy Type = ImmTyNone, 907 bool IsFPImm = false) { 908 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 909 Op->Imm.Val = Val; 910 Op->Imm.IsFPImm = IsFPImm; 911 Op->Imm.Type = Type; 912 Op->Imm.Mods = Modifiers(); 913 Op->StartLoc = Loc; 914 Op->EndLoc = Loc; 915 return Op; 916 } 917 918 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 919 StringRef Str, SMLoc Loc, 920 bool HasExplicitEncodingSize = true) { 921 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 922 Res->Tok.Data = Str.data(); 923 Res->Tok.Length = Str.size(); 924 Res->StartLoc = Loc; 925 Res->EndLoc = Loc; 926 return Res; 927 } 928 929 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 930 unsigned RegNo, SMLoc S, 931 SMLoc E) { 932 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 933 Op->Reg.RegNo = RegNo; 934 Op->Reg.Mods = Modifiers(); 935 Op->StartLoc = S; 936 Op->EndLoc = E; 937 return Op; 938 } 939 940 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 941 
const class MCExpr *Expr, SMLoc S) { 942 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 943 Op->Expr = Expr; 944 Op->StartLoc = S; 945 Op->EndLoc = S; 946 return Op; 947 } 948 }; 949 950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 951 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 952 return OS; 953 } 954 955 //===----------------------------------------------------------------------===// 956 // AsmParser 957 //===----------------------------------------------------------------------===// 958 959 // Holds info related to the current kernel, e.g. count of SGPRs used. 960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 961 // .amdgpu_hsa_kernel or at EOF. 962 class KernelScopeInfo { 963 int SgprIndexUnusedMin = -1; 964 int VgprIndexUnusedMin = -1; 965 MCContext *Ctx = nullptr; 966 967 void usesSgprAt(int i) { 968 if (i >= SgprIndexUnusedMin) { 969 SgprIndexUnusedMin = ++i; 970 if (Ctx) { 971 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 972 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 973 } 974 } 975 } 976 977 void usesVgprAt(int i) { 978 if (i >= VgprIndexUnusedMin) { 979 VgprIndexUnusedMin = ++i; 980 if (Ctx) { 981 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 982 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 983 } 984 } 985 } 986 987 public: 988 KernelScopeInfo() = default; 989 990 void initialize(MCContext &Context) { 991 Ctx = &Context; 992 usesSgprAt(SgprIndexUnusedMin = -1); 993 usesVgprAt(VgprIndexUnusedMin = -1); 994 } 995 996 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 997 switch (RegKind) { 998 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 999 case IS_AGPR: // fall through 1000 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1001 default: break; 1002 } 1003 } 1004 }; 1005 1006 class AMDGPUAsmParser : public MCTargetAsmParser { 1007 MCAsmParser &Parser; 1008 1009 // Number of extra operands parsed after the first optional operand. 1010 // This may be necessary to skip hardcoded mandatory operands. 1011 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1012 1013 unsigned ForcedEncodingSize = 0; 1014 bool ForcedDPP = false; 1015 bool ForcedSDWA = false; 1016 KernelScopeInfo KernelScope; 1017 1018 /// @name Auto-generated Match Functions 1019 /// { 1020 1021 #define GET_ASSEMBLER_HEADER 1022 #include "AMDGPUGenAsmMatcher.inc" 1023 1024 /// } 1025 1026 private: 1027 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1028 bool OutOfRangeError(SMRange Range); 1029 /// Calculate VGPR/SGPR blocks required for given target, reserved 1030 /// registers, and user-specified NextFreeXGPR values. 1031 /// 1032 /// \param Features [in] Target features, used for bug corrections. 1033 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1034 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1035 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1036 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1037 /// descriptor field, if valid. 1038 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1039 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1040 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1041 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1042 /// \param VGPRBlocks [out] Result VGPR block count. 1043 /// \param SGPRBlocks [out] Result SGPR block count. 1044 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1045 bool FlatScrUsed, bool XNACKUsed, 1046 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1047 SMRange VGPRRange, unsigned NextFreeSGPR, 1048 SMRange SGPRRange, unsigned &VGPRBlocks, 1049 unsigned &SGPRBlocks); 1050 bool ParseDirectiveAMDGCNTarget(); 1051 bool ParseDirectiveAMDHSAKernel(); 1052 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1053 bool ParseDirectiveHSACodeObjectVersion(); 1054 bool ParseDirectiveHSACodeObjectISA(); 1055 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1056 bool ParseDirectiveAMDKernelCodeT(); 1057 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1058 bool ParseDirectiveAMDGPUHsaKernel(); 1059 1060 bool ParseDirectiveISAVersion(); 1061 bool ParseDirectiveHSAMetadata(); 1062 bool ParseDirectivePALMetadataBegin(); 1063 bool ParseDirectivePALMetadata(); 1064 bool ParseDirectiveAMDGPULDS(); 1065 1066 /// Common code to parse out a block of text (typically YAML) between start and 1067 /// end directives. 1068 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1069 const char *AssemblerDirectiveEnd, 1070 std::string &CollectString); 1071 1072 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1073 RegisterKind RegKind, unsigned Reg1); 1074 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1075 unsigned &RegNum, unsigned &RegWidth, 1076 bool RestoreOnFailure = false); 1077 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1078 unsigned &RegNum, unsigned &RegWidth, 1079 SmallVectorImpl<AsmToken> &Tokens); 1080 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1081 unsigned &RegWidth, 1082 SmallVectorImpl<AsmToken> &Tokens); 1083 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1084 unsigned &RegWidth, 1085 SmallVectorImpl<AsmToken> &Tokens); 1086 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1087 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1088 bool ParseRegRange(unsigned& Num, unsigned& Width); 1089 unsigned getRegularReg(RegisterKind RegKind, 1090 unsigned RegNum, 1091 unsigned RegWidth); 1092 1093 bool isRegister(); 1094 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1095 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1096 void initializeGprCountSymbol(RegisterKind RegKind); 1097 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1098 unsigned RegWidth); 1099 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1100 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1101 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1102 bool IsGdsHardcoded); 1103 1104 public: 1105 enum AMDGPUMatchResultTy { 1106 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1107 }; 1108 enum OperandMode { 1109 OperandMode_Default, 1110 OperandMode_NSA, 1111 }; 1112 1113 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1114 1115 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1116 const MCInstrInfo &MII, 1117 const MCTargetOptions &Options) 1118 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1119 MCAsmParserExtension::Initialize(Parser); 1120 1121 if (getFeatureBits().none()) { 1122 // Set default features. 
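      // No subtarget features were supplied on the command line; fall back to
      // the southern-islands feature set so parsing has a concrete target.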
copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
1228 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1229 } 1230 1231 const MCInstrInfo *getMII() const { 1232 return &MII; 1233 } 1234 1235 const FeatureBitset &getFeatureBits() const { 1236 return getSTI().getFeatureBits(); 1237 } 1238 1239 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1240 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1241 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1242 1243 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1244 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1245 bool isForcedDPP() const { return ForcedDPP; } 1246 bool isForcedSDWA() const { return ForcedSDWA; } 1247 ArrayRef<unsigned> getMatchedVariants() const; 1248 1249 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1250 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1251 bool RestoreOnFailure); 1252 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1253 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1254 SMLoc &EndLoc) override; 1255 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1256 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1257 unsigned Kind) override; 1258 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1259 OperandVector &Operands, MCStreamer &Out, 1260 uint64_t &ErrorInfo, 1261 bool MatchingInlineAsm) override; 1262 bool ParseDirective(AsmToken DirectiveID) override; 1263 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1264 OperandMode Mode = OperandMode_Default); 1265 StringRef parseMnemonicSuffix(StringRef Name); 1266 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1267 SMLoc NameLoc, OperandVector &Operands) override; 1268 //bool ProcessInstruction(MCInst &Inst); 1269 1270 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1271 1272 OperandMatchResultTy 1273 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1274 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1275 bool (*ConvertResult)(int64_t &) = nullptr); 1276 1277 OperandMatchResultTy 1278 parseOperandArrayWithPrefix(const char *Prefix, 1279 OperandVector &Operands, 1280 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1281 bool (*ConvertResult)(int64_t&) = nullptr); 1282 1283 OperandMatchResultTy 1284 parseNamedBit(const char *Name, OperandVector &Operands, 1285 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1286 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1287 StringRef &Value); 1288 1289 bool isModifier(); 1290 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1291 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1292 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1293 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1294 bool parseSP3NegModifier(); 1295 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1296 OperandMatchResultTy parseReg(OperandVector &Operands); 1297 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1298 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1299 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1300 
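  // The input-modifier parsers above and the register-only variants below
  // accept SP3-style source modifiers, e.g. a register or immediate wrapped in
  // neg(...)/-, abs(...)/|...| for FP operands, or sext(...) for integer ones.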
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1301 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1302 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1303 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1304 OperandMatchResultTy parseUfmt(int64_t &Format); 1305 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1306 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1307 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1308 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1309 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1310 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1311 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1312 1313 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1314 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1315 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1316 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1317 1318 bool parseCnt(int64_t &IntVal); 1319 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1320 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1321 1322 private: 1323 struct OperandInfoTy { 1324 int64_t Id; 1325 bool IsSymbolic = false; 1326 bool IsDefined = false; 1327 1328 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1329 }; 1330 1331 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1332 bool validateSendMsg(const OperandInfoTy &Msg, 1333 const OperandInfoTy &Op, 1334 const OperandInfoTy &Stream, 1335 const SMLoc Loc); 1336 1337 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1338 bool validateHwreg(const OperandInfoTy &HwReg, 1339 const int64_t Offset, 1340 const int64_t Width, 1341 const SMLoc Loc); 1342 1343 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1344 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1345 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1346 1347 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1348 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1349 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1350 bool validateSOPLiteral(const MCInst &Inst) const; 1351 bool validateConstantBusLimitations(const MCInst &Inst); 1352 bool validateEarlyClobberLimitations(const MCInst &Inst); 1353 bool validateIntClampSupported(const MCInst &Inst); 1354 bool validateMIMGAtomicDMask(const MCInst &Inst); 1355 bool validateMIMGGatherDMask(const MCInst &Inst); 1356 bool validateMovrels(const MCInst &Inst); 1357 bool validateMIMGDataSize(const MCInst &Inst); 1358 bool validateMIMGAddrSize(const MCInst &Inst); 1359 bool validateMIMGD16(const MCInst &Inst); 1360 bool validateMIMGDim(const MCInst &Inst); 1361 bool validateLdsDirect(const MCInst &Inst); 1362 bool validateOpSel(const MCInst &Inst); 1363 bool validateVccOperand(unsigned Reg) const; 1364 bool validateVOP3Literal(const MCInst &Inst) const; 1365 bool validateMAIAccWrite(const MCInst &Inst); 1366 unsigned getConstantBusLimit(unsigned Opcode) const; 1367 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1368 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1369 
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1370 1371 bool isId(const StringRef Id) const; 1372 bool isId(const AsmToken &Token, const StringRef Id) const; 1373 bool isToken(const AsmToken::TokenKind Kind) const; 1374 bool trySkipId(const StringRef Id); 1375 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1376 bool trySkipToken(const AsmToken::TokenKind Kind); 1377 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1378 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1379 bool parseId(StringRef &Val, const StringRef ErrMsg); 1380 1381 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1382 AsmToken::TokenKind getTokenKind() const; 1383 bool parseExpr(int64_t &Imm); 1384 bool parseExpr(OperandVector &Operands); 1385 StringRef getTokenStr() const; 1386 AsmToken peekToken(); 1387 AsmToken getToken() const; 1388 SMLoc getLoc() const; 1389 void lex(); 1390 1391 public: 1392 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1393 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1394 1395 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1396 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1397 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1398 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1399 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1400 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1401 1402 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1403 const unsigned MinVal, 1404 const unsigned MaxVal, 1405 const StringRef ErrMsg); 1406 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1407 bool parseSwizzleOffset(int64_t &Imm); 1408 bool parseSwizzleMacro(int64_t &Imm); 1409 bool parseSwizzleQuadPerm(int64_t &Imm); 1410 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1411 bool parseSwizzleBroadcast(int64_t &Imm); 1412 bool parseSwizzleSwap(int64_t &Imm); 1413 bool parseSwizzleReverse(int64_t &Imm); 1414 1415 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1416 int64_t parseGPRIdxMacro(); 1417 1418 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1419 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1420 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1421 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1422 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1423 1424 AMDGPUOperand::Ptr defaultDLC() const; 1425 AMDGPUOperand::Ptr defaultGLC() const; 1426 AMDGPUOperand::Ptr defaultSLC() const; 1427 1428 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1429 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1430 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1431 AMDGPUOperand::Ptr defaultFlatOffset() const; 1432 1433 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1434 1435 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1436 OptionalImmIndexMap &OptionalIdx); 1437 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1438 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1439 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1440 1441 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1442 1443 void cvtMIMG(MCInst &Inst, 
const OperandVector &Operands, 1444 bool IsAtomic = false); 1445 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1446 1447 OperandMatchResultTy parseDim(OperandVector &Operands); 1448 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1449 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1450 AMDGPUOperand::Ptr defaultRowMask() const; 1451 AMDGPUOperand::Ptr defaultBankMask() const; 1452 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1453 AMDGPUOperand::Ptr defaultFI() const; 1454 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1455 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1456 1457 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1458 AMDGPUOperand::ImmTy Type); 1459 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1460 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1461 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1462 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1463 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1464 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1465 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1466 uint64_t BasicInstType, 1467 bool SkipDstVcc = false, 1468 bool SkipSrcVcc = false); 1469 1470 AMDGPUOperand::Ptr defaultBLGP() const; 1471 AMDGPUOperand::Ptr defaultCBSZ() const; 1472 AMDGPUOperand::Ptr defaultABID() const; 1473 1474 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1475 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1476 }; 1477 1478 struct OptionalOperand { 1479 const char *Name; 1480 AMDGPUOperand::ImmTy Type; 1481 bool IsBit; 1482 bool (*ConvertResult)(int64_t&); 1483 }; 1484 1485 } // end anonymous namespace 1486 1487 // May be called with integer type with equivalent bitwidth. 
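// Size is given in bytes: 2 selects IEEE half, 4 IEEE single, and 8 IEEE
// double semantics (see the switch below).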
1488 static const fltSemantics *getFltSemantics(unsigned Size) { 1489 switch (Size) { 1490 case 4: 1491 return &APFloat::IEEEsingle(); 1492 case 8: 1493 return &APFloat::IEEEdouble(); 1494 case 2: 1495 return &APFloat::IEEEhalf(); 1496 default: 1497 llvm_unreachable("unsupported fp type"); 1498 } 1499 } 1500 1501 static const fltSemantics *getFltSemantics(MVT VT) { 1502 return getFltSemantics(VT.getSizeInBits() / 8); 1503 } 1504 1505 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1506 switch (OperandType) { 1507 case AMDGPU::OPERAND_REG_IMM_INT32: 1508 case AMDGPU::OPERAND_REG_IMM_FP32: 1509 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1510 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1511 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1512 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1513 return &APFloat::IEEEsingle(); 1514 case AMDGPU::OPERAND_REG_IMM_INT64: 1515 case AMDGPU::OPERAND_REG_IMM_FP64: 1516 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1517 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1518 return &APFloat::IEEEdouble(); 1519 case AMDGPU::OPERAND_REG_IMM_INT16: 1520 case AMDGPU::OPERAND_REG_IMM_FP16: 1521 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1522 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1523 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1524 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1525 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1526 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1527 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1528 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1529 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1530 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1531 return &APFloat::IEEEhalf(); 1532 default: 1533 llvm_unreachable("unsupported fp type"); 1534 } 1535 } 1536 1537 //===----------------------------------------------------------------------===// 1538 // Operand 1539 //===----------------------------------------------------------------------===// 1540 1541 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1542 bool Lost; 1543 1544 // Convert literal to single precision 1545 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1546 APFloat::rmNearestTiesToEven, 1547 &Lost); 1548 // We allow precision lost but not overflow or underflow 1549 if (Status != APFloat::opOK && 1550 Lost && 1551 ((Status & APFloat::opOverflow) != 0 || 1552 (Status & APFloat::opUnderflow) != 0)) { 1553 return false; 1554 } 1555 1556 return true; 1557 } 1558 1559 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1560 return isUIntN(Size, Val) || isIntN(Size, Val); 1561 } 1562 1563 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1564 if (VT.getScalarType() == MVT::i16) { 1565 // FP immediate values are broken. 1566 return isInlinableIntLiteral(Val); 1567 } 1568 1569 // f16/v2f16 operands work correctly for all values. 1570 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1571 } 1572 1573 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1574 1575 // This is a hack to enable named inline values like 1576 // shared_base with both 32-bit and 64-bit operands. 1577 // Note that these values are defined as 1578 // 32-bit operands only. 1579 if (isInlineValue()) { 1580 return true; 1581 } 1582 1583 if (!isImmTy(ImmTyNone)) { 1584 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1585 return false; 1586 } 1587 // TODO: We should avoid using host float here. It would be better to 1588 // check the float bit values which is what a few other places do. 
// We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept these literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ?
MVT::i16 : type; 1678 1679 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1680 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1681 } 1682 1683 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1684 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1685 } 1686 1687 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1688 if (AsmParser->isVI()) 1689 return isVReg32(); 1690 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1691 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1692 else 1693 return false; 1694 } 1695 1696 bool AMDGPUOperand::isSDWAFP16Operand() const { 1697 return isSDWAOperand(MVT::f16); 1698 } 1699 1700 bool AMDGPUOperand::isSDWAFP32Operand() const { 1701 return isSDWAOperand(MVT::f32); 1702 } 1703 1704 bool AMDGPUOperand::isSDWAInt16Operand() const { 1705 return isSDWAOperand(MVT::i16); 1706 } 1707 1708 bool AMDGPUOperand::isSDWAInt32Operand() const { 1709 return isSDWAOperand(MVT::i32); 1710 } 1711 1712 bool AMDGPUOperand::isBoolReg() const { 1713 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1714 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1715 } 1716 1717 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1718 { 1719 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1720 assert(Size == 2 || Size == 4 || Size == 8); 1721 1722 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1723 1724 if (Imm.Mods.Abs) { 1725 Val &= ~FpSignMask; 1726 } 1727 if (Imm.Mods.Neg) { 1728 Val ^= FpSignMask; 1729 } 1730 1731 return Val; 1732 } 1733 1734 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1735 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1736 Inst.getNumOperands())) { 1737 addLiteralImmOperand(Inst, Imm.Val, 1738 ApplyModifiers & 1739 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1740 } else { 1741 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1742 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1743 } 1744 } 1745 1746 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1747 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1748 auto OpNum = Inst.getNumOperands(); 1749 // Check that this operand accepts literals 1750 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1751 1752 if (ApplyModifiers) { 1753 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1754 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1755 Val = applyInputFPModifiers(Val, Size); 1756 } 1757 1758 APInt Literal(64, Val); 1759 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1760 1761 if (Imm.IsFPImm) { // We got fp literal token 1762 switch (OpTy) { 1763 case AMDGPU::OPERAND_REG_IMM_INT64: 1764 case AMDGPU::OPERAND_REG_IMM_FP64: 1765 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1766 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1767 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1768 AsmParser->hasInv2PiInlineImm())) { 1769 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1770 return; 1771 } 1772 1773 // Non-inlineable 1774 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1775 // For fp operands we check if low 32 bits are zeros 1776 if (Literal.getLoBits(32) != 0) { 1777 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1778 "Can't encode literal as exact 64-bit floating-point operand. " 1779 "Low 32-bits will be set to zero"); 1780 } 1781 1782 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1783 return; 1784 } 1785 1786 // We don't allow fp literals in 64-bit integer instructions. It is 1787 // unclear how we should encode them. This case should be checked earlier 1788 // in predicate methods (isLiteralImm()) 1789 llvm_unreachable("fp literal in 64-bit integer instruction."); 1790 1791 case AMDGPU::OPERAND_REG_IMM_INT32: 1792 case AMDGPU::OPERAND_REG_IMM_FP32: 1793 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1794 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1797 case AMDGPU::OPERAND_REG_IMM_INT16: 1798 case AMDGPU::OPERAND_REG_IMM_FP16: 1799 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1800 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1801 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1802 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1803 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1807 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1808 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1809 bool lost; 1810 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1811 // Convert literal to single precision 1812 FPLiteral.convert(*getOpFltSemantics(OpTy), 1813 APFloat::rmNearestTiesToEven, &lost); 1814 // We allow precision lost but not overflow or underflow. This should be 1815 // checked earlier in isLiteralImm() 1816 1817 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1818 Inst.addOperand(MCOperand::createImm(ImmVal)); 1819 return; 1820 } 1821 default: 1822 llvm_unreachable("invalid operand size"); 1823 } 1824 1825 return; 1826 } 1827 1828 // We got int literal token. 1829 // Only sign extend inline immediates. 
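// For illustration (based on the 32-bit cases below): an integer literal
// such as -1 fits an inline constant and is emitted unchanged, while a value
// like 0x12345678 is not inlinable and is emitted as a literal truncated to
// its low 32 bits.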
1830 switch (OpTy) { 1831 case AMDGPU::OPERAND_REG_IMM_INT32: 1832 case AMDGPU::OPERAND_REG_IMM_FP32: 1833 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1834 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1835 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1836 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1837 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1838 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1839 if (isSafeTruncation(Val, 32) && 1840 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1841 AsmParser->hasInv2PiInlineImm())) { 1842 Inst.addOperand(MCOperand::createImm(Val)); 1843 return; 1844 } 1845 1846 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1847 return; 1848 1849 case AMDGPU::OPERAND_REG_IMM_INT64: 1850 case AMDGPU::OPERAND_REG_IMM_FP64: 1851 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1852 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1853 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1854 Inst.addOperand(MCOperand::createImm(Val)); 1855 return; 1856 } 1857 1858 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1859 return; 1860 1861 case AMDGPU::OPERAND_REG_IMM_INT16: 1862 case AMDGPU::OPERAND_REG_IMM_FP16: 1863 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1864 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1865 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1867 if (isSafeTruncation(Val, 16) && 1868 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1869 AsmParser->hasInv2PiInlineImm())) { 1870 Inst.addOperand(MCOperand::createImm(Val)); 1871 return; 1872 } 1873 1874 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1875 return; 1876 1877 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1878 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1879 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1881 assert(isSafeTruncation(Val, 16)); 1882 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1883 AsmParser->hasInv2PiInlineImm())); 1884 1885 Inst.addOperand(MCOperand::createImm(Val)); 1886 return; 1887 } 1888 default: 1889 llvm_unreachable("invalid operand size"); 1890 } 1891 } 1892 1893 template <unsigned Bitwidth> 1894 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1895 APInt Literal(64, Imm.Val); 1896 1897 if (!Imm.IsFPImm) { 1898 // We got int literal token. 
1899 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1900 return; 1901 } 1902 1903 bool Lost; 1904 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1905 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1906 APFloat::rmNearestTiesToEven, &Lost); 1907 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1908 } 1909 1910 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1911 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1912 } 1913 1914 static bool isInlineValue(unsigned Reg) { 1915 switch (Reg) { 1916 case AMDGPU::SRC_SHARED_BASE: 1917 case AMDGPU::SRC_SHARED_LIMIT: 1918 case AMDGPU::SRC_PRIVATE_BASE: 1919 case AMDGPU::SRC_PRIVATE_LIMIT: 1920 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1921 return true; 1922 case AMDGPU::SRC_VCCZ: 1923 case AMDGPU::SRC_EXECZ: 1924 case AMDGPU::SRC_SCC: 1925 return true; 1926 case AMDGPU::SGPR_NULL: 1927 return true; 1928 default: 1929 return false; 1930 } 1931 } 1932 1933 bool AMDGPUOperand::isInlineValue() const { 1934 return isRegKind() && ::isInlineValue(getReg()); 1935 } 1936 1937 //===----------------------------------------------------------------------===// 1938 // AsmParser 1939 //===----------------------------------------------------------------------===// 1940 1941 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1942 if (Is == IS_VGPR) { 1943 switch (RegWidth) { 1944 default: return -1; 1945 case 1: return AMDGPU::VGPR_32RegClassID; 1946 case 2: return AMDGPU::VReg_64RegClassID; 1947 case 3: return AMDGPU::VReg_96RegClassID; 1948 case 4: return AMDGPU::VReg_128RegClassID; 1949 case 5: return AMDGPU::VReg_160RegClassID; 1950 case 6: return AMDGPU::VReg_192RegClassID; 1951 case 8: return AMDGPU::VReg_256RegClassID; 1952 case 16: return AMDGPU::VReg_512RegClassID; 1953 case 32: return AMDGPU::VReg_1024RegClassID; 1954 } 1955 } else if (Is == IS_TTMP) { 1956 switch (RegWidth) { 1957 default: return -1; 1958 case 1: return AMDGPU::TTMP_32RegClassID; 1959 case 2: return AMDGPU::TTMP_64RegClassID; 1960 case 4: return AMDGPU::TTMP_128RegClassID; 1961 case 8: return AMDGPU::TTMP_256RegClassID; 1962 case 16: return AMDGPU::TTMP_512RegClassID; 1963 } 1964 } else if (Is == IS_SGPR) { 1965 switch (RegWidth) { 1966 default: return -1; 1967 case 1: return AMDGPU::SGPR_32RegClassID; 1968 case 2: return AMDGPU::SGPR_64RegClassID; 1969 case 3: return AMDGPU::SGPR_96RegClassID; 1970 case 4: return AMDGPU::SGPR_128RegClassID; 1971 case 5: return AMDGPU::SGPR_160RegClassID; 1972 case 6: return AMDGPU::SGPR_192RegClassID; 1973 case 8: return AMDGPU::SGPR_256RegClassID; 1974 case 16: return AMDGPU::SGPR_512RegClassID; 1975 } 1976 } else if (Is == IS_AGPR) { 1977 switch (RegWidth) { 1978 default: return -1; 1979 case 1: return AMDGPU::AGPR_32RegClassID; 1980 case 2: return AMDGPU::AReg_64RegClassID; 1981 case 3: return AMDGPU::AReg_96RegClassID; 1982 case 4: return AMDGPU::AReg_128RegClassID; 1983 case 5: return AMDGPU::AReg_160RegClassID; 1984 case 6: return AMDGPU::AReg_192RegClassID; 1985 case 8: return AMDGPU::AReg_256RegClassID; 1986 case 16: return AMDGPU::AReg_512RegClassID; 1987 case 32: return AMDGPU::AReg_1024RegClassID; 1988 } 1989 } 1990 return -1; 1991 } 1992 1993 static unsigned getSpecialRegForName(StringRef RegName) { 1994 return StringSwitch<unsigned>(RegName) 1995 .Case("exec", AMDGPU::EXEC) 1996 .Case("vcc", AMDGPU::VCC) 1997 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1998 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1999 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2000 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2001 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2002 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2003 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2004 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2005 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2006 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2007 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2008 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2009 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2010 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2011 .Case("m0", AMDGPU::M0) 2012 .Case("vccz", AMDGPU::SRC_VCCZ) 2013 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2014 .Case("execz", AMDGPU::SRC_EXECZ) 2015 .Case("src_execz", AMDGPU::SRC_EXECZ) 2016 .Case("scc", AMDGPU::SRC_SCC) 2017 .Case("src_scc", AMDGPU::SRC_SCC) 2018 .Case("tba", AMDGPU::TBA) 2019 .Case("tma", AMDGPU::TMA) 2020 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2021 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2022 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2023 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2024 .Case("vcc_lo", AMDGPU::VCC_LO) 2025 .Case("vcc_hi", AMDGPU::VCC_HI) 2026 .Case("exec_lo", AMDGPU::EXEC_LO) 2027 .Case("exec_hi", AMDGPU::EXEC_HI) 2028 .Case("tma_lo", AMDGPU::TMA_LO) 2029 .Case("tma_hi", AMDGPU::TMA_HI) 2030 .Case("tba_lo", AMDGPU::TBA_LO) 2031 .Case("tba_hi", AMDGPU::TBA_HI) 2032 .Case("pc", AMDGPU::PC_REG) 2033 .Case("null", AMDGPU::SGPR_NULL) 2034 .Default(AMDGPU::NoRegister); 2035 } 2036 2037 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2038 SMLoc &EndLoc, bool RestoreOnFailure) { 2039 auto R = parseRegister(); 2040 if (!R) return true; 2041 assert(R->isReg()); 2042 RegNo = R->getReg(); 2043 StartLoc = R->getStartLoc(); 2044 EndLoc = R->getEndLoc(); 2045 return false; 2046 } 2047 2048 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2049 SMLoc &EndLoc) { 2050 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2051 } 2052 2053 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2054 SMLoc &StartLoc, 2055 SMLoc &EndLoc) { 2056 bool Result = 2057 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2058 bool PendingErrors = getParser().hasPendingError(); 2059 getParser().clearPendingErrors(); 2060 if (PendingErrors) 2061 return MatchOperand_ParseFail; 2062 if (Result) 2063 return MatchOperand_NoMatch; 2064 return MatchOperand_Success; 2065 } 2066 2067 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2068 RegisterKind RegKind, unsigned Reg1) { 2069 switch (RegKind) { 2070 case IS_SPECIAL: 2071 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2072 Reg = AMDGPU::EXEC; 2073 RegWidth = 2; 2074 return true; 2075 } 2076 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2077 Reg = AMDGPU::FLAT_SCR; 2078 RegWidth = 2; 2079 return true; 2080 } 2081 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2082 Reg = AMDGPU::XNACK_MASK; 2083 RegWidth = 2; 2084 return true; 2085 } 2086 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2087 Reg = AMDGPU::VCC; 2088 RegWidth = 2; 2089 return true; 2090 } 2091 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2092 Reg = AMDGPU::TBA; 2093 RegWidth = 2; 2094 return true; 2095 } 2096 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2097 Reg = AMDGPU::TMA; 2098 RegWidth = 2; 
2099 return true; 2100 } 2101 return false; 2102 case IS_VGPR: 2103 case IS_SGPR: 2104 case IS_AGPR: 2105 case IS_TTMP: 2106 if (Reg1 != Reg + RegWidth) { 2107 return false; 2108 } 2109 RegWidth++; 2110 return true; 2111 default: 2112 llvm_unreachable("unexpected register kind"); 2113 } 2114 } 2115 2116 struct RegInfo { 2117 StringLiteral Name; 2118 RegisterKind Kind; 2119 }; 2120 2121 static constexpr RegInfo RegularRegisters[] = { 2122 {{"v"}, IS_VGPR}, 2123 {{"s"}, IS_SGPR}, 2124 {{"ttmp"}, IS_TTMP}, 2125 {{"acc"}, IS_AGPR}, 2126 {{"a"}, IS_AGPR}, 2127 }; 2128 2129 static bool isRegularReg(RegisterKind Kind) { 2130 return Kind == IS_VGPR || 2131 Kind == IS_SGPR || 2132 Kind == IS_TTMP || 2133 Kind == IS_AGPR; 2134 } 2135 2136 static const RegInfo* getRegularRegInfo(StringRef Str) { 2137 for (const RegInfo &Reg : RegularRegisters) 2138 if (Str.startswith(Reg.Name)) 2139 return &Reg; 2140 return nullptr; 2141 } 2142 2143 static bool getRegNum(StringRef Str, unsigned& Num) { 2144 return !Str.getAsInteger(10, Num); 2145 } 2146 2147 bool 2148 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2149 const AsmToken &NextToken) const { 2150 2151 // A list of consecutive registers: [s0,s1,s2,s3] 2152 if (Token.is(AsmToken::LBrac)) 2153 return true; 2154 2155 if (!Token.is(AsmToken::Identifier)) 2156 return false; 2157 2158 // A single register like s0 or a range of registers like s[0:1] 2159 2160 StringRef Str = Token.getString(); 2161 const RegInfo *Reg = getRegularRegInfo(Str); 2162 if (Reg) { 2163 StringRef RegName = Reg->Name; 2164 StringRef RegSuffix = Str.substr(RegName.size()); 2165 if (!RegSuffix.empty()) { 2166 unsigned Num; 2167 // A single register with an index: rXX 2168 if (getRegNum(RegSuffix, Num)) 2169 return true; 2170 } else { 2171 // A range of registers: r[XX:YY]. 2172 if (NextToken.is(AsmToken::LBrac)) 2173 return true; 2174 } 2175 } 2176 2177 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2178 } 2179 2180 bool 2181 AMDGPUAsmParser::isRegister() 2182 { 2183 return isRegister(getToken(), peekToken()); 2184 } 2185 2186 unsigned 2187 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2188 unsigned RegNum, 2189 unsigned RegWidth) { 2190 2191 assert(isRegularReg(RegKind)); 2192 2193 unsigned AlignSize = 1; 2194 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2195 // SGPR and TTMP registers must be aligned. 2196 // Max required alignment is 4 dwords. 
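// For example, a 2-dword SGPR range must start at an even register:
// s[2:3] maps to a register below, whereas s[1:2] is rejected because its
// starting index is not a multiple of the alignment computed here.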
2197 AlignSize = std::min(RegWidth, 4u); 2198 } 2199 2200 if (RegNum % AlignSize != 0) 2201 return AMDGPU::NoRegister; 2202 2203 unsigned RegIdx = RegNum / AlignSize; 2204 int RCID = getRegClass(RegKind, RegWidth); 2205 if (RCID == -1) 2206 return AMDGPU::NoRegister; 2207 2208 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2209 const MCRegisterClass RC = TRI->getRegClass(RCID); 2210 if (RegIdx >= RC.getNumRegs()) 2211 return AMDGPU::NoRegister; 2212 2213 return RC.getRegister(RegIdx); 2214 } 2215 2216 bool 2217 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2218 int64_t RegLo, RegHi; 2219 if (!trySkipToken(AsmToken::LBrac)) 2220 return false; 2221 2222 if (!parseExpr(RegLo)) 2223 return false; 2224 2225 if (trySkipToken(AsmToken::Colon)) { 2226 if (!parseExpr(RegHi)) 2227 return false; 2228 } else { 2229 RegHi = RegLo; 2230 } 2231 2232 if (!trySkipToken(AsmToken::RBrac)) 2233 return false; 2234 2235 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2236 return false; 2237 2238 Num = static_cast<unsigned>(RegLo); 2239 Width = (RegHi - RegLo) + 1; 2240 return true; 2241 } 2242 2243 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2244 unsigned &RegNum, unsigned &RegWidth, 2245 SmallVectorImpl<AsmToken> &Tokens) { 2246 assert(isToken(AsmToken::Identifier)); 2247 unsigned Reg = getSpecialRegForName(getTokenStr()); 2248 if (Reg) { 2249 RegNum = 0; 2250 RegWidth = 1; 2251 RegKind = IS_SPECIAL; 2252 Tokens.push_back(getToken()); 2253 lex(); // skip register name 2254 } 2255 return Reg; 2256 } 2257 2258 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2259 unsigned &RegNum, unsigned &RegWidth, 2260 SmallVectorImpl<AsmToken> &Tokens) { 2261 assert(isToken(AsmToken::Identifier)); 2262 StringRef RegName = getTokenStr(); 2263 2264 const RegInfo *RI = getRegularRegInfo(RegName); 2265 if (!RI) 2266 return AMDGPU::NoRegister; 2267 Tokens.push_back(getToken()); 2268 lex(); // skip register name 2269 2270 RegKind = RI->Kind; 2271 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2272 if (!RegSuffix.empty()) { 2273 // Single 32-bit register: vXX. 2274 if (!getRegNum(RegSuffix, RegNum)) 2275 return AMDGPU::NoRegister; 2276 RegWidth = 1; 2277 } else { 2278 // Range of registers: v[XX:YY]. ":YY" is optional. 
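// For example, v[8:11] yields RegNum = 8 and RegWidth = 4, while v[8]
// (no ":YY") is a single register equivalent to v8.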
2279 if (!ParseRegRange(RegNum, RegWidth)) 2280 return AMDGPU::NoRegister; 2281 } 2282 2283 return getRegularReg(RegKind, RegNum, RegWidth); 2284 } 2285 2286 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2287 unsigned &RegWidth, 2288 SmallVectorImpl<AsmToken> &Tokens) { 2289 unsigned Reg = AMDGPU::NoRegister; 2290 2291 if (!trySkipToken(AsmToken::LBrac)) 2292 return AMDGPU::NoRegister; 2293 2294 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2295 2296 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2297 return AMDGPU::NoRegister; 2298 if (RegWidth != 1) 2299 return AMDGPU::NoRegister; 2300 2301 for (; trySkipToken(AsmToken::Comma); ) { 2302 RegisterKind NextRegKind; 2303 unsigned NextReg, NextRegNum, NextRegWidth; 2304 2305 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2306 Tokens)) 2307 return AMDGPU::NoRegister; 2308 if (NextRegWidth != 1) 2309 return AMDGPU::NoRegister; 2310 if (NextRegKind != RegKind) 2311 return AMDGPU::NoRegister; 2312 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2313 return AMDGPU::NoRegister; 2314 } 2315 2316 if (!trySkipToken(AsmToken::RBrac)) 2317 return AMDGPU::NoRegister; 2318 2319 if (isRegularReg(RegKind)) 2320 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2321 2322 return Reg; 2323 } 2324 2325 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2326 unsigned &RegNum, unsigned &RegWidth, 2327 SmallVectorImpl<AsmToken> &Tokens) { 2328 Reg = AMDGPU::NoRegister; 2329 2330 if (isToken(AsmToken::Identifier)) { 2331 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2332 if (Reg == AMDGPU::NoRegister) 2333 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2334 } else { 2335 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2336 } 2337 2338 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2339 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2340 } 2341 2342 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2343 unsigned &RegNum, unsigned &RegWidth, 2344 bool RestoreOnFailure) { 2345 Reg = AMDGPU::NoRegister; 2346 2347 SmallVector<AsmToken, 1> Tokens; 2348 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2349 if (RestoreOnFailure) { 2350 while (!Tokens.empty()) { 2351 getLexer().UnLex(Tokens.pop_back_val()); 2352 } 2353 } 2354 return true; 2355 } 2356 return false; 2357 } 2358 2359 Optional<StringRef> 2360 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2361 switch (RegKind) { 2362 case IS_VGPR: 2363 return StringRef(".amdgcn.next_free_vgpr"); 2364 case IS_SGPR: 2365 return StringRef(".amdgcn.next_free_sgpr"); 2366 default: 2367 return None; 2368 } 2369 } 2370 2371 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2372 auto SymbolName = getGprCountSymbolName(RegKind); 2373 assert(SymbolName && "initializing invalid register kind"); 2374 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2375 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2376 } 2377 2378 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2379 unsigned DwordRegIndex, 2380 unsigned RegWidth) { 2381 // Symbols are only defined for GCN targets 2382 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2383 return true; 2384 2385 auto SymbolName = getGprCountSymbolName(RegKind); 2386 if (!SymbolName) 2387 return true; 2388 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2389 2390 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2391 int64_t OldCount; 2392 2393 if (!Sym->isVariable()) 2394 return !Error(getParser().getTok().getLoc(), 2395 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2396 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2397 return !Error( 2398 getParser().getTok().getLoc(), 2399 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2400 2401 if (OldCount <= NewMax) 2402 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2403 2404 return true; 2405 } 2406 2407 std::unique_ptr<AMDGPUOperand> 2408 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2409 const auto &Tok = Parser.getTok(); 2410 SMLoc StartLoc = Tok.getLoc(); 2411 SMLoc EndLoc = Tok.getEndLoc(); 2412 RegisterKind RegKind; 2413 unsigned Reg, RegNum, RegWidth; 2414 2415 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2416 //FIXME: improve error messages (bug 41303). 2417 Error(StartLoc, "not a valid operand."); 2418 return nullptr; 2419 } 2420 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2421 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2422 return nullptr; 2423 } else 2424 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2425 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2426 } 2427 2428 OperandMatchResultTy 2429 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2430 // TODO: add syntactic sugar for 1/(2*PI) 2431 2432 assert(!isRegister()); 2433 assert(!isModifier()); 2434 2435 const auto& Tok = getToken(); 2436 const auto& NextTok = peekToken(); 2437 bool IsReal = Tok.is(AsmToken::Real); 2438 SMLoc S = getLoc(); 2439 bool Negate = false; 2440 2441 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2442 lex(); 2443 IsReal = true; 2444 Negate = true; 2445 } 2446 2447 if (IsReal) { 2448 // Floating-point expressions are not supported. 2449 // Can only allow floating-point literals with an 2450 // optional sign. 2451 2452 StringRef Num = getTokenStr(); 2453 lex(); 2454 2455 APFloat RealVal(APFloat::IEEEdouble()); 2456 auto roundMode = APFloat::rmNearestTiesToEven; 2457 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2458 return MatchOperand_ParseFail; 2459 } 2460 if (Negate) 2461 RealVal.changeSign(); 2462 2463 Operands.push_back( 2464 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2465 AMDGPUOperand::ImmTyNone, true)); 2466 2467 return MatchOperand_Success; 2468 2469 } else { 2470 int64_t IntVal; 2471 const MCExpr *Expr; 2472 SMLoc S = getLoc(); 2473 2474 if (HasSP3AbsModifier) { 2475 // This is a workaround for handling expressions 2476 // as arguments of SP3 'abs' modifier, for example: 2477 // |1.0| 2478 // |-1| 2479 // |1+x| 2480 // This syntax is not compatible with syntax of standard 2481 // MC expressions (due to the trailing '|'). 
2482 SMLoc EndLoc; 2483 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2484 return MatchOperand_ParseFail; 2485 } else { 2486 if (Parser.parseExpression(Expr)) 2487 return MatchOperand_ParseFail; 2488 } 2489 2490 if (Expr->evaluateAsAbsolute(IntVal)) { 2491 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2492 } else { 2493 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2494 } 2495 2496 return MatchOperand_Success; 2497 } 2498 2499 return MatchOperand_NoMatch; 2500 } 2501 2502 OperandMatchResultTy 2503 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2504 if (!isRegister()) 2505 return MatchOperand_NoMatch; 2506 2507 if (auto R = parseRegister()) { 2508 assert(R->isReg()); 2509 Operands.push_back(std::move(R)); 2510 return MatchOperand_Success; 2511 } 2512 return MatchOperand_ParseFail; 2513 } 2514 2515 OperandMatchResultTy 2516 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2517 auto res = parseReg(Operands); 2518 if (res != MatchOperand_NoMatch) { 2519 return res; 2520 } else if (isModifier()) { 2521 return MatchOperand_NoMatch; 2522 } else { 2523 return parseImm(Operands, HasSP3AbsMod); 2524 } 2525 } 2526 2527 bool 2528 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2529 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2530 const auto &str = Token.getString(); 2531 return str == "abs" || str == "neg" || str == "sext"; 2532 } 2533 return false; 2534 } 2535 2536 bool 2537 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2538 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2539 } 2540 2541 bool 2542 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2543 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2544 } 2545 2546 bool 2547 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2548 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2549 } 2550 2551 // Check if this is an operand modifier or an opcode modifier 2552 // which may look like an expression but it is not. We should 2553 // avoid parsing these modifiers as expressions. Currently 2554 // recognized sequences are: 2555 // |...| 2556 // abs(...) 2557 // neg(...) 2558 // sext(...) 2559 // -reg 2560 // -|...| 2561 // -abs(...) 2562 // name:... 2563 // Note that simple opcode modifiers like 'gds' may be parsed as 2564 // expressions; this is a special case. See getExpressionAsToken. 2565 // 2566 bool 2567 AMDGPUAsmParser::isModifier() { 2568 2569 AsmToken Tok = getToken(); 2570 AsmToken NextToken[2]; 2571 peekTokens(NextToken); 2572 2573 return isOperandModifier(Tok, NextToken[0]) || 2574 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2575 isOpcodeModifierWithVal(Tok, NextToken[0]); 2576 } 2577 2578 // Check if the current token is an SP3 'neg' modifier. 2579 // Currently this modifier is allowed in the following context: 2580 // 2581 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2582 // 2. Before an 'abs' modifier: -abs(...) 2583 // 3. Before an SP3 'abs' modifier: -|...| 2584 // 2585 // In all other cases "-" is handled as a part 2586 // of an expression that follows the sign. 
2587 //
2588 // Note: When "-" is followed by an integer literal,
2589 // this is interpreted as integer negation rather
2590 // than a floating-point NEG modifier applied to the literal.
2591 // Besides being counter-intuitive, such use of a floating-point
2592 // NEG modifier would have resulted in different meanings
2593 // of integer literals used with VOP1/2/C and VOP3,
2594 // for example:
2595 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2596 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2597 // Negative fp literals with a preceding "-" are
2598 // handled likewise for uniformity.
2599 //
2600 bool
2601 AMDGPUAsmParser::parseSP3NegModifier() {
2602
2603 AsmToken NextToken[2];
2604 peekTokens(NextToken);
2605
2606 if (isToken(AsmToken::Minus) &&
2607 (isRegister(NextToken[0], NextToken[1]) ||
2608 NextToken[0].is(AsmToken::Pipe) ||
2609 isId(NextToken[0], "abs"))) {
2610 lex();
2611 return true;
2612 }
2613
2614 return false;
2615 }
2616
2617 OperandMatchResultTy
2618 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2619 bool AllowImm) {
2620 bool Neg, SP3Neg;
2621 bool Abs, SP3Abs;
2622 SMLoc Loc;
2623
2624 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2625 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2626 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2627 return MatchOperand_ParseFail;
2628 }
2629
2630 SP3Neg = parseSP3NegModifier();
2631
2632 Loc = getLoc();
2633 Neg = trySkipId("neg");
2634 if (Neg && SP3Neg) {
2635 Error(Loc, "expected register or immediate");
2636 return MatchOperand_ParseFail;
2637 }
2638 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2639 return MatchOperand_ParseFail;
2640
2641 Abs = trySkipId("abs");
2642 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2643 return MatchOperand_ParseFail;
2644
2645 Loc = getLoc();
2646 SP3Abs = trySkipToken(AsmToken::Pipe);
2647 if (Abs && SP3Abs) {
2648 Error(Loc, "expected register or immediate");
2649 return MatchOperand_ParseFail;
2650 }
2651
2652 OperandMatchResultTy Res;
2653 if (AllowImm) {
2654 Res = parseRegOrImm(Operands, SP3Abs);
2655 } else {
2656 Res = parseReg(Operands);
2657 }
2658 if (Res != MatchOperand_Success) {
2659 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2660 } 2661 2662 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2663 return MatchOperand_ParseFail; 2664 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2665 return MatchOperand_ParseFail; 2666 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2667 return MatchOperand_ParseFail; 2668 2669 AMDGPUOperand::Modifiers Mods; 2670 Mods.Abs = Abs || SP3Abs; 2671 Mods.Neg = Neg || SP3Neg; 2672 2673 if (Mods.hasFPModifiers()) { 2674 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2675 if (Op.isExpr()) { 2676 Error(Op.getStartLoc(), "expected an absolute expression"); 2677 return MatchOperand_ParseFail; 2678 } 2679 Op.setModifiers(Mods); 2680 } 2681 return MatchOperand_Success; 2682 } 2683 2684 OperandMatchResultTy 2685 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2686 bool AllowImm) { 2687 bool Sext = trySkipId("sext"); 2688 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2689 return MatchOperand_ParseFail; 2690 2691 OperandMatchResultTy Res; 2692 if (AllowImm) { 2693 Res = parseRegOrImm(Operands); 2694 } else { 2695 Res = parseReg(Operands); 2696 } 2697 if (Res != MatchOperand_Success) { 2698 return Sext? MatchOperand_ParseFail : Res; 2699 } 2700 2701 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2702 return MatchOperand_ParseFail; 2703 2704 AMDGPUOperand::Modifiers Mods; 2705 Mods.Sext = Sext; 2706 2707 if (Mods.hasIntModifiers()) { 2708 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2709 if (Op.isExpr()) { 2710 Error(Op.getStartLoc(), "expected an absolute expression"); 2711 return MatchOperand_ParseFail; 2712 } 2713 Op.setModifiers(Mods); 2714 } 2715 2716 return MatchOperand_Success; 2717 } 2718 2719 OperandMatchResultTy 2720 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2721 return parseRegOrImmWithFPInputMods(Operands, false); 2722 } 2723 2724 OperandMatchResultTy 2725 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2726 return parseRegOrImmWithIntInputMods(Operands, false); 2727 } 2728 2729 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2730 auto Loc = getLoc(); 2731 if (trySkipId("off")) { 2732 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2733 AMDGPUOperand::ImmTyOff, false)); 2734 return MatchOperand_Success; 2735 } 2736 2737 if (!isRegister()) 2738 return MatchOperand_NoMatch; 2739 2740 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2741 if (Reg) { 2742 Operands.push_back(std::move(Reg)); 2743 return MatchOperand_Success; 2744 } 2745 2746 return MatchOperand_ParseFail; 2747 2748 } 2749 2750 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2751 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2752 2753 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2754 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2755 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2756 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2757 return Match_InvalidOperand; 2758 2759 if ((TSFlags & SIInstrFlags::VOP3) && 2760 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2761 getForcedEncodingSize() != 64) 2762 return Match_PreferE32; 2763 2764 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2765 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2766 // v_mac_f32/16 allow only dst_sel == DWORD; 2767 auto OpNum = 2768 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2769 const auto &Op = Inst.getOperand(OpNum); 2770 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2771 return Match_InvalidOperand; 2772 } 2773 } 2774 2775 return Match_Success; 2776 } 2777 2778 // What asm variants we should check 2779 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2780 if (getForcedEncodingSize() == 32) { 2781 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2782 return makeArrayRef(Variants); 2783 } 2784 2785 if (isForcedVOP3()) { 2786 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2787 return makeArrayRef(Variants); 2788 } 2789 2790 if (isForcedSDWA()) { 2791 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2792 AMDGPUAsmVariants::SDWA9}; 2793 return makeArrayRef(Variants); 2794 } 2795 2796 if (isForcedDPP()) { 2797 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2798 return makeArrayRef(Variants); 2799 } 2800 2801 static const unsigned Variants[] = { 2802 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2803 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2804 }; 2805 2806 return makeArrayRef(Variants); 2807 } 2808 2809 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2810 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2811 const unsigned Num = Desc.getNumImplicitUses(); 2812 for (unsigned i = 0; i < Num; ++i) { 2813 unsigned Reg = Desc.ImplicitUses[i]; 2814 switch (Reg) { 2815 case AMDGPU::FLAT_SCR: 2816 case AMDGPU::VCC: 2817 case AMDGPU::VCC_LO: 2818 case AMDGPU::VCC_HI: 2819 case AMDGPU::M0: 2820 return Reg; 2821 default: 2822 break; 2823 } 2824 } 2825 return AMDGPU::NoRegister; 2826 } 2827 2828 // NB: This code is correct only when used to check constant 2829 // bus limitations because GFX7 support no f16 inline constants. 2830 // Note that there are no cases when a GFX7 opcode violates 2831 // constant bus limitations due to the use of an f16 constant. 
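// The switch below dispatches on the expected operand size taken from the
// operand descriptor: 8 bytes for 64-bit operands, 4 for 32-bit operands,
// and 2 for 16-bit and packed 16-bit operands, which are further
// distinguished by their exact operand type.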
2832 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2833 unsigned OpIdx) const { 2834 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2835 2836 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2837 return false; 2838 } 2839 2840 const MCOperand &MO = Inst.getOperand(OpIdx); 2841 2842 int64_t Val = MO.getImm(); 2843 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2844 2845 switch (OpSize) { // expected operand size 2846 case 8: 2847 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2848 case 4: 2849 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2850 case 2: { 2851 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2852 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2853 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2854 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2855 return AMDGPU::isInlinableIntLiteral(Val); 2856 2857 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2858 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2859 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2860 return AMDGPU::isInlinableIntLiteralV216(Val); 2861 2862 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2863 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2864 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2865 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2866 2867 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2868 } 2869 default: 2870 llvm_unreachable("invalid operand size"); 2871 } 2872 } 2873 2874 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2875 if (!isGFX10()) 2876 return 1; 2877 2878 switch (Opcode) { 2879 // 64-bit shift instructions can use only one scalar value input 2880 case AMDGPU::V_LSHLREV_B64: 2881 case AMDGPU::V_LSHLREV_B64_gfx10: 2882 case AMDGPU::V_LSHL_B64: 2883 case AMDGPU::V_LSHRREV_B64: 2884 case AMDGPU::V_LSHRREV_B64_gfx10: 2885 case AMDGPU::V_LSHR_B64: 2886 case AMDGPU::V_ASHRREV_I64: 2887 case AMDGPU::V_ASHRREV_I64_gfx10: 2888 case AMDGPU::V_ASHR_I64: 2889 return 1; 2890 default: 2891 return 2; 2892 } 2893 } 2894 2895 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2896 const MCOperand &MO = Inst.getOperand(OpIdx); 2897 if (MO.isImm()) { 2898 return !isInlineConstant(Inst, OpIdx); 2899 } else if (MO.isReg()) { 2900 auto Reg = MO.getReg(); 2901 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2902 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2903 } else { 2904 return true; 2905 } 2906 } 2907 2908 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2909 const unsigned Opcode = Inst.getOpcode(); 2910 const MCInstrDesc &Desc = MII.get(Opcode); 2911 unsigned ConstantBusUseCount = 0; 2912 unsigned NumLiterals = 0; 2913 unsigned LiteralSize; 2914 2915 if (Desc.TSFlags & 2916 (SIInstrFlags::VOPC | 2917 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2918 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2919 SIInstrFlags::SDWA)) { 2920 // Check special imm operands (used by madmk, etc) 2921 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2922 ++ConstantBusUseCount; 2923 } 2924 2925 SmallDenseSet<unsigned> SGPRsUsed; 2926 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2927 if (SGPRUsed != AMDGPU::NoRegister) { 2928 SGPRsUsed.insert(SGPRUsed); 2929 ++ConstantBusUseCount; 2930 } 2931 2932 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2933 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, 
AMDGPU::OpName::src1); 2934 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2935 2936 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2937 2938 for (int OpIdx : OpIndices) { 2939 if (OpIdx == -1) break; 2940 2941 const MCOperand &MO = Inst.getOperand(OpIdx); 2942 if (usesConstantBus(Inst, OpIdx)) { 2943 if (MO.isReg()) { 2944 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2945 // Pairs of registers with a partial intersections like these 2946 // s0, s[0:1] 2947 // flat_scratch_lo, flat_scratch 2948 // flat_scratch_lo, flat_scratch_hi 2949 // are theoretically valid but they are disabled anyway. 2950 // Note that this code mimics SIInstrInfo::verifyInstruction 2951 if (!SGPRsUsed.count(Reg)) { 2952 SGPRsUsed.insert(Reg); 2953 ++ConstantBusUseCount; 2954 } 2955 } else { // Expression or a literal 2956 2957 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2958 continue; // special operand like VINTERP attr_chan 2959 2960 // An instruction may use only one literal. 2961 // This has been validated on the previous step. 2962 // See validateVOP3Literal. 2963 // This literal may be used as more than one operand. 2964 // If all these operands are of the same size, 2965 // this literal counts as one scalar value. 2966 // Otherwise it counts as 2 scalar values. 2967 // See "GFX10 Shader Programming", section 3.6.2.3. 2968 2969 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2970 if (Size < 4) Size = 4; 2971 2972 if (NumLiterals == 0) { 2973 NumLiterals = 1; 2974 LiteralSize = Size; 2975 } else if (LiteralSize != Size) { 2976 NumLiterals = 2; 2977 } 2978 } 2979 } 2980 } 2981 } 2982 ConstantBusUseCount += NumLiterals; 2983 2984 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2985 } 2986 2987 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2988 const unsigned Opcode = Inst.getOpcode(); 2989 const MCInstrDesc &Desc = MII.get(Opcode); 2990 2991 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2992 if (DstIdx == -1 || 2993 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2994 return true; 2995 } 2996 2997 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2998 2999 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3000 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3001 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3002 3003 assert(DstIdx != -1); 3004 const MCOperand &Dst = Inst.getOperand(DstIdx); 3005 assert(Dst.isReg()); 3006 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3007 3008 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3009 3010 for (int SrcIdx : SrcIndices) { 3011 if (SrcIdx == -1) break; 3012 const MCOperand &Src = Inst.getOperand(SrcIdx); 3013 if (Src.isReg()) { 3014 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3015 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3016 return false; 3017 } 3018 } 3019 } 3020 3021 return true; 3022 } 3023 3024 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3025 3026 const unsigned Opc = Inst.getOpcode(); 3027 const MCInstrDesc &Desc = MII.get(Opc); 3028 3029 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3030 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3031 assert(ClampIdx != -1); 3032 return Inst.getOperand(ClampIdx).getImm() == 0; 3033 } 3034 3035 return true; 3036 } 3037 3038 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst 
&Inst) { 3039 3040 const unsigned Opc = Inst.getOpcode(); 3041 const MCInstrDesc &Desc = MII.get(Opc); 3042 3043 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3044 return true; 3045 3046 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3047 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3048 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3049 3050 assert(VDataIdx != -1); 3051 assert(DMaskIdx != -1); 3052 assert(TFEIdx != -1); 3053 3054 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3055 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3056 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3057 if (DMask == 0) 3058 DMask = 1; 3059 3060 unsigned DataSize = 3061 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3062 if (hasPackedD16()) { 3063 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3064 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3065 DataSize = (DataSize + 1) / 2; 3066 } 3067 3068 return (VDataSize / 4) == DataSize + TFESize; 3069 } 3070 3071 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3072 const unsigned Opc = Inst.getOpcode(); 3073 const MCInstrDesc &Desc = MII.get(Opc); 3074 3075 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3076 return true; 3077 3078 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3079 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3080 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3081 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3082 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3083 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3084 3085 assert(VAddr0Idx != -1); 3086 assert(SrsrcIdx != -1); 3087 assert(DimIdx != -1); 3088 assert(SrsrcIdx > VAddr0Idx); 3089 3090 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3091 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3092 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3093 unsigned VAddrSize = 3094 IsNSA ? SrsrcIdx - VAddr0Idx 3095 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3096 3097 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3098 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3099 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3100 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3101 if (!IsNSA) { 3102 if (AddrSize > 8) 3103 AddrSize = 16; 3104 else if (AddrSize > 4) 3105 AddrSize = 8; 3106 } 3107 3108 return VAddrSize == AddrSize; 3109 } 3110 3111 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3112 3113 const unsigned Opc = Inst.getOpcode(); 3114 const MCInstrDesc &Desc = MII.get(Opc); 3115 3116 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3117 return true; 3118 if (!Desc.mayLoad() || !Desc.mayStore()) 3119 return true; // Not atomic 3120 3121 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3122 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3123 3124 // This is an incomplete check because image_atomic_cmpswap 3125 // may only use 0x3 and 0xf while other atomic operations 3126 // may use 0x1 and 0x3. However these limitations are 3127 // verified when we check that dmask matches dst size. 
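// For example, a 32-bit image atomic would typically use dmask 0x1, a
// 64-bit atomic (or a 32-bit image_atomic_cmpswap) 0x3, and a 64-bit
// image_atomic_cmpswap 0xf.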
3128 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3129 } 3130 3131 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3132 3133 const unsigned Opc = Inst.getOpcode(); 3134 const MCInstrDesc &Desc = MII.get(Opc); 3135 3136 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3137 return true; 3138 3139 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3140 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3141 3142 // GATHER4 instructions use dmask in a different fashion compared to 3143 // other MIMG instructions. The only useful DMASK values are 3144 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3145 // (red,red,red,red) etc.) The ISA document doesn't mention 3146 // this. 3147 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3148 } 3149 3150 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3151 { 3152 switch (Opcode) { 3153 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3154 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3155 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3156 return true; 3157 default: 3158 return false; 3159 } 3160 } 3161 3162 // movrels* opcodes should only allow VGPRS as src0. 3163 // This is specified in .td description for vop1/vop3, 3164 // but sdwa is handled differently. See isSDWAOperand. 3165 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3166 3167 const unsigned Opc = Inst.getOpcode(); 3168 const MCInstrDesc &Desc = MII.get(Opc); 3169 3170 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3171 return true; 3172 3173 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3174 assert(Src0Idx != -1); 3175 3176 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3177 if (!Src0.isReg()) 3178 return false; 3179 3180 auto Reg = Src0.getReg(); 3181 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3182 return !isSGPR(mc2PseudoReg(Reg), TRI); 3183 } 3184 3185 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3186 3187 const unsigned Opc = Inst.getOpcode(); 3188 3189 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3190 return true; 3191 3192 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3193 assert(Src0Idx != -1); 3194 3195 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3196 if (!Src0.isReg()) 3197 return true; 3198 3199 auto Reg = Src0.getReg(); 3200 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3201 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3202 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3203 return false; 3204 } 3205 3206 return true; 3207 } 3208 3209 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3210 3211 const unsigned Opc = Inst.getOpcode(); 3212 const MCInstrDesc &Desc = MII.get(Opc); 3213 3214 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3215 return true; 3216 3217 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3218 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3219 if (isCI() || isSI()) 3220 return false; 3221 } 3222 3223 return true; 3224 } 3225 3226 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3227 const unsigned Opc = Inst.getOpcode(); 3228 const MCInstrDesc &Desc = MII.get(Opc); 3229 3230 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3231 return true; 3232 3233 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3234 if (DimIdx < 0) 3235 return true; 3236 3237 long Imm = Inst.getOperand(DimIdx).getImm(); 3238 if (Imm < 0 || Imm >= 8) 3239 return false; 3240 3241 return 
true; 3242 } 3243 3244 static bool IsRevOpcode(const unsigned Opcode) 3245 { 3246 switch (Opcode) { 3247 case AMDGPU::V_SUBREV_F32_e32: 3248 case AMDGPU::V_SUBREV_F32_e64: 3249 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3250 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3251 case AMDGPU::V_SUBREV_F32_e32_vi: 3252 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3253 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3254 case AMDGPU::V_SUBREV_F32_e64_vi: 3255 3256 case AMDGPU::V_SUBREV_CO_U32_e32: 3257 case AMDGPU::V_SUBREV_CO_U32_e64: 3258 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3259 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3260 3261 case AMDGPU::V_SUBBREV_U32_e32: 3262 case AMDGPU::V_SUBBREV_U32_e64: 3263 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3264 case AMDGPU::V_SUBBREV_U32_e32_vi: 3265 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3266 case AMDGPU::V_SUBBREV_U32_e64_vi: 3267 3268 case AMDGPU::V_SUBREV_U32_e32: 3269 case AMDGPU::V_SUBREV_U32_e64: 3270 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3271 case AMDGPU::V_SUBREV_U32_e32_vi: 3272 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3273 case AMDGPU::V_SUBREV_U32_e64_vi: 3274 3275 case AMDGPU::V_SUBREV_F16_e32: 3276 case AMDGPU::V_SUBREV_F16_e64: 3277 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3278 case AMDGPU::V_SUBREV_F16_e32_vi: 3279 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3280 case AMDGPU::V_SUBREV_F16_e64_vi: 3281 3282 case AMDGPU::V_SUBREV_U16_e32: 3283 case AMDGPU::V_SUBREV_U16_e64: 3284 case AMDGPU::V_SUBREV_U16_e32_vi: 3285 case AMDGPU::V_SUBREV_U16_e64_vi: 3286 3287 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3288 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3289 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3290 3291 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3292 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3293 3294 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3295 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3296 3297 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3298 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3299 3300 case AMDGPU::V_LSHRREV_B32_e32: 3301 case AMDGPU::V_LSHRREV_B32_e64: 3302 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3303 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3304 case AMDGPU::V_LSHRREV_B32_e32_vi: 3305 case AMDGPU::V_LSHRREV_B32_e64_vi: 3306 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3307 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3308 3309 case AMDGPU::V_ASHRREV_I32_e32: 3310 case AMDGPU::V_ASHRREV_I32_e64: 3311 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3312 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3313 case AMDGPU::V_ASHRREV_I32_e32_vi: 3314 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3315 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3316 case AMDGPU::V_ASHRREV_I32_e64_vi: 3317 3318 case AMDGPU::V_LSHLREV_B32_e32: 3319 case AMDGPU::V_LSHLREV_B32_e64: 3320 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3321 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3322 case AMDGPU::V_LSHLREV_B32_e32_vi: 3323 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3324 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3325 case AMDGPU::V_LSHLREV_B32_e64_vi: 3326 3327 case AMDGPU::V_LSHLREV_B16_e32: 3328 case AMDGPU::V_LSHLREV_B16_e64: 3329 case AMDGPU::V_LSHLREV_B16_e32_vi: 3330 case AMDGPU::V_LSHLREV_B16_e64_vi: 3331 case AMDGPU::V_LSHLREV_B16_gfx10: 3332 3333 case AMDGPU::V_LSHRREV_B16_e32: 3334 case AMDGPU::V_LSHRREV_B16_e64: 3335 case AMDGPU::V_LSHRREV_B16_e32_vi: 3336 case AMDGPU::V_LSHRREV_B16_e64_vi: 3337 case AMDGPU::V_LSHRREV_B16_gfx10: 3338 3339 case AMDGPU::V_ASHRREV_I16_e32: 3340 case AMDGPU::V_ASHRREV_I16_e64: 3341 case AMDGPU::V_ASHRREV_I16_e32_vi: 3342 case AMDGPU::V_ASHRREV_I16_e64_vi: 3343 case AMDGPU::V_ASHRREV_I16_gfx10: 3344 3345 case 
AMDGPU::V_LSHLREV_B64: 3346 case AMDGPU::V_LSHLREV_B64_gfx10: 3347 case AMDGPU::V_LSHLREV_B64_vi: 3348 3349 case AMDGPU::V_LSHRREV_B64: 3350 case AMDGPU::V_LSHRREV_B64_gfx10: 3351 case AMDGPU::V_LSHRREV_B64_vi: 3352 3353 case AMDGPU::V_ASHRREV_I64: 3354 case AMDGPU::V_ASHRREV_I64_gfx10: 3355 case AMDGPU::V_ASHRREV_I64_vi: 3356 3357 case AMDGPU::V_PK_LSHLREV_B16: 3358 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3359 case AMDGPU::V_PK_LSHLREV_B16_vi: 3360 3361 case AMDGPU::V_PK_LSHRREV_B16: 3362 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3363 case AMDGPU::V_PK_LSHRREV_B16_vi: 3364 case AMDGPU::V_PK_ASHRREV_I16: 3365 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3366 case AMDGPU::V_PK_ASHRREV_I16_vi: 3367 return true; 3368 default: 3369 return false; 3370 } 3371 } 3372 3373 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3374 3375 using namespace SIInstrFlags; 3376 const unsigned Opcode = Inst.getOpcode(); 3377 const MCInstrDesc &Desc = MII.get(Opcode); 3378 3379 // lds_direct register is defined so that it can be used 3380 // with 9-bit operands only. Ignore encodings which do not accept these. 3381 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3382 return true; 3383 3384 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3385 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3386 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3387 3388 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3389 3390 // lds_direct cannot be specified as either src1 or src2. 3391 for (int SrcIdx : SrcIndices) { 3392 if (SrcIdx == -1) break; 3393 const MCOperand &Src = Inst.getOperand(SrcIdx); 3394 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3395 return false; 3396 } 3397 } 3398 3399 if (Src0Idx == -1) 3400 return true; 3401 3402 const MCOperand &Src = Inst.getOperand(Src0Idx); 3403 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3404 return true; 3405 3406 // lds_direct is specified as src0. Check additional limitations. 3407 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3408 } 3409 3410 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3411 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3412 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3413 if (Op.isFlatOffset()) 3414 return Op.getStartLoc(); 3415 } 3416 return getLoc(); 3417 } 3418 3419 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3420 const OperandVector &Operands) { 3421 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3422 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3423 return true; 3424 3425 auto Opcode = Inst.getOpcode(); 3426 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3427 assert(OpNum != -1); 3428 3429 const auto &Op = Inst.getOperand(OpNum); 3430 if (!hasFlatOffsets() && Op.getImm() != 0) { 3431 Error(getFlatOffsetLoc(Operands), 3432 "flat offset modifier is not supported on this GPU"); 3433 return false; 3434 } 3435 3436 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3437 // For FLAT segment the offset must be positive; 3438 // MSB is ignored and forced to zero. 3439 unsigned OffsetSize = isGFX9() ? 13 : 12; 3440 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3441 if (!isIntN(OffsetSize, Op.getImm())) { 3442 Error(getFlatOffsetLoc(Operands), 3443 isGFX9() ? 
"expected a 13-bit signed offset" : 3444 "expected a 12-bit signed offset"); 3445 return false; 3446 } 3447 } else { 3448 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3449 Error(getFlatOffsetLoc(Operands), 3450 isGFX9() ? "expected a 12-bit unsigned offset" : 3451 "expected an 11-bit unsigned offset"); 3452 return false; 3453 } 3454 } 3455 3456 return true; 3457 } 3458 3459 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3460 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3461 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3462 if (Op.isSMEMOffset()) 3463 return Op.getStartLoc(); 3464 } 3465 return getLoc(); 3466 } 3467 3468 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3469 const OperandVector &Operands) { 3470 if (isCI() || isSI()) 3471 return true; 3472 3473 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3474 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3475 return true; 3476 3477 auto Opcode = Inst.getOpcode(); 3478 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3479 if (OpNum == -1) 3480 return true; 3481 3482 const auto &Op = Inst.getOperand(OpNum); 3483 if (!Op.isImm()) 3484 return true; 3485 3486 uint64_t Offset = Op.getImm(); 3487 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3488 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3489 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3490 return true; 3491 3492 Error(getSMEMOffsetLoc(Operands), 3493 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3494 "expected a 21-bit signed offset"); 3495 3496 return false; 3497 } 3498 3499 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3500 unsigned Opcode = Inst.getOpcode(); 3501 const MCInstrDesc &Desc = MII.get(Opcode); 3502 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3503 return true; 3504 3505 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3506 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3507 3508 const int OpIndices[] = { Src0Idx, Src1Idx }; 3509 3510 unsigned NumExprs = 0; 3511 unsigned NumLiterals = 0; 3512 uint32_t LiteralValue; 3513 3514 for (int OpIdx : OpIndices) { 3515 if (OpIdx == -1) break; 3516 3517 const MCOperand &MO = Inst.getOperand(OpIdx); 3518 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3519 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3520 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3521 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3522 if (NumLiterals == 0 || LiteralValue != Value) { 3523 LiteralValue = Value; 3524 ++NumLiterals; 3525 } 3526 } else if (MO.isExpr()) { 3527 ++NumExprs; 3528 } 3529 } 3530 } 3531 3532 return NumLiterals + NumExprs <= 1; 3533 } 3534 3535 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3536 const unsigned Opc = Inst.getOpcode(); 3537 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3538 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3539 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3540 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3541 3542 if (OpSel & ~3) 3543 return false; 3544 } 3545 return true; 3546 } 3547 3548 // Check if VCC register matches wavefront size 3549 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3550 auto FB = getFeatureBits(); 3551 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3552 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3553 } 3554 3555 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3556 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3557 unsigned Opcode = Inst.getOpcode(); 3558 const MCInstrDesc &Desc = MII.get(Opcode); 3559 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3560 return true; 3561 3562 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3563 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3564 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3565 3566 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3567 3568 unsigned NumExprs = 0; 3569 unsigned NumLiterals = 0; 3570 uint32_t LiteralValue; 3571 3572 for (int OpIdx : OpIndices) { 3573 if (OpIdx == -1) break; 3574 3575 const MCOperand &MO = Inst.getOperand(OpIdx); 3576 if (!MO.isImm() && !MO.isExpr()) 3577 continue; 3578 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3579 continue; 3580 3581 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3582 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3583 return false; 3584 3585 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3586 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3587 if (NumLiterals == 0 || LiteralValue != Value) { 3588 LiteralValue = Value; 3589 ++NumLiterals; 3590 } 3591 } else if (MO.isExpr()) { 3592 ++NumExprs; 3593 } 3594 } 3595 NumLiterals += NumExprs; 3596 3597 return !NumLiterals || 3598 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3599 } 3600 3601 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3602 const SMLoc &IDLoc, 3603 const OperandVector &Operands) { 3604 if (!validateLdsDirect(Inst)) { 3605 Error(IDLoc, 3606 "invalid use of lds_direct"); 3607 return false; 3608 } 3609 if (!validateSOPLiteral(Inst)) { 3610 Error(IDLoc, 3611 "only one literal operand is allowed"); 3612 return false; 3613 } 3614 if (!validateVOP3Literal(Inst)) { 3615 Error(IDLoc, 3616 "invalid literal operand"); 3617 return false; 3618 } 3619 if (!validateConstantBusLimitations(Inst)) { 3620 Error(IDLoc, 3621 "invalid operand (violates constant bus restrictions)"); 3622 return false; 3623 } 3624 if (!validateEarlyClobberLimitations(Inst)) { 3625 Error(IDLoc, 3626 "destination must be different than all sources"); 3627 return false; 3628 } 3629 if (!validateIntClampSupported(Inst)) { 3630 Error(IDLoc, 3631 "integer clamping is not supported on this GPU"); 3632 return false; 3633 } 3634 if (!validateOpSel(Inst)) { 3635 Error(IDLoc, 3636 "invalid op_sel operand"); 3637 return false; 3638 } 3639 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
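// For MIMG the checks below also verify that the data operand width matches
// dmask and tfe: e.g. (illustrative) an image_load with dmask:0x7 and no tfe
// is expected to write a 3-register VGPR tuple.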
3640 if (!validateMIMGD16(Inst)) { 3641 Error(IDLoc, 3642 "d16 modifier is not supported on this GPU"); 3643 return false; 3644 } 3645 if (!validateMIMGDim(Inst)) { 3646 Error(IDLoc, "dim modifier is required on this GPU"); 3647 return false; 3648 } 3649 if (!validateMIMGDataSize(Inst)) { 3650 Error(IDLoc, 3651 "image data size does not match dmask and tfe"); 3652 return false; 3653 } 3654 if (!validateMIMGAddrSize(Inst)) { 3655 Error(IDLoc, 3656 "image address size does not match dim and a16"); 3657 return false; 3658 } 3659 if (!validateMIMGAtomicDMask(Inst)) { 3660 Error(IDLoc, 3661 "invalid atomic image dmask"); 3662 return false; 3663 } 3664 if (!validateMIMGGatherDMask(Inst)) { 3665 Error(IDLoc, 3666 "invalid image_gather dmask: only one bit must be set"); 3667 return false; 3668 } 3669 if (!validateMovrels(Inst)) { 3670 Error(IDLoc, "source operand must be a VGPR"); 3671 return false; 3672 } 3673 if (!validateFlatOffset(Inst, Operands)) { 3674 return false; 3675 } 3676 if (!validateSMEMOffset(Inst, Operands)) { 3677 return false; 3678 } 3679 if (!validateMAIAccWrite(Inst)) { 3680 return false; 3681 } 3682 3683 return true; 3684 } 3685 3686 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3687 const FeatureBitset &FBS, 3688 unsigned VariantID = 0); 3689 3690 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3691 OperandVector &Operands, 3692 MCStreamer &Out, 3693 uint64_t &ErrorInfo, 3694 bool MatchingInlineAsm) { 3695 MCInst Inst; 3696 unsigned Result = Match_Success; 3697 for (auto Variant : getMatchedVariants()) { 3698 uint64_t EI; 3699 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3700 Variant); 3701 // We order match statuses from least to most specific. We use most specific 3702 // status as resulting 3703 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3704 if ((R == Match_Success) || 3705 (R == Match_PreferE32) || 3706 (R == Match_MissingFeature && Result != Match_PreferE32) || 3707 (R == Match_InvalidOperand && Result != Match_MissingFeature 3708 && Result != Match_PreferE32) || 3709 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3710 && Result != Match_MissingFeature 3711 && Result != Match_PreferE32)) { 3712 Result = R; 3713 ErrorInfo = EI; 3714 } 3715 if (R == Match_Success) 3716 break; 3717 } 3718 3719 switch (Result) { 3720 default: break; 3721 case Match_Success: 3722 if (!validateInstruction(Inst, IDLoc, Operands)) { 3723 return true; 3724 } 3725 Inst.setLoc(IDLoc); 3726 Out.emitInstruction(Inst, getSTI()); 3727 return false; 3728 3729 case Match_MissingFeature: 3730 return Error(IDLoc, "instruction not supported on this GPU"); 3731 3732 case Match_MnemonicFail: { 3733 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3734 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3735 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3736 return Error(IDLoc, "invalid instruction" + Suggestion, 3737 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3738 } 3739 3740 case Match_InvalidOperand: { 3741 SMLoc ErrorLoc = IDLoc; 3742 if (ErrorInfo != ~0ULL) { 3743 if (ErrorInfo >= Operands.size()) { 3744 return Error(IDLoc, "too few operands for instruction"); 3745 } 3746 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3747 if (ErrorLoc == SMLoc()) 3748 ErrorLoc = IDLoc; 3749 } 3750 return Error(ErrorLoc, "invalid operand for instruction"); 3751 } 3752 3753 case Match_PreferE32: 3754 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 3755 "should be encoded as e32"); 3756 } 3757 llvm_unreachable("Implement any new match types added!"); 3758 } 3759 3760 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3761 int64_t Tmp = -1; 3762 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3763 return true; 3764 } 3765 if (getParser().parseAbsoluteExpression(Tmp)) { 3766 return true; 3767 } 3768 Ret = static_cast<uint32_t>(Tmp); 3769 return false; 3770 } 3771 3772 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3773 uint32_t &Minor) { 3774 if (ParseAsAbsoluteExpression(Major)) 3775 return TokError("invalid major version"); 3776 3777 if (getLexer().isNot(AsmToken::Comma)) 3778 return TokError("minor version number required, comma expected"); 3779 Lex(); 3780 3781 if (ParseAsAbsoluteExpression(Minor)) 3782 return TokError("invalid minor version"); 3783 3784 return false; 3785 } 3786 3787 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3788 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3789 return TokError("directive only supported for amdgcn architecture"); 3790 3791 std::string Target; 3792 3793 SMLoc TargetStart = getTok().getLoc(); 3794 if (getParser().parseEscapedString(Target)) 3795 return true; 3796 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3797 3798 std::string ExpectedTarget; 3799 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3800 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3801 3802 if (Target != ExpectedTargetOS.str()) 3803 return getParser().Error(TargetRange.Start, "target must match options", 3804 TargetRange); 3805 3806 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3807 return false; 3808 } 3809 3810 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3811 return getParser().Error(Range.Start, "value out of range", Range); 3812 } 3813 3814 bool AMDGPUAsmParser::calculateGPRBlocks( 3815 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3816 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3817 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3818 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3819 // TODO(scott.linder): These calculations are duplicated from 3820 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
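// Illustrative example (assumed granule of 4; the real allocation granules
// come from IsaInfo and vary by subtarget): NextFreeVGPR = 10 rounds up to
// 3 blocks, and getNumVGPRBlocks returns the encoded value blocks - 1 = 2
// used for COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT below.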
3821 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3822 3823 unsigned NumVGPRs = NextFreeVGPR; 3824 unsigned NumSGPRs = NextFreeSGPR; 3825 3826 if (Version.Major >= 10) 3827 NumSGPRs = 0; 3828 else { 3829 unsigned MaxAddressableNumSGPRs = 3830 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3831 3832 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3833 NumSGPRs > MaxAddressableNumSGPRs) 3834 return OutOfRangeError(SGPRRange); 3835 3836 NumSGPRs += 3837 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3838 3839 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3840 NumSGPRs > MaxAddressableNumSGPRs) 3841 return OutOfRangeError(SGPRRange); 3842 3843 if (Features.test(FeatureSGPRInitBug)) 3844 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3845 } 3846 3847 VGPRBlocks = 3848 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3849 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3850 3851 return false; 3852 } 3853 3854 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3855 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3856 return TokError("directive only supported for amdgcn architecture"); 3857 3858 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3859 return TokError("directive only supported for amdhsa OS"); 3860 3861 StringRef KernelName; 3862 if (getParser().parseIdentifier(KernelName)) 3863 return true; 3864 3865 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3866 3867 StringSet<> Seen; 3868 3869 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3870 3871 SMRange VGPRRange; 3872 uint64_t NextFreeVGPR = 0; 3873 SMRange SGPRRange; 3874 uint64_t NextFreeSGPR = 0; 3875 unsigned UserSGPRCount = 0; 3876 bool ReserveVCC = true; 3877 bool ReserveFlatScr = true; 3878 bool ReserveXNACK = hasXNACK(); 3879 Optional<bool> EnableWavefrontSize32; 3880 3881 while (true) { 3882 while (getLexer().is(AsmToken::EndOfStatement)) 3883 Lex(); 3884 3885 if (getLexer().isNot(AsmToken::Identifier)) 3886 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3887 3888 StringRef ID = getTok().getIdentifier(); 3889 SMRange IDRange = getTok().getLocRange(); 3890 Lex(); 3891 3892 if (ID == ".end_amdhsa_kernel") 3893 break; 3894 3895 if (Seen.find(ID) != Seen.end()) 3896 return TokError(".amdhsa_ directives cannot be repeated"); 3897 Seen.insert(ID); 3898 3899 SMLoc ValStart = getTok().getLoc(); 3900 int64_t IVal; 3901 if (getParser().parseAbsoluteExpression(IVal)) 3902 return true; 3903 SMLoc ValEnd = getTok().getLoc(); 3904 SMRange ValRange = SMRange(ValStart, ValEnd); 3905 3906 if (IVal < 0) 3907 return OutOfRangeError(ValRange); 3908 3909 uint64_t Val = IVal; 3910 3911 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3912 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3913 return OutOfRangeError(RANGE); \ 3914 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3915 3916 if (ID == ".amdhsa_group_segment_fixed_size") { 3917 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3918 return OutOfRangeError(ValRange); 3919 KD.group_segment_fixed_size = Val; 3920 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3921 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3922 return OutOfRangeError(ValRange); 3923 KD.private_segment_fixed_size = Val; 3924 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3925 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3926 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3927 Val, ValRange); 
3928 if (Val) 3929 UserSGPRCount += 4; 3930 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3931 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3932 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3933 ValRange); 3934 if (Val) 3935 UserSGPRCount += 2; 3936 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3937 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3938 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3939 ValRange); 3940 if (Val) 3941 UserSGPRCount += 2; 3942 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3943 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3944 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3945 Val, ValRange); 3946 if (Val) 3947 UserSGPRCount += 2; 3948 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3949 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3950 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3951 ValRange); 3952 if (Val) 3953 UserSGPRCount += 2; 3954 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3955 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3956 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3957 ValRange); 3958 if (Val) 3959 UserSGPRCount += 2; 3960 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3961 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3962 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3963 Val, ValRange); 3964 if (Val) 3965 UserSGPRCount += 1; 3966 } else if (ID == ".amdhsa_wavefront_size32") { 3967 if (IVersion.Major < 10) 3968 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3969 IDRange); 3970 EnableWavefrontSize32 = Val; 3971 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3972 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3973 Val, ValRange); 3974 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3975 PARSE_BITS_ENTRY( 3976 KD.compute_pgm_rsrc2, 3977 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3978 ValRange); 3979 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3980 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3981 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3982 ValRange); 3983 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3984 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3985 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3986 ValRange); 3987 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3988 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3989 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3990 ValRange); 3991 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3992 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3993 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3994 ValRange); 3995 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3996 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3997 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3998 ValRange); 3999 } else if (ID == ".amdhsa_next_free_vgpr") { 4000 VGPRRange = ValRange; 4001 NextFreeVGPR = Val; 4002 } else if (ID == ".amdhsa_next_free_sgpr") { 4003 SGPRRange = ValRange; 4004 NextFreeSGPR = Val; 4005 } else if (ID == ".amdhsa_reserve_vcc") { 4006 if (!isUInt<1>(Val)) 4007 return OutOfRangeError(ValRange); 4008 ReserveVCC = Val; 4009 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4010 if (IVersion.Major < 7) 4011 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4012 IDRange); 4013 if (!isUInt<1>(Val)) 4014 return OutOfRangeError(ValRange); 4015 ReserveFlatScr = Val; 4016 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4017 if (IVersion.Major < 8) 4018 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4019 IDRange); 4020 if (!isUInt<1>(Val)) 4021 return OutOfRangeError(ValRange); 4022 ReserveXNACK = Val; 4023 } else if (ID == ".amdhsa_float_round_mode_32") { 4024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4025 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4026 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4028 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4029 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4031 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4032 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4033 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4034 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4035 ValRange); 4036 } else if (ID == ".amdhsa_dx10_clamp") { 4037 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4038 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4039 } else if (ID == ".amdhsa_ieee_mode") { 4040 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4041 Val, ValRange); 4042 } else if (ID == ".amdhsa_fp16_overflow") { 4043 if (IVersion.Major < 9) 4044 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4045 IDRange); 4046 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4047 ValRange); 4048 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4049 if (IVersion.Major < 10) 4050 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4051 IDRange); 4052 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4053 ValRange); 4054 } else if (ID == ".amdhsa_memory_ordered") { 4055 if (IVersion.Major < 10) 4056 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4057 IDRange); 4058 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4059 ValRange); 4060 } else if (ID == ".amdhsa_forward_progress") { 4061 if (IVersion.Major < 10) 4062 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4063 IDRange); 4064 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4065 ValRange); 4066 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4067 PARSE_BITS_ENTRY( 4068 KD.compute_pgm_rsrc2, 4069 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4070 ValRange); 4071 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4072 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4073 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4074 Val, ValRange); 4075 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4076 PARSE_BITS_ENTRY( 4077 KD.compute_pgm_rsrc2, 4078 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4079 ValRange); 4080 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4082 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4083 Val, ValRange); 4084 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4086 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4087 Val, ValRange); 4088 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4089 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4090 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4091 Val, ValRange); 4092 } else if (ID == ".amdhsa_exception_int_div_zero") { 4093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4094 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4095 Val, ValRange); 4096 } else { 4097 return getParser().Error(IDRange.Start, 
4098 "unknown .amdhsa_kernel directive", IDRange); 4099 } 4100 4101 #undef PARSE_BITS_ENTRY 4102 } 4103 4104 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4105 return TokError(".amdhsa_next_free_vgpr directive is required"); 4106 4107 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4108 return TokError(".amdhsa_next_free_sgpr directive is required"); 4109 4110 unsigned VGPRBlocks; 4111 unsigned SGPRBlocks; 4112 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4113 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4114 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4115 SGPRBlocks)) 4116 return true; 4117 4118 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4119 VGPRBlocks)) 4120 return OutOfRangeError(VGPRRange); 4121 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4122 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4123 4124 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4125 SGPRBlocks)) 4126 return OutOfRangeError(SGPRRange); 4127 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4128 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4129 SGPRBlocks); 4130 4131 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4132 return TokError("too many user SGPRs enabled"); 4133 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4134 UserSGPRCount); 4135 4136 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4137 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4138 ReserveFlatScr, ReserveXNACK); 4139 return false; 4140 } 4141 4142 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4143 uint32_t Major; 4144 uint32_t Minor; 4145 4146 if (ParseDirectiveMajorMinor(Major, Minor)) 4147 return true; 4148 4149 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4150 return false; 4151 } 4152 4153 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4154 uint32_t Major; 4155 uint32_t Minor; 4156 uint32_t Stepping; 4157 StringRef VendorName; 4158 StringRef ArchName; 4159 4160 // If this directive has no arguments, then use the ISA version for the 4161 // targeted GPU. 
4162 if (getLexer().is(AsmToken::EndOfStatement)) { 4163 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4164 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4165 ISA.Stepping, 4166 "AMD", "AMDGPU"); 4167 return false; 4168 } 4169 4170 if (ParseDirectiveMajorMinor(Major, Minor)) 4171 return true; 4172 4173 if (getLexer().isNot(AsmToken::Comma)) 4174 return TokError("stepping version number required, comma expected"); 4175 Lex(); 4176 4177 if (ParseAsAbsoluteExpression(Stepping)) 4178 return TokError("invalid stepping version"); 4179 4180 if (getLexer().isNot(AsmToken::Comma)) 4181 return TokError("vendor name required, comma expected"); 4182 Lex(); 4183 4184 if (getLexer().isNot(AsmToken::String)) 4185 return TokError("invalid vendor name"); 4186 4187 VendorName = getLexer().getTok().getStringContents(); 4188 Lex(); 4189 4190 if (getLexer().isNot(AsmToken::Comma)) 4191 return TokError("arch name required, comma expected"); 4192 Lex(); 4193 4194 if (getLexer().isNot(AsmToken::String)) 4195 return TokError("invalid arch name"); 4196 4197 ArchName = getLexer().getTok().getStringContents(); 4198 Lex(); 4199 4200 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4201 VendorName, ArchName); 4202 return false; 4203 } 4204 4205 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4206 amd_kernel_code_t &Header) { 4207 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4208 // assembly for backwards compatibility. 4209 if (ID == "max_scratch_backing_memory_byte_size") { 4210 Parser.eatToEndOfStatement(); 4211 return false; 4212 } 4213 4214 SmallString<40> ErrStr; 4215 raw_svector_ostream Err(ErrStr); 4216 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4217 return TokError(Err.str()); 4218 } 4219 Lex(); 4220 4221 if (ID == "enable_wavefront_size32") { 4222 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4223 if (!isGFX10()) 4224 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4225 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4226 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4227 } else { 4228 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4229 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4230 } 4231 } 4232 4233 if (ID == "wavefront_size") { 4234 if (Header.wavefront_size == 5) { 4235 if (!isGFX10()) 4236 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4237 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4238 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4239 } else if (Header.wavefront_size == 6) { 4240 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4241 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4242 } 4243 } 4244 4245 if (ID == "enable_wgp_mode") { 4246 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4247 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4248 } 4249 4250 if (ID == "enable_mem_ordered") { 4251 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4252 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4253 } 4254 4255 if (ID == "enable_fwd_progress") { 4256 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4257 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4258 } 4259 4260 return false; 4261 } 4262 4263 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4264 amd_kernel_code_t Header; 4265 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4266 4267 while (true) { 4268 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4269 // will set the current token to EndOfStatement. 4270 while(getLexer().is(AsmToken::EndOfStatement)) 4271 Lex(); 4272 4273 if (getLexer().isNot(AsmToken::Identifier)) 4274 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4275 4276 StringRef ID = getLexer().getTok().getIdentifier(); 4277 Lex(); 4278 4279 if (ID == ".end_amd_kernel_code_t") 4280 break; 4281 4282 if (ParseAMDKernelCodeTValue(ID, Header)) 4283 return true; 4284 } 4285 4286 getTargetStreamer().EmitAMDKernelCodeT(Header); 4287 4288 return false; 4289 } 4290 4291 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4292 if (getLexer().isNot(AsmToken::Identifier)) 4293 return TokError("expected symbol name"); 4294 4295 StringRef KernelName = Parser.getTok().getString(); 4296 4297 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4298 ELF::STT_AMDGPU_HSA_KERNEL); 4299 Lex(); 4300 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4301 KernelScope.initialize(getContext()); 4302 return false; 4303 } 4304 4305 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4306 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4307 return Error(getParser().getTok().getLoc(), 4308 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4309 "architectures"); 4310 } 4311 4312 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4313 4314 std::string ISAVersionStringFromSTI; 4315 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4316 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4317 4318 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4319 return Error(getParser().getTok().getLoc(), 4320 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4321 "arguments specified through the command line"); 4322 } 4323 4324 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4325 Lex(); 4326 4327 return false; 4328 } 4329 4330 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4331 const char *AssemblerDirectiveBegin; 4332 const char *AssemblerDirectiveEnd; 4333 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4334 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4335 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4336 HSAMD::V3::AssemblerDirectiveEnd) 4337 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4338 HSAMD::AssemblerDirectiveEnd); 4339 4340 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4341 return Error(getParser().getTok().getLoc(), 4342 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4343 "not available on non-amdhsa OSes")).str()); 4344 } 4345 4346 std::string HSAMetadataString; 4347 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4348 HSAMetadataString)) 4349 return true; 4350 4351 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4352 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4353 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4354 } else { 4355 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4356 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4357 } 4358 4359 return false; 4360 } 4361 4362 /// Common code to parse out a block of text (typically YAML) between start and 4363 /// end directives. 
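/// For example, with the code-object-v3 HSA metadata directives (names taken
/// from HSAMD::V3), everything between .amdgpu_metadata and
/// .end_amdgpu_metadata is collected verbatim into CollectString.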
4364 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4365 const char *AssemblerDirectiveEnd, 4366 std::string &CollectString) { 4367 4368 raw_string_ostream CollectStream(CollectString); 4369 4370 getLexer().setSkipSpace(false); 4371 4372 bool FoundEnd = false; 4373 while (!getLexer().is(AsmToken::Eof)) { 4374 while (getLexer().is(AsmToken::Space)) { 4375 CollectStream << getLexer().getTok().getString(); 4376 Lex(); 4377 } 4378 4379 if (getLexer().is(AsmToken::Identifier)) { 4380 StringRef ID = getLexer().getTok().getIdentifier(); 4381 if (ID == AssemblerDirectiveEnd) { 4382 Lex(); 4383 FoundEnd = true; 4384 break; 4385 } 4386 } 4387 4388 CollectStream << Parser.parseStringToEndOfStatement() 4389 << getContext().getAsmInfo()->getSeparatorString(); 4390 4391 Parser.eatToEndOfStatement(); 4392 } 4393 4394 getLexer().setSkipSpace(true); 4395 4396 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4397 return TokError(Twine("expected directive ") + 4398 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4399 } 4400 4401 CollectStream.flush(); 4402 return false; 4403 } 4404 4405 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4406 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4407 std::string String; 4408 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4409 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4410 return true; 4411 4412 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4413 if (!PALMetadata->setFromString(String)) 4414 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4415 return false; 4416 } 4417 4418 /// Parse the assembler directive for old linear-format PAL metadata. 4419 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4420 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4421 return Error(getParser().getTok().getLoc(), 4422 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4423 "not available on non-amdpal OSes")).str()); 4424 } 4425 4426 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4427 PALMetadata->setLegacy(); 4428 for (;;) { 4429 uint32_t Key, Value; 4430 if (ParseAsAbsoluteExpression(Key)) { 4431 return TokError(Twine("invalid value in ") + 4432 Twine(PALMD::AssemblerDirective)); 4433 } 4434 if (getLexer().isNot(AsmToken::Comma)) { 4435 return TokError(Twine("expected an even number of values in ") + 4436 Twine(PALMD::AssemblerDirective)); 4437 } 4438 Lex(); 4439 if (ParseAsAbsoluteExpression(Value)) { 4440 return TokError(Twine("invalid value in ") + 4441 Twine(PALMD::AssemblerDirective)); 4442 } 4443 PALMetadata->setRegister(Key, Value); 4444 if (getLexer().isNot(AsmToken::Comma)) 4445 break; 4446 Lex(); 4447 } 4448 return false; 4449 } 4450 4451 /// ParseDirectiveAMDGPULDS 4452 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4453 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4454 if (getParser().checkForValidSection()) 4455 return true; 4456 4457 StringRef Name; 4458 SMLoc NameLoc = getLexer().getLoc(); 4459 if (getParser().parseIdentifier(Name)) 4460 return TokError("expected identifier in directive"); 4461 4462 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4463 if (parseToken(AsmToken::Comma, "expected ','")) 4464 return true; 4465 4466 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4467 4468 int64_t Size; 4469 SMLoc SizeLoc = getLexer().getLoc(); 4470 if (getParser().parseAbsoluteExpression(Size)) 4471 return true; 4472 if (Size < 0) 4473 return 
Error(SizeLoc, "size must be non-negative"); 4474 if (Size > LocalMemorySize) 4475 return Error(SizeLoc, "size is too large"); 4476 4477 int64_t Alignment = 4; 4478 if (getLexer().is(AsmToken::Comma)) { 4479 Lex(); 4480 SMLoc AlignLoc = getLexer().getLoc(); 4481 if (getParser().parseAbsoluteExpression(Alignment)) 4482 return true; 4483 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4484 return Error(AlignLoc, "alignment must be a power of two"); 4485 4486 // Alignment larger than the size of LDS is possible in theory, as long 4487 // as the linker manages to place the symbol at address 0, but we do want 4488 // to make sure the alignment fits nicely into a 32-bit integer. 4489 if (Alignment >= 1u << 31) 4490 return Error(AlignLoc, "alignment is too large"); 4491 } 4492 4493 if (parseToken(AsmToken::EndOfStatement, 4494 "unexpected token in '.amdgpu_lds' directive")) 4495 return true; 4496 4497 Symbol->redefineIfPossible(); 4498 if (!Symbol->isUndefined()) 4499 return Error(NameLoc, "invalid symbol redefinition"); 4500 4501 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4502 return false; 4503 } 4504 4505 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4506 StringRef IDVal = DirectiveID.getString(); 4507 4508 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4509 if (IDVal == ".amdgcn_target") 4510 return ParseDirectiveAMDGCNTarget(); 4511 4512 if (IDVal == ".amdhsa_kernel") 4513 return ParseDirectiveAMDHSAKernel(); 4514 4515 // TODO: Restructure/combine with PAL metadata directive. 4516 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4517 return ParseDirectiveHSAMetadata(); 4518 } else { 4519 if (IDVal == ".hsa_code_object_version") 4520 return ParseDirectiveHSACodeObjectVersion(); 4521 4522 if (IDVal == ".hsa_code_object_isa") 4523 return ParseDirectiveHSACodeObjectISA(); 4524 4525 if (IDVal == ".amd_kernel_code_t") 4526 return ParseDirectiveAMDKernelCodeT(); 4527 4528 if (IDVal == ".amdgpu_hsa_kernel") 4529 return ParseDirectiveAMDGPUHsaKernel(); 4530 4531 if (IDVal == ".amd_amdgpu_isa") 4532 return ParseDirectiveISAVersion(); 4533 4534 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4535 return ParseDirectiveHSAMetadata(); 4536 } 4537 4538 if (IDVal == ".amdgpu_lds") 4539 return ParseDirectiveAMDGPULDS(); 4540 4541 if (IDVal == PALMD::AssemblerDirectiveBegin) 4542 return ParseDirectivePALMetadataBegin(); 4543 4544 if (IDVal == PALMD::AssemblerDirective) 4545 return ParseDirectivePALMetadata(); 4546 4547 return true; 4548 } 4549 4550 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4551 unsigned RegNo) const { 4552 4553 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4554 R.isValid(); ++R) { 4555 if (*R == RegNo) 4556 return isGFX9() || isGFX10(); 4557 } 4558 4559 // GFX10 has 2 more SGPRs 104 and 105.
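// References to s104/s105, including tuples such as s[104:105], are therefore
// only accepted when hasSGPR104_SGPR105() holds; the alias walk below catches
// any register that overlaps the pair.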
4560 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4561 R.isValid(); ++R) { 4562 if (*R == RegNo) 4563 return hasSGPR104_SGPR105(); 4564 } 4565 4566 switch (RegNo) { 4567 case AMDGPU::SRC_SHARED_BASE: 4568 case AMDGPU::SRC_SHARED_LIMIT: 4569 case AMDGPU::SRC_PRIVATE_BASE: 4570 case AMDGPU::SRC_PRIVATE_LIMIT: 4571 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4572 return !isCI() && !isSI() && !isVI(); 4573 case AMDGPU::TBA: 4574 case AMDGPU::TBA_LO: 4575 case AMDGPU::TBA_HI: 4576 case AMDGPU::TMA: 4577 case AMDGPU::TMA_LO: 4578 case AMDGPU::TMA_HI: 4579 return !isGFX9() && !isGFX10(); 4580 case AMDGPU::XNACK_MASK: 4581 case AMDGPU::XNACK_MASK_LO: 4582 case AMDGPU::XNACK_MASK_HI: 4583 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4584 case AMDGPU::SGPR_NULL: 4585 return isGFX10(); 4586 default: 4587 break; 4588 } 4589 4590 if (isCI()) 4591 return true; 4592 4593 if (isSI() || isGFX10()) { 4594 // No flat_scr on SI. 4595 // On GFX10 flat scratch is not a valid register operand and can only be 4596 // accessed with s_setreg/s_getreg. 4597 switch (RegNo) { 4598 case AMDGPU::FLAT_SCR: 4599 case AMDGPU::FLAT_SCR_LO: 4600 case AMDGPU::FLAT_SCR_HI: 4601 return false; 4602 default: 4603 return true; 4604 } 4605 } 4606 4607 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4608 // SI/CI have. 4609 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4610 R.isValid(); ++R) { 4611 if (*R == RegNo) 4612 return hasSGPR102_SGPR103(); 4613 } 4614 4615 return true; 4616 } 4617 4618 OperandMatchResultTy 4619 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4620 OperandMode Mode) { 4621 // Try to parse with a custom parser 4622 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4623 4624 // If we successfully parsed the operand or if there was an error parsing, 4625 // we are done. 4626 // 4627 // If we are parsing after we reach EndOfStatement then this means we 4628 // are appending default values to the Operands list. This is only done 4629 // by a custom parser, so we shouldn't continue on to the generic parsing. 4630 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4631 getLexer().is(AsmToken::EndOfStatement)) 4632 return ResTy; 4633 4634 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4635 unsigned Prefix = Operands.size(); 4636 SMLoc LBraceLoc = getTok().getLoc(); 4637 Parser.Lex(); // eat the '[' 4638 4639 for (;;) { 4640 ResTy = parseReg(Operands); 4641 if (ResTy != MatchOperand_Success) 4642 return ResTy; 4643 4644 if (getLexer().is(AsmToken::RBrac)) 4645 break; 4646 4647 if (getLexer().isNot(AsmToken::Comma)) 4648 return MatchOperand_ParseFail; 4649 Parser.Lex(); 4650 } 4651 4652 if (Operands.size() - Prefix > 1) { 4653 Operands.insert(Operands.begin() + Prefix, 4654 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4655 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4656 getTok().getLoc())); 4657 } 4658 4659 Parser.Lex(); // eat the ']' 4660 return MatchOperand_Success; 4661 } 4662 4663 return parseRegOrImm(Operands); 4664 } 4665 4666 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4667 // Clear any forced encodings from the previous instruction.
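// A suffix on the mnemonic then re-establishes one for this instruction:
// e.g. "v_add_f32_e64" forces the 64-bit (VOP3) encoding and the name
// returned to the matcher is "v_add_f32"; "_e32", "_dpp" and "_sdwa" are
// handled the same way below.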
4668 setForcedEncodingSize(0); 4669 setForcedDPP(false); 4670 setForcedSDWA(false); 4671 4672 if (Name.endswith("_e64")) { 4673 setForcedEncodingSize(64); 4674 return Name.substr(0, Name.size() - 4); 4675 } else if (Name.endswith("_e32")) { 4676 setForcedEncodingSize(32); 4677 return Name.substr(0, Name.size() - 4); 4678 } else if (Name.endswith("_dpp")) { 4679 setForcedDPP(true); 4680 return Name.substr(0, Name.size() - 4); 4681 } else if (Name.endswith("_sdwa")) { 4682 setForcedSDWA(true); 4683 return Name.substr(0, Name.size() - 5); 4684 } 4685 return Name; 4686 } 4687 4688 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4689 StringRef Name, 4690 SMLoc NameLoc, OperandVector &Operands) { 4691 // Add the instruction mnemonic 4692 Name = parseMnemonicSuffix(Name); 4693 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4694 4695 bool IsMIMG = Name.startswith("image_"); 4696 4697 while (!getLexer().is(AsmToken::EndOfStatement)) { 4698 OperandMode Mode = OperandMode_Default; 4699 if (IsMIMG && isGFX10() && Operands.size() == 2) 4700 Mode = OperandMode_NSA; 4701 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4702 4703 // Eat the comma or space if there is one. 4704 if (getLexer().is(AsmToken::Comma)) 4705 Parser.Lex(); 4706 4707 if (Res != MatchOperand_Success) { 4708 if (!Parser.hasPendingError()) { 4709 // FIXME: use real operand location rather than the current location. 4710 StringRef Msg = 4711 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4712 "not a valid operand."; 4713 Error(getLexer().getLoc(), Msg); 4714 } 4715 while (!getLexer().is(AsmToken::EndOfStatement)) { 4716 Parser.Lex(); 4717 } 4718 return true; 4719 } 4720 } 4721 4722 return false; 4723 } 4724 4725 //===----------------------------------------------------------------------===// 4726 // Utility functions 4727 //===----------------------------------------------------------------------===// 4728 4729 OperandMatchResultTy 4730 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4731 4732 if (!trySkipId(Prefix, AsmToken::Colon)) 4733 return MatchOperand_NoMatch; 4734 4735 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4736 } 4737 4738 OperandMatchResultTy 4739 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4740 AMDGPUOperand::ImmTy ImmTy, 4741 bool (*ConvertResult)(int64_t&)) { 4742 SMLoc S = getLoc(); 4743 int64_t Value = 0; 4744 4745 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4746 if (Res != MatchOperand_Success) 4747 return Res; 4748 4749 if (ConvertResult && !ConvertResult(Value)) { 4750 Error(S, "invalid " + StringRef(Prefix) + " value."); 4751 } 4752 4753 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4754 return MatchOperand_Success; 4755 } 4756 4757 OperandMatchResultTy 4758 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4759 OperandVector &Operands, 4760 AMDGPUOperand::ImmTy ImmTy, 4761 bool (*ConvertResult)(int64_t&)) { 4762 SMLoc S = getLoc(); 4763 if (!trySkipId(Prefix, AsmToken::Colon)) 4764 return MatchOperand_NoMatch; 4765 4766 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4767 return MatchOperand_ParseFail; 4768 4769 unsigned Val = 0; 4770 const unsigned MaxSize = 4; 4771 4772 // FIXME: How to verify the number of elements matches the number of src 4773 // operands? 
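// For example (illustrative), "op_sel:[0,1]" is accepted here: element I
// contributes bit I, so Val becomes 0b10; at most MaxSize elements may
// appear between the brackets.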
4774 for (int I = 0; ; ++I) { 4775 int64_t Op; 4776 SMLoc Loc = getLoc(); 4777 if (!parseExpr(Op)) 4778 return MatchOperand_ParseFail; 4779 4780 if (Op != 0 && Op != 1) { 4781 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4782 return MatchOperand_ParseFail; 4783 } 4784 4785 Val |= (Op << I); 4786 4787 if (trySkipToken(AsmToken::RBrac)) 4788 break; 4789 4790 if (I + 1 == MaxSize) { 4791 Error(getLoc(), "expected a closing square bracket"); 4792 return MatchOperand_ParseFail; 4793 } 4794 4795 if (!skipToken(AsmToken::Comma, "expected a comma")) 4796 return MatchOperand_ParseFail; 4797 } 4798 4799 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4800 return MatchOperand_Success; 4801 } 4802 4803 OperandMatchResultTy 4804 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4805 AMDGPUOperand::ImmTy ImmTy) { 4806 int64_t Bit = 0; 4807 SMLoc S = Parser.getTok().getLoc(); 4808 4809 // We are at the end of the statement, and this is a default argument, so 4810 // use a default value. 4811 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4812 switch(getLexer().getKind()) { 4813 case AsmToken::Identifier: { 4814 StringRef Tok = Parser.getTok().getString(); 4815 if (Tok == Name) { 4816 if (Tok == "r128" && !hasMIMG_R128()) 4817 Error(S, "r128 modifier is not supported on this GPU"); 4818 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4819 Error(S, "a16 modifier is not supported on this GPU"); 4820 Bit = 1; 4821 Parser.Lex(); 4822 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4823 Bit = 0; 4824 Parser.Lex(); 4825 } else { 4826 return MatchOperand_NoMatch; 4827 } 4828 break; 4829 } 4830 default: 4831 return MatchOperand_NoMatch; 4832 } 4833 } 4834 4835 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4836 return MatchOperand_ParseFail; 4837 4838 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4839 ImmTy = AMDGPUOperand::ImmTyR128A16; 4840 4841 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4842 return MatchOperand_Success; 4843 } 4844 4845 static void addOptionalImmOperand( 4846 MCInst& Inst, const OperandVector& Operands, 4847 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4848 AMDGPUOperand::ImmTy ImmT, 4849 int64_t Default = 0) { 4850 auto i = OptionalIdx.find(ImmT); 4851 if (i != OptionalIdx.end()) { 4852 unsigned Idx = i->second; 4853 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4854 } else { 4855 Inst.addOperand(MCOperand::createImm(Default)); 4856 } 4857 } 4858 4859 OperandMatchResultTy 4860 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4861 if (getLexer().isNot(AsmToken::Identifier)) { 4862 return MatchOperand_NoMatch; 4863 } 4864 StringRef Tok = Parser.getTok().getString(); 4865 if (Tok != Prefix) { 4866 return MatchOperand_NoMatch; 4867 } 4868 4869 Parser.Lex(); 4870 if (getLexer().isNot(AsmToken::Colon)) { 4871 return MatchOperand_ParseFail; 4872 } 4873 4874 Parser.Lex(); 4875 if (getLexer().isNot(AsmToken::Identifier)) { 4876 return MatchOperand_ParseFail; 4877 } 4878 4879 Value = Parser.getTok().getString(); 4880 return MatchOperand_Success; 4881 } 4882 4883 //===----------------------------------------------------------------------===// 4884 // MTBUF format 4885 //===----------------------------------------------------------------------===// 4886 4887 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 4888 int64_t MaxVal, 4889 int64_t &Fmt) { 4890 int64_t Val; 4891 SMLoc Loc = getLoc(); 4892 4893 auto Res = parseIntWithPrefix(Pref, Val); 4894 if (Res 
== MatchOperand_ParseFail) 4895 return false; 4896 if (Res == MatchOperand_NoMatch) 4897 return true; 4898 4899 if (Val < 0 || Val > MaxVal) { 4900 Error(Loc, Twine("out of range ", StringRef(Pref))); 4901 return false; 4902 } 4903 4904 Fmt = Val; 4905 return true; 4906 } 4907 4908 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4909 // values to live in a joint format operand in the MCInst encoding. 4910 OperandMatchResultTy 4911 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 4912 using namespace llvm::AMDGPU::MTBUFFormat; 4913 4914 int64_t Dfmt = DFMT_UNDEF; 4915 int64_t Nfmt = NFMT_UNDEF; 4916 4917 // dfmt and nfmt can appear in either order, and each is optional. 4918 for (int I = 0; I < 2; ++I) { 4919 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 4920 return MatchOperand_ParseFail; 4921 4922 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 4923 return MatchOperand_ParseFail; 4924 } 4925 // Skip optional comma between dfmt/nfmt 4926 // but guard against 2 commas following each other. 4927 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 4928 !peekToken().is(AsmToken::Comma)) { 4929 trySkipToken(AsmToken::Comma); 4930 } 4931 } 4932 4933 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 4934 return MatchOperand_NoMatch; 4935 4936 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 4937 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 4938 4939 Format = encodeDfmtNfmt(Dfmt, Nfmt); 4940 return MatchOperand_Success; 4941 } 4942 4943 OperandMatchResultTy 4944 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 4945 using namespace llvm::AMDGPU::MTBUFFormat; 4946 4947 int64_t Fmt = UFMT_UNDEF; 4948 4949 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 4950 return MatchOperand_ParseFail; 4951 4952 if (Fmt == UFMT_UNDEF) 4953 return MatchOperand_NoMatch; 4954 4955 Format = Fmt; 4956 return MatchOperand_Success; 4957 } 4958 4959 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 4960 int64_t &Nfmt, 4961 StringRef FormatStr, 4962 SMLoc Loc) { 4963 using namespace llvm::AMDGPU::MTBUFFormat; 4964 int64_t Format; 4965 4966 Format = getDfmt(FormatStr); 4967 if (Format != DFMT_UNDEF) { 4968 Dfmt = Format; 4969 return true; 4970 } 4971 4972 Format = getNfmt(FormatStr, getSTI()); 4973 if (Format != NFMT_UNDEF) { 4974 Nfmt = Format; 4975 return true; 4976 } 4977 4978 Error(Loc, "unsupported format"); 4979 return false; 4980 } 4981 4982 OperandMatchResultTy 4983 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 4984 SMLoc FormatLoc, 4985 int64_t &Format) { 4986 using namespace llvm::AMDGPU::MTBUFFormat; 4987 4988 int64_t Dfmt = DFMT_UNDEF; 4989 int64_t Nfmt = NFMT_UNDEF; 4990 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 4991 return MatchOperand_ParseFail; 4992 4993 if (trySkipToken(AsmToken::Comma)) { 4994 StringRef Str; 4995 SMLoc Loc = getLoc(); 4996 if (!parseId(Str, "expected a format string") || 4997 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 4998 return MatchOperand_ParseFail; 4999 } 5000 if (Dfmt == DFMT_UNDEF) { 5001 Error(Loc, "duplicate numeric format"); 5002 return MatchOperand_ParseFail; 5003 } else if (Nfmt == NFMT_UNDEF) { 5004 Error(Loc, "duplicate data format"); 5005 return MatchOperand_ParseFail; 5006 } 5007 } 5008 5009 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5010 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5011 5012 if (isGFX10()) { 5013 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5014 if (Ufmt == UFMT_UNDEF) { 5015 Error(FormatLoc, "unsupported format"); 5016 return MatchOperand_ParseFail; 5017 } 5018 Format = Ufmt; 5019 } else { 5020 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5021 } 5022 5023 return MatchOperand_Success; 5024 } 5025 5026 OperandMatchResultTy 5027 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5028 SMLoc Loc, 5029 int64_t &Format) { 5030 using namespace llvm::AMDGPU::MTBUFFormat; 5031 5032 auto Id = getUnifiedFormat(FormatStr); 5033 if (Id == UFMT_UNDEF) 5034 return MatchOperand_NoMatch; 5035 5036 if (!isGFX10()) { 5037 Error(Loc, "unified format is not supported on this GPU"); 5038 return MatchOperand_ParseFail; 5039 } 5040 5041 Format = Id; 5042 return MatchOperand_Success; 5043 } 5044 5045 OperandMatchResultTy 5046 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5047 using namespace llvm::AMDGPU::MTBUFFormat; 5048 SMLoc Loc = getLoc(); 5049 5050 if (!parseExpr(Format)) 5051 return MatchOperand_ParseFail; 5052 if (!isValidFormatEncoding(Format, getSTI())) { 5053 Error(Loc, "out of range format"); 5054 return MatchOperand_ParseFail; 5055 } 5056 5057 return MatchOperand_Success; 5058 } 5059 5060 OperandMatchResultTy 5061 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5062 using namespace llvm::AMDGPU::MTBUFFormat; 5063 5064 if (!trySkipId("format", AsmToken::Colon)) 5065 return MatchOperand_NoMatch; 5066 5067 if (trySkipToken(AsmToken::LBrac)) { 5068 StringRef FormatStr; 5069 SMLoc Loc = getLoc(); 5070 if (!parseId(FormatStr, "expected a format string")) 5071 return MatchOperand_ParseFail; 5072 5073 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5074 if (Res == MatchOperand_NoMatch) 5075 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5076 if (Res != MatchOperand_Success) 5077 return Res; 5078 5079 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5080 return MatchOperand_ParseFail; 5081 5082 return MatchOperand_Success; 5083 } 5084 5085 return parseNumericFormat(Format); 5086 } 5087 5088 OperandMatchResultTy 5089 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5090 using namespace llvm::AMDGPU::MTBUFFormat; 5091 5092 int64_t Format = getDefaultFormatEncoding(getSTI()); 5093 OperandMatchResultTy Res; 5094 SMLoc Loc = getLoc(); 5095 5096 // Parse legacy format syntax. 5097 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5098 if (Res == MatchOperand_ParseFail) 5099 return Res; 5100 5101 bool FormatFound = (Res == MatchOperand_Success); 5102 5103 Operands.push_back( 5104 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5105 5106 if (FormatFound) 5107 trySkipToken(AsmToken::Comma); 5108 5109 if (isToken(AsmToken::EndOfStatement)) { 5110 // We are expecting an soffset operand, 5111 // but let matcher handle the error. 5112 return MatchOperand_Success; 5113 } 5114 5115 // Parse soffset. 
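// (The format specifier may also follow soffset, e.g.
// "... s1 format:[BUF_FMT_32_FLOAT]" with an illustrative format name; that
// case is handled by parseSymbolicOrNumericFormat once soffset is parsed.)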
5116 Res = parseRegOrImm(Operands); 5117 if (Res != MatchOperand_Success) 5118 return Res; 5119 5120 trySkipToken(AsmToken::Comma); 5121 5122 if (!FormatFound) { 5123 Res = parseSymbolicOrNumericFormat(Format); 5124 if (Res == MatchOperand_ParseFail) 5125 return Res; 5126 if (Res == MatchOperand_Success) { 5127 auto Size = Operands.size(); 5128 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5129 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5130 Op.setImm(Format); 5131 } 5132 return MatchOperand_Success; 5133 } 5134 5135 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5136 Error(getLoc(), "duplicate format"); 5137 return MatchOperand_ParseFail; 5138 } 5139 return MatchOperand_Success; 5140 } 5141 5142 //===----------------------------------------------------------------------===// 5143 // ds 5144 //===----------------------------------------------------------------------===// 5145 5146 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5147 const OperandVector &Operands) { 5148 OptionalImmIndexMap OptionalIdx; 5149 5150 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5151 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5152 5153 // Add the register arguments 5154 if (Op.isReg()) { 5155 Op.addRegOperands(Inst, 1); 5156 continue; 5157 } 5158 5159 // Handle optional arguments 5160 OptionalIdx[Op.getImmTy()] = i; 5161 } 5162 5163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5164 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5165 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5166 5167 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5168 } 5169 5170 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5171 bool IsGdsHardcoded) { 5172 OptionalImmIndexMap OptionalIdx; 5173 5174 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5175 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5176 5177 // Add the register arguments 5178 if (Op.isReg()) { 5179 Op.addRegOperands(Inst, 1); 5180 continue; 5181 } 5182 5183 if (Op.isToken() && Op.getToken() == "gds") { 5184 IsGdsHardcoded = true; 5185 continue; 5186 } 5187 5188 // Handle optional arguments 5189 OptionalIdx[Op.getImmTy()] = i; 5190 } 5191 5192 AMDGPUOperand::ImmTy OffsetType = 5193 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5194 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5195 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5196 AMDGPUOperand::ImmTyOffset; 5197 5198 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5199 5200 if (!IsGdsHardcoded) { 5201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5202 } 5203 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5204 } 5205 5206 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5207 OptionalImmIndexMap OptionalIdx; 5208 5209 unsigned OperandIdx[4]; 5210 unsigned EnMask = 0; 5211 int SrcIdx = 0; 5212 5213 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5214 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5215 5216 // Add the register arguments 5217 if (Op.isReg()) { 5218 assert(SrcIdx < 4); 5219 OperandIdx[SrcIdx] = Inst.size(); 5220 Op.addRegOperands(Inst, 1); 5221 ++SrcIdx; 5222 continue; 5223 } 5224 5225 if (Op.isOff()) { 5226 assert(SrcIdx < 4); 5227 OperandIdx[SrcIdx] = Inst.size(); 5228 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5229 ++SrcIdx; 5230 continue; 5231 } 5232 5233 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5234 Op.addImmOperands(Inst, 1); 5235 continue; 5236 } 5237 5238 if (Op.isToken() && Op.getToken() == "done") 5239 continue; 5240 5241 // Handle optional arguments 5242 OptionalIdx[Op.getImmTy()] = i; 5243 } 5244 5245 assert(SrcIdx == 4); 5246 5247 bool Compr = false; 5248 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5249 Compr = true; 5250 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5251 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5252 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5253 } 5254 5255 for (auto i = 0; i < SrcIdx; ++i) { 5256 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5257 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5258 } 5259 } 5260 5261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5263 5264 Inst.addOperand(MCOperand::createImm(EnMask)); 5265 } 5266 5267 //===----------------------------------------------------------------------===// 5268 // s_waitcnt 5269 //===----------------------------------------------------------------------===// 5270 5271 static bool 5272 encodeCnt( 5273 const AMDGPU::IsaVersion ISA, 5274 int64_t &IntVal, 5275 int64_t CntVal, 5276 bool Saturate, 5277 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5278 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5279 { 5280 bool Failed = false; 5281 5282 IntVal = encode(ISA, IntVal, CntVal); 5283 if (CntVal != decode(ISA, IntVal)) { 5284 if (Saturate) { 5285 IntVal = encode(ISA, IntVal, -1); 5286 } else { 5287 Failed = true; 5288 } 5289 } 5290 return Failed; 5291 } 5292 5293 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5294 5295 SMLoc CntLoc = getLoc(); 5296 StringRef CntName = getTokenStr(); 5297 5298 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5299 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5300 return false; 5301 5302 int64_t CntVal; 5303 SMLoc ValLoc = getLoc(); 5304 if (!parseExpr(CntVal)) 5305 return false; 5306 5307 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5308 5309 bool Failed = true; 5310 bool Sat = CntName.endswith("_sat"); 5311 5312 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5313 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5314 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5315 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5316 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5317 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5318 } else { 5319 Error(CntLoc, "invalid counter name " + CntName); 5320 return false; 5321 } 5322 5323 if (Failed) { 5324 Error(ValLoc, "too large value for " + CntName); 5325 return false; 5326 } 5327 5328 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5329 return false; 5330 5331 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5332 if (isToken(AsmToken::EndOfStatement)) { 5333 Error(getLoc(), "expected a counter name"); 5334 return false; 5335 } 5336 } 5337 5338 return true; 5339 } 5340 5341 OperandMatchResultTy 5342 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5343 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5344 int64_t Waitcnt = getWaitcntBitMask(ISA); 5345 SMLoc S = getLoc(); 5346 5347 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5348 while (!isToken(AsmToken::EndOfStatement)) { 5349 if (!parseCnt(Waitcnt)) 5350 return MatchOperand_ParseFail; 5351 } 5352 } else { 5353 if (!parseExpr(Waitcnt)) 5354 return MatchOperand_ParseFail; 5355 } 5356 5357 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5358 return MatchOperand_Success; 5359 } 5360 5361 bool 5362 AMDGPUOperand::isSWaitCnt() const { 5363 return isImm(); 5364 } 5365 5366 //===----------------------------------------------------------------------===// 5367 // hwreg 5368 //===----------------------------------------------------------------------===// 5369 5370 bool 5371 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5372 int64_t &Offset, 5373 int64_t 
&Width) { 5374 using namespace llvm::AMDGPU::Hwreg; 5375 5376 // The register may be specified by name or using a numeric code 5377 if (isToken(AsmToken::Identifier) && 5378 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5379 HwReg.IsSymbolic = true; 5380 lex(); // skip message name 5381 } else if (!parseExpr(HwReg.Id)) { 5382 return false; 5383 } 5384 5385 if (trySkipToken(AsmToken::RParen)) 5386 return true; 5387 5388 // parse optional params 5389 return 5390 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5391 parseExpr(Offset) && 5392 skipToken(AsmToken::Comma, "expected a comma") && 5393 parseExpr(Width) && 5394 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5395 } 5396 5397 bool 5398 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5399 const int64_t Offset, 5400 const int64_t Width, 5401 const SMLoc Loc) { 5402 5403 using namespace llvm::AMDGPU::Hwreg; 5404 5405 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5406 Error(Loc, "specified hardware register is not supported on this GPU"); 5407 return false; 5408 } else if (!isValidHwreg(HwReg.Id)) { 5409 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5410 return false; 5411 } else if (!isValidHwregOffset(Offset)) { 5412 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5413 return false; 5414 } else if (!isValidHwregWidth(Width)) { 5415 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5416 return false; 5417 } 5418 return true; 5419 } 5420 5421 OperandMatchResultTy 5422 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5423 using namespace llvm::AMDGPU::Hwreg; 5424 5425 int64_t ImmVal = 0; 5426 SMLoc Loc = getLoc(); 5427 5428 if (trySkipId("hwreg", AsmToken::LParen)) { 5429 OperandInfoTy HwReg(ID_UNKNOWN_); 5430 int64_t Offset = OFFSET_DEFAULT_; 5431 int64_t Width = WIDTH_DEFAULT_; 5432 if (parseHwregBody(HwReg, Offset, Width) && 5433 validateHwreg(HwReg, Offset, Width, Loc)) { 5434 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5435 } else { 5436 return MatchOperand_ParseFail; 5437 } 5438 } else if (parseExpr(ImmVal)) { 5439 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5440 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5441 return MatchOperand_ParseFail; 5442 } 5443 } else { 5444 return MatchOperand_ParseFail; 5445 } 5446 5447 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5448 return MatchOperand_Success; 5449 } 5450 5451 bool AMDGPUOperand::isHwreg() const { 5452 return isImmTy(ImmTyHwreg); 5453 } 5454 5455 //===----------------------------------------------------------------------===// 5456 // sendmsg 5457 //===----------------------------------------------------------------------===// 5458 5459 bool 5460 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5461 OperandInfoTy &Op, 5462 OperandInfoTy &Stream) { 5463 using namespace llvm::AMDGPU::SendMsg; 5464 5465 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5466 Msg.IsSymbolic = true; 5467 lex(); // skip message name 5468 } else if (!parseExpr(Msg.Id)) { 5469 return false; 5470 } 5471 5472 if (trySkipToken(AsmToken::Comma)) { 5473 Op.IsDefined = true; 5474 if (isToken(AsmToken::Identifier) && 5475 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5476 lex(); // skip operation name 5477 } else if (!parseExpr(Op.Id)) { 5478 return false; 5479 } 5480 5481 if (trySkipToken(AsmToken::Comma)) { 5482 Stream.IsDefined = true; 5483 if (!parseExpr(Stream.Id)) 
5484 return false; 5485 } 5486 } 5487 5488 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5489 } 5490 5491 bool 5492 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5493 const OperandInfoTy &Op, 5494 const OperandInfoTy &Stream, 5495 const SMLoc S) { 5496 using namespace llvm::AMDGPU::SendMsg; 5497 5498 // Validation strictness depends on whether message is specified 5499 // in a symbolc or in a numeric form. In the latter case 5500 // only encoding possibility is checked. 5501 bool Strict = Msg.IsSymbolic; 5502 5503 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5504 Error(S, "invalid message id"); 5505 return false; 5506 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5507 Error(S, Op.IsDefined ? 5508 "message does not support operations" : 5509 "missing message operation"); 5510 return false; 5511 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5512 Error(S, "invalid operation id"); 5513 return false; 5514 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5515 Error(S, "message operation does not support streams"); 5516 return false; 5517 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5518 Error(S, "invalid message stream id"); 5519 return false; 5520 } 5521 return true; 5522 } 5523 5524 OperandMatchResultTy 5525 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5526 using namespace llvm::AMDGPU::SendMsg; 5527 5528 int64_t ImmVal = 0; 5529 SMLoc Loc = getLoc(); 5530 5531 if (trySkipId("sendmsg", AsmToken::LParen)) { 5532 OperandInfoTy Msg(ID_UNKNOWN_); 5533 OperandInfoTy Op(OP_NONE_); 5534 OperandInfoTy Stream(STREAM_ID_NONE_); 5535 if (parseSendMsgBody(Msg, Op, Stream) && 5536 validateSendMsg(Msg, Op, Stream, Loc)) { 5537 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5538 } else { 5539 return MatchOperand_ParseFail; 5540 } 5541 } else if (parseExpr(ImmVal)) { 5542 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5543 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5544 return MatchOperand_ParseFail; 5545 } 5546 } else { 5547 return MatchOperand_ParseFail; 5548 } 5549 5550 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5551 return MatchOperand_Success; 5552 } 5553 5554 bool AMDGPUOperand::isSendMsg() const { 5555 return isImmTy(ImmTySendMsg); 5556 } 5557 5558 //===----------------------------------------------------------------------===// 5559 // v_interp 5560 //===----------------------------------------------------------------------===// 5561 5562 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5563 if (getLexer().getKind() != AsmToken::Identifier) 5564 return MatchOperand_NoMatch; 5565 5566 StringRef Str = Parser.getTok().getString(); 5567 int Slot = StringSwitch<int>(Str) 5568 .Case("p10", 0) 5569 .Case("p20", 1) 5570 .Case("p0", 2) 5571 .Default(-1); 5572 5573 SMLoc S = Parser.getTok().getLoc(); 5574 if (Slot == -1) 5575 return MatchOperand_ParseFail; 5576 5577 Parser.Lex(); 5578 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5579 AMDGPUOperand::ImmTyInterpSlot)); 5580 return MatchOperand_Success; 5581 } 5582 5583 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5584 if (getLexer().getKind() != AsmToken::Identifier) 5585 return MatchOperand_NoMatch; 5586 5587 StringRef Str = Parser.getTok().getString(); 5588 if (!Str.startswith("attr")) 5589 return MatchOperand_NoMatch; 5590 5591 StringRef Chan = Str.take_back(2); 5592 int AttrChan = 
StringSwitch<int>(Chan) 5593 .Case(".x", 0) 5594 .Case(".y", 1) 5595 .Case(".z", 2) 5596 .Case(".w", 3) 5597 .Default(-1); 5598 if (AttrChan == -1) 5599 return MatchOperand_ParseFail; 5600 5601 Str = Str.drop_back(2).drop_front(4); 5602 5603 uint8_t Attr; 5604 if (Str.getAsInteger(10, Attr)) 5605 return MatchOperand_ParseFail; 5606 5607 SMLoc S = Parser.getTok().getLoc(); 5608 Parser.Lex(); 5609 if (Attr > 63) { 5610 Error(S, "out of bounds attr"); 5611 return MatchOperand_ParseFail; 5612 } 5613 5614 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5615 5616 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5617 AMDGPUOperand::ImmTyInterpAttr)); 5618 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5619 AMDGPUOperand::ImmTyAttrChan)); 5620 return MatchOperand_Success; 5621 } 5622 5623 //===----------------------------------------------------------------------===// 5624 // exp 5625 //===----------------------------------------------------------------------===// 5626 5627 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5628 uint8_t &Val) { 5629 if (Str == "null") { 5630 Val = 9; 5631 return MatchOperand_Success; 5632 } 5633 5634 if (Str.startswith("mrt")) { 5635 Str = Str.drop_front(3); 5636 if (Str == "z") { // == mrtz 5637 Val = 8; 5638 return MatchOperand_Success; 5639 } 5640 5641 if (Str.getAsInteger(10, Val)) 5642 return MatchOperand_ParseFail; 5643 5644 if (Val > 7) { 5645 Error(getLoc(), "invalid exp target"); 5646 return MatchOperand_ParseFail; 5647 } 5648 5649 return MatchOperand_Success; 5650 } 5651 5652 if (Str.startswith("pos")) { 5653 Str = Str.drop_front(3); 5654 if (Str.getAsInteger(10, Val)) 5655 return MatchOperand_ParseFail; 5656 5657 if (Val > 4 || (Val == 4 && !isGFX10())) { 5658 Error(getLoc(), "invalid exp target"); 5659 return MatchOperand_ParseFail; 5660 } 5661 5662 Val += 12; 5663 return MatchOperand_Success; 5664 } 5665 5666 if (isGFX10() && Str == "prim") { 5667 Val = 20; 5668 return MatchOperand_Success; 5669 } 5670 5671 if (Str.startswith("param")) { 5672 Str = Str.drop_front(5); 5673 if (Str.getAsInteger(10, Val)) 5674 return MatchOperand_ParseFail; 5675 5676 if (Val >= 32) { 5677 Error(getLoc(), "invalid exp target"); 5678 return MatchOperand_ParseFail; 5679 } 5680 5681 Val += 32; 5682 return MatchOperand_Success; 5683 } 5684 5685 if (Str.startswith("invalid_target_")) { 5686 Str = Str.drop_front(15); 5687 if (Str.getAsInteger(10, Val)) 5688 return MatchOperand_ParseFail; 5689 5690 Error(getLoc(), "invalid exp target"); 5691 return MatchOperand_ParseFail; 5692 } 5693 5694 return MatchOperand_NoMatch; 5695 } 5696 5697 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5698 uint8_t Val; 5699 StringRef Str = Parser.getTok().getString(); 5700 5701 auto Res = parseExpTgtImpl(Str, Val); 5702 if (Res != MatchOperand_Success) 5703 return Res; 5704 5705 SMLoc S = Parser.getTok().getLoc(); 5706 Parser.Lex(); 5707 5708 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5709 AMDGPUOperand::ImmTyExpTgt)); 5710 return MatchOperand_Success; 5711 } 5712 5713 //===----------------------------------------------------------------------===// 5714 // parser helpers 5715 //===----------------------------------------------------------------------===// 5716 5717 bool 5718 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5719 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5720 } 5721 5722 bool 5723 AMDGPUAsmParser::isId(const StringRef Id) const { 5724 return 
isId(getToken(), Id); 5725 } 5726 5727 bool 5728 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5729 return getTokenKind() == Kind; 5730 } 5731 5732 bool 5733 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5734 if (isId(Id)) { 5735 lex(); 5736 return true; 5737 } 5738 return false; 5739 } 5740 5741 bool 5742 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5743 if (isId(Id) && peekToken().is(Kind)) { 5744 lex(); 5745 lex(); 5746 return true; 5747 } 5748 return false; 5749 } 5750 5751 bool 5752 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5753 if (isToken(Kind)) { 5754 lex(); 5755 return true; 5756 } 5757 return false; 5758 } 5759 5760 bool 5761 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5762 const StringRef ErrMsg) { 5763 if (!trySkipToken(Kind)) { 5764 Error(getLoc(), ErrMsg); 5765 return false; 5766 } 5767 return true; 5768 } 5769 5770 bool 5771 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5772 return !getParser().parseAbsoluteExpression(Imm); 5773 } 5774 5775 bool 5776 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5777 SMLoc S = getLoc(); 5778 5779 const MCExpr *Expr; 5780 if (Parser.parseExpression(Expr)) 5781 return false; 5782 5783 int64_t IntVal; 5784 if (Expr->evaluateAsAbsolute(IntVal)) { 5785 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5786 } else { 5787 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5788 } 5789 return true; 5790 } 5791 5792 bool 5793 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5794 if (isToken(AsmToken::String)) { 5795 Val = getToken().getStringContents(); 5796 lex(); 5797 return true; 5798 } else { 5799 Error(getLoc(), ErrMsg); 5800 return false; 5801 } 5802 } 5803 5804 bool 5805 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5806 if (isToken(AsmToken::Identifier)) { 5807 Val = getTokenStr(); 5808 lex(); 5809 return true; 5810 } else { 5811 Error(getLoc(), ErrMsg); 5812 return false; 5813 } 5814 } 5815 5816 AsmToken 5817 AMDGPUAsmParser::getToken() const { 5818 return Parser.getTok(); 5819 } 5820 5821 AsmToken 5822 AMDGPUAsmParser::peekToken() { 5823 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 5824 } 5825 5826 void 5827 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5828 auto TokCount = getLexer().peekTokens(Tokens); 5829 5830 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5831 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5832 } 5833 5834 AsmToken::TokenKind 5835 AMDGPUAsmParser::getTokenKind() const { 5836 return getLexer().getKind(); 5837 } 5838 5839 SMLoc 5840 AMDGPUAsmParser::getLoc() const { 5841 return getToken().getLoc(); 5842 } 5843 5844 StringRef 5845 AMDGPUAsmParser::getTokenStr() const { 5846 return getToken().getString(); 5847 } 5848 5849 void 5850 AMDGPUAsmParser::lex() { 5851 Parser.Lex(); 5852 } 5853 5854 //===----------------------------------------------------------------------===// 5855 // swizzle 5856 //===----------------------------------------------------------------------===// 5857 5858 LLVM_READNONE 5859 static unsigned 5860 encodeBitmaskPerm(const unsigned AndMask, 5861 const unsigned OrMask, 5862 const unsigned XorMask) { 5863 using namespace llvm::AMDGPU::Swizzle; 5864 5865 return BITMASK_PERM_ENC | 5866 (AndMask << BITMASK_AND_SHIFT) | 5867 (OrMask << BITMASK_OR_SHIFT) | 5868 (XorMask << BITMASK_XOR_SHIFT); 5869 } 5870 5871 bool 5872 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5873 const unsigned MinVal, 5874 const unsigned MaxVal, 5875 const StringRef ErrMsg) { 5876 for (unsigned i = 0; i < OpNum; ++i) { 5877 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5878 return false; 5879 } 5880 SMLoc ExprLoc = Parser.getTok().getLoc(); 5881 if (!parseExpr(Op[i])) { 5882 return false; 5883 } 5884 if (Op[i] < MinVal || Op[i] > MaxVal) { 5885 Error(ExprLoc, ErrMsg); 5886 return false; 5887 } 5888 } 5889 5890 return true; 5891 } 5892 5893 bool 5894 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5895 using namespace llvm::AMDGPU::Swizzle; 5896 5897 int64_t Lane[LANE_NUM]; 5898 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5899 "expected a 2-bit lane id")) { 5900 Imm = QUAD_PERM_ENC; 5901 for (unsigned I = 0; I < LANE_NUM; ++I) { 5902 Imm |= Lane[I] << (LANE_SHIFT * I); 5903 } 5904 return true; 5905 } 5906 return false; 5907 } 5908 5909 bool 5910 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5911 using namespace llvm::AMDGPU::Swizzle; 5912 5913 SMLoc S = Parser.getTok().getLoc(); 5914 int64_t GroupSize; 5915 int64_t LaneIdx; 5916 5917 if (!parseSwizzleOperands(1, &GroupSize, 5918 2, 32, 5919 "group size must be in the interval [2,32]")) { 5920 return false; 5921 } 5922 if (!isPowerOf2_64(GroupSize)) { 5923 Error(S, "group size must be a power of two"); 5924 return false; 5925 } 5926 if (parseSwizzleOperands(1, &LaneIdx, 5927 0, GroupSize - 1, 5928 "lane id must be in the interval [0,group size - 1]")) { 5929 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5930 return true; 5931 } 5932 return false; 5933 } 5934 5935 bool 5936 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5937 using namespace llvm::AMDGPU::Swizzle; 5938 5939 SMLoc S = Parser.getTok().getLoc(); 5940 int64_t GroupSize; 5941 5942 if (!parseSwizzleOperands(1, &GroupSize, 5943 2, 32, "group size must be in the interval [2,32]")) { 5944 return false; 5945 } 5946 if (!isPowerOf2_64(GroupSize)) { 5947 Error(S, "group size must be a power of two"); 5948 return false; 5949 } 5950 5951 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5952 return true; 5953 } 5954 5955 bool 5956 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5957 using namespace 
llvm::AMDGPU::Swizzle; 5958 5959 SMLoc S = Parser.getTok().getLoc(); 5960 int64_t GroupSize; 5961 5962 if (!parseSwizzleOperands(1, &GroupSize, 5963 1, 16, "group size must be in the interval [1,16]")) { 5964 return false; 5965 } 5966 if (!isPowerOf2_64(GroupSize)) { 5967 Error(S, "group size must be a power of two"); 5968 return false; 5969 } 5970 5971 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5972 return true; 5973 } 5974 5975 bool 5976 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5977 using namespace llvm::AMDGPU::Swizzle; 5978 5979 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5980 return false; 5981 } 5982 5983 StringRef Ctl; 5984 SMLoc StrLoc = Parser.getTok().getLoc(); 5985 if (!parseString(Ctl)) { 5986 return false; 5987 } 5988 if (Ctl.size() != BITMASK_WIDTH) { 5989 Error(StrLoc, "expected a 5-character mask"); 5990 return false; 5991 } 5992 5993 unsigned AndMask = 0; 5994 unsigned OrMask = 0; 5995 unsigned XorMask = 0; 5996 5997 for (size_t i = 0; i < Ctl.size(); ++i) { 5998 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5999 switch(Ctl[i]) { 6000 default: 6001 Error(StrLoc, "invalid mask"); 6002 return false; 6003 case '0': 6004 break; 6005 case '1': 6006 OrMask |= Mask; 6007 break; 6008 case 'p': 6009 AndMask |= Mask; 6010 break; 6011 case 'i': 6012 AndMask |= Mask; 6013 XorMask |= Mask; 6014 break; 6015 } 6016 } 6017 6018 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6019 return true; 6020 } 6021 6022 bool 6023 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6024 6025 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6026 6027 if (!parseExpr(Imm)) { 6028 return false; 6029 } 6030 if (!isUInt<16>(Imm)) { 6031 Error(OffsetLoc, "expected a 16-bit offset"); 6032 return false; 6033 } 6034 return true; 6035 } 6036 6037 bool 6038 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6039 using namespace llvm::AMDGPU::Swizzle; 6040 6041 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6042 6043 SMLoc ModeLoc = Parser.getTok().getLoc(); 6044 bool Ok = false; 6045 6046 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6047 Ok = parseSwizzleQuadPerm(Imm); 6048 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6049 Ok = parseSwizzleBitmaskPerm(Imm); 6050 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6051 Ok = parseSwizzleBroadcast(Imm); 6052 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6053 Ok = parseSwizzleSwap(Imm); 6054 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6055 Ok = parseSwizzleReverse(Imm); 6056 } else { 6057 Error(ModeLoc, "expected a swizzle mode"); 6058 } 6059 6060 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6061 } 6062 6063 return false; 6064 } 6065 6066 OperandMatchResultTy 6067 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6068 SMLoc S = Parser.getTok().getLoc(); 6069 int64_t Imm = 0; 6070 6071 if (trySkipId("offset")) { 6072 6073 bool Ok = false; 6074 if (skipToken(AsmToken::Colon, "expected a colon")) { 6075 if (trySkipId("swizzle")) { 6076 Ok = parseSwizzleMacro(Imm); 6077 } else { 6078 Ok = parseSwizzleOffset(Imm); 6079 } 6080 } 6081 6082 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6083 6084 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6085 } else { 6086 // Swizzle "offset" operand is optional. 6087 // If it is omitted, try parsing other optional operands. 
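    // For reference, the 'offset' branch above accepts forms like these
    // (illustrative examples only; exact value ranges are enforced by the
    // parseSwizzle* helpers):
    //   ds_swizzle_b32 v0, v1 offset:0xFFFF
    //   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
    //   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
    //   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
    //   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)
    //   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)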
6088 return parseOptionalOpr(Operands); 6089 } 6090 } 6091 6092 bool 6093 AMDGPUOperand::isSwizzle() const { 6094 return isImmTy(ImmTySwizzle); 6095 } 6096 6097 //===----------------------------------------------------------------------===// 6098 // VGPR Index Mode 6099 //===----------------------------------------------------------------------===// 6100 6101 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6102 6103 using namespace llvm::AMDGPU::VGPRIndexMode; 6104 6105 if (trySkipToken(AsmToken::RParen)) { 6106 return OFF; 6107 } 6108 6109 int64_t Imm = 0; 6110 6111 while (true) { 6112 unsigned Mode = 0; 6113 SMLoc S = Parser.getTok().getLoc(); 6114 6115 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6116 if (trySkipId(IdSymbolic[ModeId])) { 6117 Mode = 1 << ModeId; 6118 break; 6119 } 6120 } 6121 6122 if (Mode == 0) { 6123 Error(S, (Imm == 0)? 6124 "expected a VGPR index mode or a closing parenthesis" : 6125 "expected a VGPR index mode"); 6126 return UNDEF; 6127 } 6128 6129 if (Imm & Mode) { 6130 Error(S, "duplicate VGPR index mode"); 6131 return UNDEF; 6132 } 6133 Imm |= Mode; 6134 6135 if (trySkipToken(AsmToken::RParen)) 6136 break; 6137 if (!skipToken(AsmToken::Comma, 6138 "expected a comma or a closing parenthesis")) 6139 return UNDEF; 6140 } 6141 6142 return Imm; 6143 } 6144 6145 OperandMatchResultTy 6146 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6147 6148 using namespace llvm::AMDGPU::VGPRIndexMode; 6149 6150 int64_t Imm = 0; 6151 SMLoc S = Parser.getTok().getLoc(); 6152 6153 if (getLexer().getKind() == AsmToken::Identifier && 6154 Parser.getTok().getString() == "gpr_idx" && 6155 getLexer().peekTok().is(AsmToken::LParen)) { 6156 6157 Parser.Lex(); 6158 Parser.Lex(); 6159 6160 Imm = parseGPRIdxMacro(); 6161 if (Imm == UNDEF) 6162 return MatchOperand_ParseFail; 6163 6164 } else { 6165 if (getParser().parseAbsoluteExpression(Imm)) 6166 return MatchOperand_ParseFail; 6167 if (Imm < 0 || !isUInt<4>(Imm)) { 6168 Error(S, "invalid immediate: only 4-bit values are legal"); 6169 return MatchOperand_ParseFail; 6170 } 6171 } 6172 6173 Operands.push_back( 6174 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6175 return MatchOperand_Success; 6176 } 6177 6178 bool AMDGPUOperand::isGPRIdxMode() const { 6179 return isImmTy(ImmTyGprIdxMode); 6180 } 6181 6182 //===----------------------------------------------------------------------===// 6183 // sopp branch targets 6184 //===----------------------------------------------------------------------===// 6185 6186 OperandMatchResultTy 6187 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6188 6189 // Make sure we are not parsing something 6190 // that looks like a label or an expression but is not. 6191 // This will improve error messages. 6192 if (isRegister() || isModifier()) 6193 return MatchOperand_NoMatch; 6194 6195 if (!parseExpr(Operands)) 6196 return MatchOperand_ParseFail; 6197 6198 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6199 assert(Opr.isImm() || Opr.isExpr()); 6200 SMLoc Loc = Opr.getStartLoc(); 6201 6202 // Currently we do not support arbitrary expressions as branch targets. 6203 // Only labels and absolute expressions are accepted. 
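  // For example (illustrative):
  //   s_branch loop_begin      // label: symbol reference, resolved later
  //   s_branch 8               // absolute expression: must fit in signed 16 bits
  //   s_branch loop_begin + 4  // rejected below: neither a plain label nor absolute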
6204 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6205 Error(Loc, "expected an absolute expression or a label"); 6206 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6207 Error(Loc, "expected a 16-bit signed jump offset"); 6208 } 6209 6210 return MatchOperand_Success; 6211 } 6212 6213 //===----------------------------------------------------------------------===// 6214 // Boolean holding registers 6215 //===----------------------------------------------------------------------===// 6216 6217 OperandMatchResultTy 6218 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6219 return parseReg(Operands); 6220 } 6221 6222 //===----------------------------------------------------------------------===// 6223 // mubuf 6224 //===----------------------------------------------------------------------===// 6225 6226 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6227 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6228 } 6229 6230 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6231 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6232 } 6233 6234 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6235 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6236 } 6237 6238 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6239 const OperandVector &Operands, 6240 bool IsAtomic, 6241 bool IsAtomicReturn, 6242 bool IsLds) { 6243 bool IsLdsOpcode = IsLds; 6244 bool HasLdsModifier = false; 6245 OptionalImmIndexMap OptionalIdx; 6246 assert(IsAtomicReturn ? IsAtomic : true); 6247 unsigned FirstOperandIdx = 1; 6248 6249 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6250 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6251 6252 // Add the register arguments 6253 if (Op.isReg()) { 6254 Op.addRegOperands(Inst, 1); 6255 // Insert a tied src for atomic return dst. 6256 // This cannot be postponed as subsequent calls to 6257 // addImmOperands rely on correct number of MC operands. 6258 if (IsAtomicReturn && i == FirstOperandIdx) 6259 Op.addRegOperands(Inst, 1); 6260 continue; 6261 } 6262 6263 // Handle the case where soffset is an immediate 6264 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6265 Op.addImmOperands(Inst, 1); 6266 continue; 6267 } 6268 6269 HasLdsModifier |= Op.isLDS(); 6270 6271 // Handle tokens like 'offen' which are sometimes hard-coded into the 6272 // asm string. There are no MCInst operands for these. 6273 if (Op.isToken()) { 6274 continue; 6275 } 6276 assert(Op.isImm()); 6277 6278 // Handle optional arguments 6279 OptionalIdx[Op.getImmTy()] = i; 6280 } 6281 6282 // This is a workaround for an llvm quirk which may result in an 6283 // incorrect instruction selection. Lds and non-lds versions of 6284 // MUBUF instructions are identical except that lds versions 6285 // have mandatory 'lds' modifier. However this modifier follows 6286 // optional modifiers and llvm asm matcher regards this 'lds' 6287 // modifier as an optional one. As a result, an lds version 6288 // of opcode may be selected even if it has no 'lds' modifier. 6289 if (IsLdsOpcode && !HasLdsModifier) { 6290 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6291 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6292 Inst.setOpcode(NoLdsOpcode); 6293 IsLdsOpcode = false; 6294 } 6295 } 6296 6297 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6298 if (!IsAtomic) { // glc is hard-coded. 
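    // e.g. (illustrative) buffer_load_dword v0, off, s[8:11], s3 glc
    // For atomics the returning form spells 'glc' in the mnemonic's asm
    // string itself, so the optional operand is appended only on this
    // non-atomic path.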
6299 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6300 } 6301 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6302 6303 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6304 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6305 } 6306 6307 if (isGFX10()) 6308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6309 } 6310 6311 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6312 OptionalImmIndexMap OptionalIdx; 6313 6314 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6315 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6316 6317 // Add the register arguments 6318 if (Op.isReg()) { 6319 Op.addRegOperands(Inst, 1); 6320 continue; 6321 } 6322 6323 // Handle the case where soffset is an immediate 6324 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6325 Op.addImmOperands(Inst, 1); 6326 continue; 6327 } 6328 6329 // Handle tokens like 'offen' which are sometimes hard-coded into the 6330 // asm string. There are no MCInst operands for these. 6331 if (Op.isToken()) { 6332 continue; 6333 } 6334 assert(Op.isImm()); 6335 6336 // Handle optional arguments 6337 OptionalIdx[Op.getImmTy()] = i; 6338 } 6339 6340 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6341 AMDGPUOperand::ImmTyOffset); 6342 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6343 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6344 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6345 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6346 6347 if (isGFX10()) 6348 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6349 } 6350 6351 //===----------------------------------------------------------------------===// 6352 // mimg 6353 //===----------------------------------------------------------------------===// 6354 6355 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6356 bool IsAtomic) { 6357 unsigned I = 1; 6358 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6359 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6360 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6361 } 6362 6363 if (IsAtomic) { 6364 // Add src, same as dst 6365 assert(Desc.getNumDefs() == 1); 6366 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6367 } 6368 6369 OptionalImmIndexMap OptionalIdx; 6370 6371 for (unsigned E = Operands.size(); I != E; ++I) { 6372 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6373 6374 // Add the register arguments 6375 if (Op.isReg()) { 6376 Op.addRegOperands(Inst, 1); 6377 } else if (Op.isImmModifier()) { 6378 OptionalIdx[Op.getImmTy()] = I; 6379 } else if (!Op.isToken()) { 6380 llvm_unreachable("unexpected operand type"); 6381 } 6382 } 6383 6384 bool IsGFX10 = isGFX10(); 6385 6386 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6387 if (IsGFX10) 6388 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6389 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6390 if (IsGFX10) 6391 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6392 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6393 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6394 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6395 if (IsGFX10) 6396 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6397 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6398 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6399 if (!IsGFX10) 6400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6401 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6402 } 6403 6404 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6405 cvtMIMG(Inst, Operands, true); 6406 } 6407 6408 //===----------------------------------------------------------------------===// 6409 // smrd 6410 //===----------------------------------------------------------------------===// 6411 6412 bool AMDGPUOperand::isSMRDOffset8() const { 6413 return isImm() && isUInt<8>(getImm()); 6414 } 6415 6416 bool AMDGPUOperand::isSMEMOffset() const { 6417 return isImm(); // Offset range is checked later by validator. 6418 } 6419 6420 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6421 // 32-bit literals are only supported on CI and we only want to use them 6422 // when the offset is > 8-bits. 6423 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6424 } 6425 6426 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6427 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6428 } 6429 6430 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6431 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6432 } 6433 6434 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6435 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6436 } 6437 6438 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6439 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6440 } 6441 6442 //===----------------------------------------------------------------------===// 6443 // vop3 6444 //===----------------------------------------------------------------------===// 6445 6446 static bool ConvertOmodMul(int64_t &Mul) { 6447 if (Mul != 1 && Mul != 2 && Mul != 4) 6448 return false; 6449 6450 Mul >>= 1; 6451 return true; 6452 } 6453 6454 static bool ConvertOmodDiv(int64_t &Div) { 6455 if (Div == 1) { 6456 Div = 0; 6457 return true; 6458 } 6459 6460 if (Div == 2) { 6461 Div = 3; 6462 return true; 6463 } 6464 6465 return false; 6466 } 6467 6468 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6469 if (BoundCtrl == 0) { 6470 BoundCtrl = 1; 6471 return true; 6472 } 6473 6474 if (BoundCtrl == -1) { 6475 BoundCtrl = 0; 6476 return true; 6477 } 6478 6479 return false; 6480 } 6481 6482 // Note: the order in this table matches the order of operands in AsmString. 
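// Each entry is {name, immediate type, is-bit flag, converter} where:
//   name      - spelling of the operand in assembly text, e.g. "offset";
//   is-bit    - true for bare flags such as 'glc' that take no ":value";
//   converter - optional callback used by parseOptionalOpr to legalize the
//               parsed value, e.g. ConvertOmodMul for 'mul:2'.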
6483 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6484 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6485 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6486 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6487 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6488 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6489 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6490 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6491 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6492 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6493 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6494 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6495 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6496 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6497 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6498 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6499 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6500 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6501 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6502 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6503 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6504 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6505 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6506 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6507 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6508 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6509 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6510 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6511 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6512 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6513 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6514 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6515 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6516 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6517 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6518 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6519 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6520 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6521 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6522 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6523 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6524 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6525 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6526 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6527 }; 6528 6529 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6530 6531 OperandMatchResultTy res = parseOptionalOpr(Operands); 6532 6533 // This is a hack to enable hardcoded mandatory operands which follow 6534 // optional operands. 6535 // 6536 // Current design assumes that all operands after the first optional operand 6537 // are also optional. However implementation of some instructions violates 6538 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6539 // 6540 // To alleviate this problem, we have to (implicitly) parse extra operands 6541 // to make sure autogenerated parser of custom operands never hit hardcoded 6542 // mandatory operands. 
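  //
  // For example (illustrative), in
  //   flat_atomic_swap v0, v[1:2], v2 offset:16 glc
  // the trailing 'glc' belongs to the asm string of the returning atomic but
  // follows the optional 'offset', so the loop below keeps parsing optional
  // operands (up to MAX_OPR_LOOKAHEAD) instead of stopping after the first one.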
6543 6544 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6545 if (res != MatchOperand_Success || 6546 isToken(AsmToken::EndOfStatement)) 6547 break; 6548 6549 trySkipToken(AsmToken::Comma); 6550 res = parseOptionalOpr(Operands); 6551 } 6552 6553 return res; 6554 } 6555 6556 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6557 OperandMatchResultTy res; 6558 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6559 // try to parse any optional operand here 6560 if (Op.IsBit) { 6561 res = parseNamedBit(Op.Name, Operands, Op.Type); 6562 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6563 res = parseOModOperand(Operands); 6564 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6565 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6566 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6567 res = parseSDWASel(Operands, Op.Name, Op.Type); 6568 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6569 res = parseSDWADstUnused(Operands); 6570 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6571 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6572 Op.Type == AMDGPUOperand::ImmTyNegLo || 6573 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6574 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6575 Op.ConvertResult); 6576 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6577 res = parseDim(Operands); 6578 } else { 6579 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6580 } 6581 if (res != MatchOperand_NoMatch) { 6582 return res; 6583 } 6584 } 6585 return MatchOperand_NoMatch; 6586 } 6587 6588 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6589 StringRef Name = Parser.getTok().getString(); 6590 if (Name == "mul") { 6591 return parseIntWithPrefix("mul", Operands, 6592 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6593 } 6594 6595 if (Name == "div") { 6596 return parseIntWithPrefix("div", Operands, 6597 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6598 } 6599 6600 return MatchOperand_NoMatch; 6601 } 6602 6603 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6604 cvtVOP3P(Inst, Operands); 6605 6606 int Opc = Inst.getOpcode(); 6607 6608 int SrcNum; 6609 const int Ops[] = { AMDGPU::OpName::src0, 6610 AMDGPU::OpName::src1, 6611 AMDGPU::OpName::src2 }; 6612 for (SrcNum = 0; 6613 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6614 ++SrcNum); 6615 assert(SrcNum > 0); 6616 6617 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6618 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6619 6620 if ((OpSel & (1 << SrcNum)) != 0) { 6621 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6622 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6623 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6624 } 6625 } 6626 6627 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6628 // 1. This operand is input modifiers 6629 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6630 // 2. This is not last operand 6631 && Desc.NumOperands > (OpNum + 1) 6632 // 3. Next operand is register class 6633 && Desc.OpInfo[OpNum + 1].RegClass != -1 6634 // 4. 
Next register is not tied to any other operand 6635 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6636 } 6637 6638 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6639 { 6640 OptionalImmIndexMap OptionalIdx; 6641 unsigned Opc = Inst.getOpcode(); 6642 6643 unsigned I = 1; 6644 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6645 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6646 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6647 } 6648 6649 for (unsigned E = Operands.size(); I != E; ++I) { 6650 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6651 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6652 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6653 } else if (Op.isInterpSlot() || 6654 Op.isInterpAttr() || 6655 Op.isAttrChan()) { 6656 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6657 } else if (Op.isImmModifier()) { 6658 OptionalIdx[Op.getImmTy()] = I; 6659 } else { 6660 llvm_unreachable("unhandled operand type"); 6661 } 6662 } 6663 6664 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6666 } 6667 6668 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6670 } 6671 6672 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6673 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6674 } 6675 } 6676 6677 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6678 OptionalImmIndexMap &OptionalIdx) { 6679 unsigned Opc = Inst.getOpcode(); 6680 6681 unsigned I = 1; 6682 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6683 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6684 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6685 } 6686 6687 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6688 // This instruction has src modifiers 6689 for (unsigned E = Operands.size(); I != E; ++I) { 6690 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6691 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6692 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6693 } else if (Op.isImmModifier()) { 6694 OptionalIdx[Op.getImmTy()] = I; 6695 } else if (Op.isRegOrImm()) { 6696 Op.addRegOrImmOperands(Inst, 1); 6697 } else { 6698 llvm_unreachable("unhandled operand type"); 6699 } 6700 } 6701 } else { 6702 // No src modifiers 6703 for (unsigned E = Operands.size(); I != E; ++I) { 6704 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6705 if (Op.isMod()) { 6706 OptionalIdx[Op.getImmTy()] = I; 6707 } else { 6708 Op.addRegOrImmOperands(Inst, 1); 6709 } 6710 } 6711 } 6712 6713 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6714 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6715 } 6716 6717 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6719 } 6720 6721 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6722 // it has src2 register operand that is tied to dst operand 6723 // we don't allow modifiers for this operand in assembler so src2_modifiers 6724 // should be 0. 
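  //
  // For example (illustrative), for
  //   v_mac_f32 v0, v1, v2
  // the loop above emits only dst/src0/src1; the code below inserts a zero
  // src2_modifiers immediate and re-adds the dst register as the tied src2 so
  // the MCInst layout matches the V_MAC/V_FMAC operand list.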
6725 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6726 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6727 Opc == AMDGPU::V_MAC_F32_e64_vi || 6728 Opc == AMDGPU::V_MAC_F16_e64_vi || 6729 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6730 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6731 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6732 auto it = Inst.begin(); 6733 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6734 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6735 ++it; 6736 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6737 } 6738 } 6739 6740 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6741 OptionalImmIndexMap OptionalIdx; 6742 cvtVOP3(Inst, Operands, OptionalIdx); 6743 } 6744 6745 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6746 const OperandVector &Operands) { 6747 OptionalImmIndexMap OptIdx; 6748 const int Opc = Inst.getOpcode(); 6749 const MCInstrDesc &Desc = MII.get(Opc); 6750 6751 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6752 6753 cvtVOP3(Inst, Operands, OptIdx); 6754 6755 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6756 assert(!IsPacked); 6757 Inst.addOperand(Inst.getOperand(0)); 6758 } 6759 6760 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6761 // instruction, and then figure out where to actually put the modifiers 6762 6763 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6764 6765 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6766 if (OpSelHiIdx != -1) { 6767 int DefaultVal = IsPacked ? -1 : 0; 6768 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6769 DefaultVal); 6770 } 6771 6772 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6773 if (NegLoIdx != -1) { 6774 assert(IsPacked); 6775 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6776 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6777 } 6778 6779 const int Ops[] = { AMDGPU::OpName::src0, 6780 AMDGPU::OpName::src1, 6781 AMDGPU::OpName::src2 }; 6782 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6783 AMDGPU::OpName::src1_modifiers, 6784 AMDGPU::OpName::src2_modifiers }; 6785 6786 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6787 6788 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6789 unsigned OpSelHi = 0; 6790 unsigned NegLo = 0; 6791 unsigned NegHi = 0; 6792 6793 if (OpSelHiIdx != -1) { 6794 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6795 } 6796 6797 if (NegLoIdx != -1) { 6798 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6799 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6800 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6801 } 6802 6803 for (int J = 0; J < 3; ++J) { 6804 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6805 if (OpIdx == -1) 6806 break; 6807 6808 uint32_t ModVal = 0; 6809 6810 if ((OpSel & (1 << J)) != 0) 6811 ModVal |= SISrcMods::OP_SEL_0; 6812 6813 if ((OpSelHi & (1 << J)) != 0) 6814 ModVal |= SISrcMods::OP_SEL_1; 6815 6816 if ((NegLo & (1 << J)) != 0) 6817 ModVal |= SISrcMods::NEG; 6818 6819 if ((NegHi & (1 << J)) != 0) 6820 ModVal |= SISrcMods::NEG_HI; 6821 6822 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6823 6824 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6825 } 6826 } 6827 6828 //===----------------------------------------------------------------------===// 6829 // dpp 6830 
//===----------------------------------------------------------------------===// 6831 6832 bool AMDGPUOperand::isDPP8() const { 6833 return isImmTy(ImmTyDPP8); 6834 } 6835 6836 bool AMDGPUOperand::isDPPCtrl() const { 6837 using namespace AMDGPU::DPP; 6838 6839 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6840 if (result) { 6841 int64_t Imm = getImm(); 6842 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6843 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6844 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6845 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6846 (Imm == DppCtrl::WAVE_SHL1) || 6847 (Imm == DppCtrl::WAVE_ROL1) || 6848 (Imm == DppCtrl::WAVE_SHR1) || 6849 (Imm == DppCtrl::WAVE_ROR1) || 6850 (Imm == DppCtrl::ROW_MIRROR) || 6851 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6852 (Imm == DppCtrl::BCAST15) || 6853 (Imm == DppCtrl::BCAST31) || 6854 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6855 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6856 } 6857 return false; 6858 } 6859 6860 //===----------------------------------------------------------------------===// 6861 // mAI 6862 //===----------------------------------------------------------------------===// 6863 6864 bool AMDGPUOperand::isBLGP() const { 6865 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6866 } 6867 6868 bool AMDGPUOperand::isCBSZ() const { 6869 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6870 } 6871 6872 bool AMDGPUOperand::isABID() const { 6873 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6874 } 6875 6876 bool AMDGPUOperand::isS16Imm() const { 6877 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6878 } 6879 6880 bool AMDGPUOperand::isU16Imm() const { 6881 return isImm() && isUInt<16>(getImm()); 6882 } 6883 6884 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6885 if (!isGFX10()) 6886 return MatchOperand_NoMatch; 6887 6888 SMLoc S = Parser.getTok().getLoc(); 6889 6890 if (getLexer().isNot(AsmToken::Identifier)) 6891 return MatchOperand_NoMatch; 6892 if (getLexer().getTok().getString() != "dim") 6893 return MatchOperand_NoMatch; 6894 6895 Parser.Lex(); 6896 if (getLexer().isNot(AsmToken::Colon)) 6897 return MatchOperand_ParseFail; 6898 6899 Parser.Lex(); 6900 6901 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6902 // integer. 
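  // For example (illustrative), "dim:2D" is lexed as the integer '2' followed
  // by the identifier 'D', whereas "dim:SQ_RSRC_IMG_2D" arrives as a single
  // identifier; both are glued back into one token string below.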
6903 std::string Token; 6904 if (getLexer().is(AsmToken::Integer)) { 6905 SMLoc Loc = getLexer().getTok().getEndLoc(); 6906 Token = std::string(getLexer().getTok().getString()); 6907 Parser.Lex(); 6908 if (getLexer().getTok().getLoc() != Loc) 6909 return MatchOperand_ParseFail; 6910 } 6911 if (getLexer().isNot(AsmToken::Identifier)) 6912 return MatchOperand_ParseFail; 6913 Token += getLexer().getTok().getString(); 6914 6915 StringRef DimId = Token; 6916 if (DimId.startswith("SQ_RSRC_IMG_")) 6917 DimId = DimId.substr(12); 6918 6919 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6920 if (!DimInfo) 6921 return MatchOperand_ParseFail; 6922 6923 Parser.Lex(); 6924 6925 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6926 AMDGPUOperand::ImmTyDim)); 6927 return MatchOperand_Success; 6928 } 6929 6930 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6931 SMLoc S = Parser.getTok().getLoc(); 6932 StringRef Prefix; 6933 6934 if (getLexer().getKind() == AsmToken::Identifier) { 6935 Prefix = Parser.getTok().getString(); 6936 } else { 6937 return MatchOperand_NoMatch; 6938 } 6939 6940 if (Prefix != "dpp8") 6941 return parseDPPCtrl(Operands); 6942 if (!isGFX10()) 6943 return MatchOperand_NoMatch; 6944 6945 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6946 6947 int64_t Sels[8]; 6948 6949 Parser.Lex(); 6950 if (getLexer().isNot(AsmToken::Colon)) 6951 return MatchOperand_ParseFail; 6952 6953 Parser.Lex(); 6954 if (getLexer().isNot(AsmToken::LBrac)) 6955 return MatchOperand_ParseFail; 6956 6957 Parser.Lex(); 6958 if (getParser().parseAbsoluteExpression(Sels[0])) 6959 return MatchOperand_ParseFail; 6960 if (0 > Sels[0] || 7 < Sels[0]) 6961 return MatchOperand_ParseFail; 6962 6963 for (size_t i = 1; i < 8; ++i) { 6964 if (getLexer().isNot(AsmToken::Comma)) 6965 return MatchOperand_ParseFail; 6966 6967 Parser.Lex(); 6968 if (getParser().parseAbsoluteExpression(Sels[i])) 6969 return MatchOperand_ParseFail; 6970 if (0 > Sels[i] || 7 < Sels[i]) 6971 return MatchOperand_ParseFail; 6972 } 6973 6974 if (getLexer().isNot(AsmToken::RBrac)) 6975 return MatchOperand_ParseFail; 6976 Parser.Lex(); 6977 6978 unsigned DPP8 = 0; 6979 for (size_t i = 0; i < 8; ++i) 6980 DPP8 |= (Sels[i] << (i * 3)); 6981 6982 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6983 return MatchOperand_Success; 6984 } 6985 6986 OperandMatchResultTy 6987 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6988 using namespace AMDGPU::DPP; 6989 6990 SMLoc S = Parser.getTok().getLoc(); 6991 StringRef Prefix; 6992 int64_t Int; 6993 6994 if (getLexer().getKind() == AsmToken::Identifier) { 6995 Prefix = Parser.getTok().getString(); 6996 } else { 6997 return MatchOperand_NoMatch; 6998 } 6999 7000 if (Prefix == "row_mirror") { 7001 Int = DppCtrl::ROW_MIRROR; 7002 Parser.Lex(); 7003 } else if (Prefix == "row_half_mirror") { 7004 Int = DppCtrl::ROW_HALF_MIRROR; 7005 Parser.Lex(); 7006 } else { 7007 // Check to prevent parseDPPCtrlOps from eating invalid tokens 7008 if (Prefix != "quad_perm" 7009 && Prefix != "row_shl" 7010 && Prefix != "row_shr" 7011 && Prefix != "row_ror" 7012 && Prefix != "wave_shl" 7013 && Prefix != "wave_rol" 7014 && Prefix != "wave_shr" 7015 && Prefix != "wave_ror" 7016 && Prefix != "row_bcast" 7017 && Prefix != "row_share" 7018 && Prefix != "row_xmask") { 7019 return MatchOperand_NoMatch; 7020 } 7021 7022 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 7023 return MatchOperand_NoMatch; 7024 7025 if 
(!isVI() && !isGFX9() && 7026 (Prefix == "wave_shl" || Prefix == "wave_shr" || 7027 Prefix == "wave_rol" || Prefix == "wave_ror" || 7028 Prefix == "row_bcast")) 7029 return MatchOperand_NoMatch; 7030 7031 Parser.Lex(); 7032 if (getLexer().isNot(AsmToken::Colon)) 7033 return MatchOperand_ParseFail; 7034 7035 if (Prefix == "quad_perm") { 7036 // quad_perm:[%d,%d,%d,%d] 7037 Parser.Lex(); 7038 if (getLexer().isNot(AsmToken::LBrac)) 7039 return MatchOperand_ParseFail; 7040 Parser.Lex(); 7041 7042 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 7043 return MatchOperand_ParseFail; 7044 7045 for (int i = 0; i < 3; ++i) { 7046 if (getLexer().isNot(AsmToken::Comma)) 7047 return MatchOperand_ParseFail; 7048 Parser.Lex(); 7049 7050 int64_t Temp; 7051 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 7052 return MatchOperand_ParseFail; 7053 const int shift = i*2 + 2; 7054 Int += (Temp << shift); 7055 } 7056 7057 if (getLexer().isNot(AsmToken::RBrac)) 7058 return MatchOperand_ParseFail; 7059 Parser.Lex(); 7060 } else { 7061 // sel:%d 7062 Parser.Lex(); 7063 if (getParser().parseAbsoluteExpression(Int)) 7064 return MatchOperand_ParseFail; 7065 7066 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 7067 Int |= DppCtrl::ROW_SHL0; 7068 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 7069 Int |= DppCtrl::ROW_SHR0; 7070 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 7071 Int |= DppCtrl::ROW_ROR0; 7072 } else if (Prefix == "wave_shl" && 1 == Int) { 7073 Int = DppCtrl::WAVE_SHL1; 7074 } else if (Prefix == "wave_rol" && 1 == Int) { 7075 Int = DppCtrl::WAVE_ROL1; 7076 } else if (Prefix == "wave_shr" && 1 == Int) { 7077 Int = DppCtrl::WAVE_SHR1; 7078 } else if (Prefix == "wave_ror" && 1 == Int) { 7079 Int = DppCtrl::WAVE_ROR1; 7080 } else if (Prefix == "row_bcast") { 7081 if (Int == 15) { 7082 Int = DppCtrl::BCAST15; 7083 } else if (Int == 31) { 7084 Int = DppCtrl::BCAST31; 7085 } else { 7086 return MatchOperand_ParseFail; 7087 } 7088 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 7089 Int |= DppCtrl::ROW_SHARE_FIRST; 7090 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 7091 Int |= DppCtrl::ROW_XMASK_FIRST; 7092 } else { 7093 return MatchOperand_ParseFail; 7094 } 7095 } 7096 } 7097 7098 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 7099 return MatchOperand_Success; 7100 } 7101 7102 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7103 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7104 } 7105 7106 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7107 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7108 } 7109 7110 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7111 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7112 } 7113 7114 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7115 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7116 } 7117 7118 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7119 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7120 } 7121 7122 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7123 OptionalImmIndexMap OptionalIdx; 7124 7125 unsigned I = 1; 7126 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7127 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7128 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7129 } 7130 7131 int Fi = 0; 7132 for (unsigned E = Operands.size(); I != E; ++I) { 7133 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7134 MCOI::TIED_TO); 7135 if (TiedTo != -1) { 7136 assert((unsigned)TiedTo < Inst.getNumOperands()); 7137 // handle tied old or src2 for MAC instructions 7138 Inst.addOperand(Inst.getOperand(TiedTo)); 7139 } 7140 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7141 // Add the register arguments 7142 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7143 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7144 // Skip it. 7145 continue; 7146 } 7147 7148 if (IsDPP8) { 7149 if (Op.isDPP8()) { 7150 Op.addImmOperands(Inst, 1); 7151 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7152 Op.addRegWithFPInputModsOperands(Inst, 2); 7153 } else if (Op.isFI()) { 7154 Fi = Op.getImm(); 7155 } else if (Op.isReg()) { 7156 Op.addRegOperands(Inst, 1); 7157 } else { 7158 llvm_unreachable("Invalid operand type"); 7159 } 7160 } else { 7161 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7162 Op.addRegWithFPInputModsOperands(Inst, 2); 7163 } else if (Op.isDPPCtrl()) { 7164 Op.addImmOperands(Inst, 1); 7165 } else if (Op.isImm()) { 7166 // Handle optional arguments 7167 OptionalIdx[Op.getImmTy()] = I; 7168 } else { 7169 llvm_unreachable("Invalid operand type"); 7170 } 7171 } 7172 } 7173 7174 if (IsDPP8) { 7175 using namespace llvm::AMDGPU::DPP; 7176 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7177 } else { 7178 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7179 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7180 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7181 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7182 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7183 } 7184 } 7185 } 7186 7187 //===----------------------------------------------------------------------===// 7188 // sdwa 7189 //===----------------------------------------------------------------------===// 7190 7191 OperandMatchResultTy 7192 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7193 AMDGPUOperand::ImmTy Type) { 7194 using namespace llvm::AMDGPU::SDWA; 7195 7196 SMLoc S = Parser.getTok().getLoc(); 7197 StringRef Value; 7198 OperandMatchResultTy res; 7199 7200 res = parseStringWithPrefix(Prefix, Value); 7201 if (res != MatchOperand_Success) { 7202 return res; 7203 } 7204 7205 int64_t Int; 7206 Int = StringSwitch<int64_t>(Value) 7207 .Case("BYTE_0", SdwaSel::BYTE_0) 7208 .Case("BYTE_1", SdwaSel::BYTE_1) 7209 .Case("BYTE_2", SdwaSel::BYTE_2) 7210 .Case("BYTE_3", SdwaSel::BYTE_3) 7211 .Case("WORD_0", SdwaSel::WORD_0) 7212 .Case("WORD_1", SdwaSel::WORD_1) 7213 .Case("DWORD", SdwaSel::DWORD) 7214 .Default(0xffffffff); 7215 Parser.Lex(); // eat last token 7216 7217 if (Int == 0xffffffff) { 7218 return MatchOperand_ParseFail; 7219 } 7220 7221 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7222 return MatchOperand_Success; 7223 } 7224 7225 OperandMatchResultTy 7226 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 7227 using namespace llvm::AMDGPU::SDWA; 7228 7229 SMLoc S = Parser.getTok().getLoc(); 7230 StringRef Value; 7231 OperandMatchResultTy res; 7232 7233 res = parseStringWithPrefix("dst_unused", Value); 7234 if (res != 
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

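// Convert the parsed operand list into an MCInst for an SDWA instruction.
// The implicit "vcc" written in VOP2b/VOPC source forms is skipped when the
// SDWA encoding has no slot for it, sources with modifiers occupy two MCInst
// operands each, and selectors that were not written default to DWORD (with
// dst_unused defaulting to UNUSED_PRESERVE). Illustrative syntax (roughly):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1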
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: they have a src2 register operand that
  // is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

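// blgp, cbsz and abid are optional modifiers on MFMA (mAI) instructions; the
// helpers below supply an immediate 0 when a modifier is omitted from the
// source. Illustrative syntax (roughly):
//   v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] cbsz:1 abid:2 blgp:3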
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
  // but MatchInstructionImpl() expects to see a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand when
  // the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

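// Parse the optional immediate operand of s_endpgm. When no value is written,
// the operand defaults to 0; when present it must fit in 16 bits (roughly:
// "s_endpgm" or "s_endpgm 1").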
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }