//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
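
    // Example: an operand written as "-|v0|" is parsed with both Neg and Abs
    // set, so getModifiersOperand() below yields SISrcMods::NEG | SISrcMods::ABS.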
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };
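
  // The operand payload below is a tagged union: Kind selects which of the
  // members (Tok, Imm, Reg or Expr) is currently active.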
private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }
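
  // Naming convention for the source-operand predicates below: the leading
  // letters give the allowed register file (S = scalar, V = vector or scalar,
  // VI = VGPR only, AI = AGPR only); an extra 'C' restricts the operand to
  // registers and inline constants (no 32-bit literal); the B/F suffix and
  // width give the expected integer or floating-point operand type.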
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
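
  // KImm operands are literal constants embedded directly in the instruction
  // encoding (for example the constant operand of v_madmk_f32), so only plain
  // immediates can match here.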
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }
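
  // Source modifiers can only be attached to register operands and to plain
  // immediates (ImmTyNone); the accessors below assert that invariant.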
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }
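
  // Debug printing support: print() implements MCParsedAsmOperand::print() and
  // is what operand dumps go through; printImmTy() names the ImmTy kinds.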
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }
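
  // For example, after parsing a use of v[8:9] the parser calls
  // usesRegister(IS_VGPR, 8, 2), which records that VGPRs up to index 9 are
  // in use and updates .kernel.vgpr_count to 10.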
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool validateMAIAccWrite(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg);

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the floating-point semantics of the operand type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}
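
// For example, isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16) are
// both true (the value fits as either an unsigned or a signed 16-bit integer),
// while isSafeTruncation(0x10000, 16) is false.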

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept such
    // literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}
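
// For example, with Size == 4 the sign mask is bit 31: applying the neg
// modifier to 1.0f (0x3F800000) yields 0xBF800000, and the abs modifier simply
// clears that bit.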
sizeof(double) : getOperandSize(InstDesc, OpNum); 1756 Val = applyInputFPModifiers(Val, Size); 1757 } 1758 1759 APInt Literal(64, Val); 1760 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1761 1762 if (Imm.IsFPImm) { // We got fp literal token 1763 switch (OpTy) { 1764 case AMDGPU::OPERAND_REG_IMM_INT64: 1765 case AMDGPU::OPERAND_REG_IMM_FP64: 1766 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1767 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1768 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1769 AsmParser->hasInv2PiInlineImm())) { 1770 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1771 return; 1772 } 1773 1774 // Non-inlineable 1775 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1776 // For fp operands we check if low 32 bits are zeros 1777 if (Literal.getLoBits(32) != 0) { 1778 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1779 "Can't encode literal as exact 64-bit floating-point operand. " 1780 "Low 32-bits will be set to zero"); 1781 } 1782 1783 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1784 return; 1785 } 1786 1787 // We don't allow fp literals in 64-bit integer instructions. It is 1788 // unclear how we should encode them. This case should be checked earlier 1789 // in predicate methods (isLiteralImm()) 1790 llvm_unreachable("fp literal in 64-bit integer instruction."); 1791 1792 case AMDGPU::OPERAND_REG_IMM_INT32: 1793 case AMDGPU::OPERAND_REG_IMM_FP32: 1794 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1795 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1798 case AMDGPU::OPERAND_REG_IMM_INT16: 1799 case AMDGPU::OPERAND_REG_IMM_FP16: 1800 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1801 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1802 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1808 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1809 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1810 bool lost; 1811 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1812 // Convert literal to single precision 1813 FPLiteral.convert(*getOpFltSemantics(OpTy), 1814 APFloat::rmNearestTiesToEven, &lost); 1815 // We allow precision lost but not overflow or underflow. This should be 1816 // checked earlier in isLiteralImm() 1817 1818 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1819 Inst.addOperand(MCOperand::createImm(ImmVal)); 1820 return; 1821 } 1822 default: 1823 llvm_unreachable("invalid operand size"); 1824 } 1825 1826 return; 1827 } 1828 1829 // We got int literal token. 1830 // Only sign extend inline immediates. 
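// For illustration (a sketch of how the 32-bit cases below behave, assuming the
// usual inline-constant rules):
//   v_add_f32 v0, -1, v1         // -1 is an inline constant and is emitted as is
//   v_add_f32 v0, 0x11223344, v1 // not inlinable; emitted as a 32-bit literal
//                                // after truncation with Val & 0xffffffff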
1831 switch (OpTy) { 1832 case AMDGPU::OPERAND_REG_IMM_INT32: 1833 case AMDGPU::OPERAND_REG_IMM_FP32: 1834 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1835 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1836 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1837 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1838 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1839 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1840 if (isSafeTruncation(Val, 32) && 1841 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1842 AsmParser->hasInv2PiInlineImm())) { 1843 Inst.addOperand(MCOperand::createImm(Val)); 1844 return; 1845 } 1846 1847 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1848 return; 1849 1850 case AMDGPU::OPERAND_REG_IMM_INT64: 1851 case AMDGPU::OPERAND_REG_IMM_FP64: 1852 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1854 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1855 Inst.addOperand(MCOperand::createImm(Val)); 1856 return; 1857 } 1858 1859 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1860 return; 1861 1862 case AMDGPU::OPERAND_REG_IMM_INT16: 1863 case AMDGPU::OPERAND_REG_IMM_FP16: 1864 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1868 if (isSafeTruncation(Val, 16) && 1869 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1870 AsmParser->hasInv2PiInlineImm())) { 1871 Inst.addOperand(MCOperand::createImm(Val)); 1872 return; 1873 } 1874 1875 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1876 return; 1877 1878 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1879 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1882 assert(isSafeTruncation(Val, 16)); 1883 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1884 AsmParser->hasInv2PiInlineImm())); 1885 1886 Inst.addOperand(MCOperand::createImm(Val)); 1887 return; 1888 } 1889 default: 1890 llvm_unreachable("invalid operand size"); 1891 } 1892 } 1893 1894 template <unsigned Bitwidth> 1895 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1896 APInt Literal(64, Imm.Val); 1897 1898 if (!Imm.IsFPImm) { 1899 // We got int literal token. 
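// For illustration: with Bitwidth == 16, an integer literal such as 0x12345 is
// encoded as 0x2345; only the low Bitwidth bits are kept by the getLoBits()
// call below.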
1900 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1901 return; 1902 } 1903 1904 bool Lost; 1905 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1906 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1907 APFloat::rmNearestTiesToEven, &Lost); 1908 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1909 } 1910 1911 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1912 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1913 } 1914 1915 static bool isInlineValue(unsigned Reg) { 1916 switch (Reg) { 1917 case AMDGPU::SRC_SHARED_BASE: 1918 case AMDGPU::SRC_SHARED_LIMIT: 1919 case AMDGPU::SRC_PRIVATE_BASE: 1920 case AMDGPU::SRC_PRIVATE_LIMIT: 1921 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1922 return true; 1923 case AMDGPU::SRC_VCCZ: 1924 case AMDGPU::SRC_EXECZ: 1925 case AMDGPU::SRC_SCC: 1926 return true; 1927 case AMDGPU::SGPR_NULL: 1928 return true; 1929 default: 1930 return false; 1931 } 1932 } 1933 1934 bool AMDGPUOperand::isInlineValue() const { 1935 return isRegKind() && ::isInlineValue(getReg()); 1936 } 1937 1938 //===----------------------------------------------------------------------===// 1939 // AsmParser 1940 //===----------------------------------------------------------------------===// 1941 1942 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1943 if (Is == IS_VGPR) { 1944 switch (RegWidth) { 1945 default: return -1; 1946 case 1: return AMDGPU::VGPR_32RegClassID; 1947 case 2: return AMDGPU::VReg_64RegClassID; 1948 case 3: return AMDGPU::VReg_96RegClassID; 1949 case 4: return AMDGPU::VReg_128RegClassID; 1950 case 5: return AMDGPU::VReg_160RegClassID; 1951 case 6: return AMDGPU::VReg_192RegClassID; 1952 case 8: return AMDGPU::VReg_256RegClassID; 1953 case 16: return AMDGPU::VReg_512RegClassID; 1954 case 32: return AMDGPU::VReg_1024RegClassID; 1955 } 1956 } else if (Is == IS_TTMP) { 1957 switch (RegWidth) { 1958 default: return -1; 1959 case 1: return AMDGPU::TTMP_32RegClassID; 1960 case 2: return AMDGPU::TTMP_64RegClassID; 1961 case 4: return AMDGPU::TTMP_128RegClassID; 1962 case 8: return AMDGPU::TTMP_256RegClassID; 1963 case 16: return AMDGPU::TTMP_512RegClassID; 1964 } 1965 } else if (Is == IS_SGPR) { 1966 switch (RegWidth) { 1967 default: return -1; 1968 case 1: return AMDGPU::SGPR_32RegClassID; 1969 case 2: return AMDGPU::SGPR_64RegClassID; 1970 case 3: return AMDGPU::SGPR_96RegClassID; 1971 case 4: return AMDGPU::SGPR_128RegClassID; 1972 case 5: return AMDGPU::SGPR_160RegClassID; 1973 case 6: return AMDGPU::SGPR_192RegClassID; 1974 case 8: return AMDGPU::SGPR_256RegClassID; 1975 case 16: return AMDGPU::SGPR_512RegClassID; 1976 } 1977 } else if (Is == IS_AGPR) { 1978 switch (RegWidth) { 1979 default: return -1; 1980 case 1: return AMDGPU::AGPR_32RegClassID; 1981 case 2: return AMDGPU::AReg_64RegClassID; 1982 case 3: return AMDGPU::AReg_96RegClassID; 1983 case 4: return AMDGPU::AReg_128RegClassID; 1984 case 5: return AMDGPU::AReg_160RegClassID; 1985 case 6: return AMDGPU::AReg_192RegClassID; 1986 case 8: return AMDGPU::AReg_256RegClassID; 1987 case 16: return AMDGPU::AReg_512RegClassID; 1988 case 32: return AMDGPU::AReg_1024RegClassID; 1989 } 1990 } 1991 return -1; 1992 } 1993 1994 static unsigned getSpecialRegForName(StringRef RegName) { 1995 return StringSwitch<unsigned>(RegName) 1996 .Case("exec", AMDGPU::EXEC) 1997 .Case("vcc", AMDGPU::VCC) 1998 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1999 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2000 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2001 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2002 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2003 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2004 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2005 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2006 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2007 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2008 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2009 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2010 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2011 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2012 .Case("m0", AMDGPU::M0) 2013 .Case("vccz", AMDGPU::SRC_VCCZ) 2014 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2015 .Case("execz", AMDGPU::SRC_EXECZ) 2016 .Case("src_execz", AMDGPU::SRC_EXECZ) 2017 .Case("scc", AMDGPU::SRC_SCC) 2018 .Case("src_scc", AMDGPU::SRC_SCC) 2019 .Case("tba", AMDGPU::TBA) 2020 .Case("tma", AMDGPU::TMA) 2021 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2022 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2023 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2024 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2025 .Case("vcc_lo", AMDGPU::VCC_LO) 2026 .Case("vcc_hi", AMDGPU::VCC_HI) 2027 .Case("exec_lo", AMDGPU::EXEC_LO) 2028 .Case("exec_hi", AMDGPU::EXEC_HI) 2029 .Case("tma_lo", AMDGPU::TMA_LO) 2030 .Case("tma_hi", AMDGPU::TMA_HI) 2031 .Case("tba_lo", AMDGPU::TBA_LO) 2032 .Case("tba_hi", AMDGPU::TBA_HI) 2033 .Case("pc", AMDGPU::PC_REG) 2034 .Case("null", AMDGPU::SGPR_NULL) 2035 .Default(AMDGPU::NoRegister); 2036 } 2037 2038 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2039 SMLoc &EndLoc, bool RestoreOnFailure) { 2040 auto R = parseRegister(); 2041 if (!R) return true; 2042 assert(R->isReg()); 2043 RegNo = R->getReg(); 2044 StartLoc = R->getStartLoc(); 2045 EndLoc = R->getEndLoc(); 2046 return false; 2047 } 2048 2049 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2050 SMLoc &EndLoc) { 2051 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2052 } 2053 2054 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2055 SMLoc &StartLoc, 2056 SMLoc &EndLoc) { 2057 bool Result = 2058 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2059 bool PendingErrors = getParser().hasPendingError(); 2060 getParser().clearPendingErrors(); 2061 if (PendingErrors) 2062 return MatchOperand_ParseFail; 2063 if (Result) 2064 return MatchOperand_NoMatch; 2065 return MatchOperand_Success; 2066 } 2067 2068 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2069 RegisterKind RegKind, unsigned Reg1) { 2070 switch (RegKind) { 2071 case IS_SPECIAL: 2072 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2073 Reg = AMDGPU::EXEC; 2074 RegWidth = 2; 2075 return true; 2076 } 2077 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2078 Reg = AMDGPU::FLAT_SCR; 2079 RegWidth = 2; 2080 return true; 2081 } 2082 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2083 Reg = AMDGPU::XNACK_MASK; 2084 RegWidth = 2; 2085 return true; 2086 } 2087 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2088 Reg = AMDGPU::VCC; 2089 RegWidth = 2; 2090 return true; 2091 } 2092 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2093 Reg = AMDGPU::TBA; 2094 RegWidth = 2; 2095 return true; 2096 } 2097 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2098 Reg = AMDGPU::TMA; 2099 RegWidth = 2; 
2100 return true; 2101 } 2102 return false; 2103 case IS_VGPR: 2104 case IS_SGPR: 2105 case IS_AGPR: 2106 case IS_TTMP: 2107 if (Reg1 != Reg + RegWidth) { 2108 return false; 2109 } 2110 RegWidth++; 2111 return true; 2112 default: 2113 llvm_unreachable("unexpected register kind"); 2114 } 2115 } 2116 2117 struct RegInfo { 2118 StringLiteral Name; 2119 RegisterKind Kind; 2120 }; 2121 2122 static constexpr RegInfo RegularRegisters[] = { 2123 {{"v"}, IS_VGPR}, 2124 {{"s"}, IS_SGPR}, 2125 {{"ttmp"}, IS_TTMP}, 2126 {{"acc"}, IS_AGPR}, 2127 {{"a"}, IS_AGPR}, 2128 }; 2129 2130 static bool isRegularReg(RegisterKind Kind) { 2131 return Kind == IS_VGPR || 2132 Kind == IS_SGPR || 2133 Kind == IS_TTMP || 2134 Kind == IS_AGPR; 2135 } 2136 2137 static const RegInfo* getRegularRegInfo(StringRef Str) { 2138 for (const RegInfo &Reg : RegularRegisters) 2139 if (Str.startswith(Reg.Name)) 2140 return &Reg; 2141 return nullptr; 2142 } 2143 2144 static bool getRegNum(StringRef Str, unsigned& Num) { 2145 return !Str.getAsInteger(10, Num); 2146 } 2147 2148 bool 2149 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2150 const AsmToken &NextToken) const { 2151 2152 // A list of consecutive registers: [s0,s1,s2,s3] 2153 if (Token.is(AsmToken::LBrac)) 2154 return true; 2155 2156 if (!Token.is(AsmToken::Identifier)) 2157 return false; 2158 2159 // A single register like s0 or a range of registers like s[0:1] 2160 2161 StringRef Str = Token.getString(); 2162 const RegInfo *Reg = getRegularRegInfo(Str); 2163 if (Reg) { 2164 StringRef RegName = Reg->Name; 2165 StringRef RegSuffix = Str.substr(RegName.size()); 2166 if (!RegSuffix.empty()) { 2167 unsigned Num; 2168 // A single register with an index: rXX 2169 if (getRegNum(RegSuffix, Num)) 2170 return true; 2171 } else { 2172 // A range of registers: r[XX:YY]. 2173 if (NextToken.is(AsmToken::LBrac)) 2174 return true; 2175 } 2176 } 2177 2178 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2179 } 2180 2181 bool 2182 AMDGPUAsmParser::isRegister() 2183 { 2184 return isRegister(getToken(), peekToken()); 2185 } 2186 2187 unsigned 2188 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2189 unsigned RegNum, 2190 unsigned RegWidth) { 2191 2192 assert(isRegularReg(RegKind)); 2193 2194 unsigned AlignSize = 1; 2195 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2196 // SGPR and TTMP registers must be aligned. 2197 // Max required alignment is 4 dwords. 
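// For illustration, given the std::min(RegWidth, 4) alignment computed below:
//   s[2:3] is accepted (width 2, start index 2 is 2-dword aligned)
//   s[1:2] is rejected (width 2, start index 1 is not)
//   s[4:7] is accepted (width 4, start index 4 is 4-dword aligned)
//   s[2:5] is rejected (width 4, start index 2 is not)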
2198 AlignSize = std::min(RegWidth, 4u); 2199 } 2200 2201 if (RegNum % AlignSize != 0) 2202 return AMDGPU::NoRegister; 2203 2204 unsigned RegIdx = RegNum / AlignSize; 2205 int RCID = getRegClass(RegKind, RegWidth); 2206 if (RCID == -1) 2207 return AMDGPU::NoRegister; 2208 2209 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2210 const MCRegisterClass RC = TRI->getRegClass(RCID); 2211 if (RegIdx >= RC.getNumRegs()) 2212 return AMDGPU::NoRegister; 2213 2214 return RC.getRegister(RegIdx); 2215 } 2216 2217 bool 2218 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2219 int64_t RegLo, RegHi; 2220 if (!trySkipToken(AsmToken::LBrac)) 2221 return false; 2222 2223 if (!parseExpr(RegLo)) 2224 return false; 2225 2226 if (trySkipToken(AsmToken::Colon)) { 2227 if (!parseExpr(RegHi)) 2228 return false; 2229 } else { 2230 RegHi = RegLo; 2231 } 2232 2233 if (!trySkipToken(AsmToken::RBrac)) 2234 return false; 2235 2236 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2237 return false; 2238 2239 Num = static_cast<unsigned>(RegLo); 2240 Width = (RegHi - RegLo) + 1; 2241 return true; 2242 } 2243 2244 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2245 unsigned &RegNum, unsigned &RegWidth, 2246 SmallVectorImpl<AsmToken> &Tokens) { 2247 assert(isToken(AsmToken::Identifier)); 2248 unsigned Reg = getSpecialRegForName(getTokenStr()); 2249 if (Reg) { 2250 RegNum = 0; 2251 RegWidth = 1; 2252 RegKind = IS_SPECIAL; 2253 Tokens.push_back(getToken()); 2254 lex(); // skip register name 2255 } 2256 return Reg; 2257 } 2258 2259 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2260 unsigned &RegNum, unsigned &RegWidth, 2261 SmallVectorImpl<AsmToken> &Tokens) { 2262 assert(isToken(AsmToken::Identifier)); 2263 StringRef RegName = getTokenStr(); 2264 2265 const RegInfo *RI = getRegularRegInfo(RegName); 2266 if (!RI) 2267 return AMDGPU::NoRegister; 2268 Tokens.push_back(getToken()); 2269 lex(); // skip register name 2270 2271 RegKind = RI->Kind; 2272 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2273 if (!RegSuffix.empty()) { 2274 // Single 32-bit register: vXX. 2275 if (!getRegNum(RegSuffix, RegNum)) 2276 return AMDGPU::NoRegister; 2277 RegWidth = 1; 2278 } else { 2279 // Range of registers: v[XX:YY]. ":YY" is optional. 
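// For example, v[0:3] yields RegNum = 0 and RegWidth = 4, while v[5]
// (with no ":YY" part) yields RegNum = 5 and RegWidth = 1.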
2280 if (!ParseRegRange(RegNum, RegWidth)) 2281 return AMDGPU::NoRegister; 2282 } 2283 2284 return getRegularReg(RegKind, RegNum, RegWidth); 2285 } 2286 2287 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2288 unsigned &RegWidth, 2289 SmallVectorImpl<AsmToken> &Tokens) { 2290 unsigned Reg = AMDGPU::NoRegister; 2291 2292 if (!trySkipToken(AsmToken::LBrac)) 2293 return AMDGPU::NoRegister; 2294 2295 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2296 2297 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2298 return AMDGPU::NoRegister; 2299 if (RegWidth != 1) 2300 return AMDGPU::NoRegister; 2301 2302 for (; trySkipToken(AsmToken::Comma); ) { 2303 RegisterKind NextRegKind; 2304 unsigned NextReg, NextRegNum, NextRegWidth; 2305 2306 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2307 Tokens)) 2308 return AMDGPU::NoRegister; 2309 if (NextRegWidth != 1) 2310 return AMDGPU::NoRegister; 2311 if (NextRegKind != RegKind) 2312 return AMDGPU::NoRegister; 2313 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2314 return AMDGPU::NoRegister; 2315 } 2316 2317 if (!trySkipToken(AsmToken::RBrac)) 2318 return AMDGPU::NoRegister; 2319 2320 if (isRegularReg(RegKind)) 2321 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2322 2323 return Reg; 2324 } 2325 2326 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2327 unsigned &RegNum, unsigned &RegWidth, 2328 SmallVectorImpl<AsmToken> &Tokens) { 2329 Reg = AMDGPU::NoRegister; 2330 2331 if (isToken(AsmToken::Identifier)) { 2332 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2333 if (Reg == AMDGPU::NoRegister) 2334 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2335 } else { 2336 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2337 } 2338 2339 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2340 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2341 } 2342 2343 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2344 unsigned &RegNum, unsigned &RegWidth, 2345 bool RestoreOnFailure) { 2346 Reg = AMDGPU::NoRegister; 2347 2348 SmallVector<AsmToken, 1> Tokens; 2349 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2350 if (RestoreOnFailure) { 2351 while (!Tokens.empty()) { 2352 getLexer().UnLex(Tokens.pop_back_val()); 2353 } 2354 } 2355 return true; 2356 } 2357 return false; 2358 } 2359 2360 Optional<StringRef> 2361 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2362 switch (RegKind) { 2363 case IS_VGPR: 2364 return StringRef(".amdgcn.next_free_vgpr"); 2365 case IS_SGPR: 2366 return StringRef(".amdgcn.next_free_sgpr"); 2367 default: 2368 return None; 2369 } 2370 } 2371 2372 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2373 auto SymbolName = getGprCountSymbolName(RegKind); 2374 assert(SymbolName && "initializing invalid register kind"); 2375 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2376 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2377 } 2378 2379 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2380 unsigned DwordRegIndex, 2381 unsigned RegWidth) { 2382 // Symbols are only defined for GCN targets 2383 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2384 return true; 2385 2386 auto SymbolName = getGprCountSymbolName(RegKind); 2387 if (!SymbolName) 2388 return true; 2389 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2390 2391 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2392 int64_t OldCount; 2393 2394 if (!Sym->isVariable()) 2395 return !Error(getParser().getTok().getLoc(), 2396 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2397 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2398 return !Error( 2399 getParser().getTok().getLoc(), 2400 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2401 2402 if (OldCount <= NewMax) 2403 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2404 2405 return true; 2406 } 2407 2408 std::unique_ptr<AMDGPUOperand> 2409 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2410 const auto &Tok = Parser.getTok(); 2411 SMLoc StartLoc = Tok.getLoc(); 2412 SMLoc EndLoc = Tok.getEndLoc(); 2413 RegisterKind RegKind; 2414 unsigned Reg, RegNum, RegWidth; 2415 2416 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2417 //FIXME: improve error messages (bug 41303). 2418 Error(StartLoc, "not a valid operand."); 2419 return nullptr; 2420 } 2421 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2422 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2423 return nullptr; 2424 } else 2425 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2426 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2427 } 2428 2429 OperandMatchResultTy 2430 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2431 // TODO: add syntactic sugar for 1/(2*PI) 2432 2433 assert(!isRegister()); 2434 assert(!isModifier()); 2435 2436 const auto& Tok = getToken(); 2437 const auto& NextTok = peekToken(); 2438 bool IsReal = Tok.is(AsmToken::Real); 2439 SMLoc S = getLoc(); 2440 bool Negate = false; 2441 2442 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2443 lex(); 2444 IsReal = true; 2445 Negate = true; 2446 } 2447 2448 if (IsReal) { 2449 // Floating-point expressions are not supported. 2450 // Can only allow floating-point literals with an 2451 // optional sign. 2452 2453 StringRef Num = getTokenStr(); 2454 lex(); 2455 2456 APFloat RealVal(APFloat::IEEEdouble()); 2457 auto roundMode = APFloat::rmNearestTiesToEven; 2458 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2459 return MatchOperand_ParseFail; 2460 } 2461 if (Negate) 2462 RealVal.changeSign(); 2463 2464 Operands.push_back( 2465 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2466 AMDGPUOperand::ImmTyNone, true)); 2467 2468 return MatchOperand_Success; 2469 2470 } else { 2471 int64_t IntVal; 2472 const MCExpr *Expr; 2473 SMLoc S = getLoc(); 2474 2475 if (HasSP3AbsModifier) { 2476 // This is a workaround for handling expressions 2477 // as arguments of SP3 'abs' modifier, for example: 2478 // |1.0| 2479 // |-1| 2480 // |1+x| 2481 // This syntax is not compatible with syntax of standard 2482 // MC expressions (due to the trailing '|'). 
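// For example, in "|-1|" a full parseExpression() call would try to treat the
// trailing '|' as a binary OR operator; parsing only a primary expression
// leaves that closing bar for the caller to consume as the SP3 'abs' delimiter.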
2483 SMLoc EndLoc; 2484 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2485 return MatchOperand_ParseFail; 2486 } else { 2487 if (Parser.parseExpression(Expr)) 2488 return MatchOperand_ParseFail; 2489 } 2490 2491 if (Expr->evaluateAsAbsolute(IntVal)) { 2492 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2493 } else { 2494 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2495 } 2496 2497 return MatchOperand_Success; 2498 } 2499 2500 return MatchOperand_NoMatch; 2501 } 2502 2503 OperandMatchResultTy 2504 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2505 if (!isRegister()) 2506 return MatchOperand_NoMatch; 2507 2508 if (auto R = parseRegister()) { 2509 assert(R->isReg()); 2510 Operands.push_back(std::move(R)); 2511 return MatchOperand_Success; 2512 } 2513 return MatchOperand_ParseFail; 2514 } 2515 2516 OperandMatchResultTy 2517 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2518 auto res = parseReg(Operands); 2519 if (res != MatchOperand_NoMatch) { 2520 return res; 2521 } else if (isModifier()) { 2522 return MatchOperand_NoMatch; 2523 } else { 2524 return parseImm(Operands, HasSP3AbsMod); 2525 } 2526 } 2527 2528 bool 2529 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2530 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2531 const auto &str = Token.getString(); 2532 return str == "abs" || str == "neg" || str == "sext"; 2533 } 2534 return false; 2535 } 2536 2537 bool 2538 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2539 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2540 } 2541 2542 bool 2543 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2544 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2545 } 2546 2547 bool 2548 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2549 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2550 } 2551 2552 // Check if this is an operand modifier or an opcode modifier 2553 // which may look like an expression but it is not. We should 2554 // avoid parsing these modifiers as expressions. Currently 2555 // recognized sequences are: 2556 // |...| 2557 // abs(...) 2558 // neg(...) 2559 // sext(...) 2560 // -reg 2561 // -|...| 2562 // -abs(...) 2563 // name:... 2564 // Note that simple opcode modifiers like 'gds' may be parsed as 2565 // expressions; this is a special case. See getExpressionAsToken. 2566 // 2567 bool 2568 AMDGPUAsmParser::isModifier() { 2569 2570 AsmToken Tok = getToken(); 2571 AsmToken NextToken[2]; 2572 peekTokens(NextToken); 2573 2574 return isOperandModifier(Tok, NextToken[0]) || 2575 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2576 isOpcodeModifierWithVal(Tok, NextToken[0]); 2577 } 2578 2579 // Check if the current token is an SP3 'neg' modifier. 2580 // Currently this modifier is allowed in the following context: 2581 // 2582 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2583 // 2. Before an 'abs' modifier: -abs(...) 2584 // 3. Before an SP3 'abs' modifier: -|...| 2585 // 2586 // In all other cases "-" is handled as a part 2587 // of an expression that follows the sign. 
2588 // 2589 // Note: When "-" is followed by an integer literal, 2590 // this is interpreted as integer negation rather 2591 // than a floating-point NEG modifier applied to the literal. 2592 // Besides being counter-intuitive, such use of a floating-point 2593 // NEG modifier would have resulted in different meanings 2594 // of integer literals used with VOP1/2/C and VOP3, 2595 // for example: 2596 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2597 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2598 // Negative fp literals with a preceding "-" are 2599 // handled likewise for uniformity. 2600 // 2601 bool 2602 AMDGPUAsmParser::parseSP3NegModifier() { 2603 2604 AsmToken NextToken[2]; 2605 peekTokens(NextToken); 2606 2607 if (isToken(AsmToken::Minus) && 2608 (isRegister(NextToken[0], NextToken[1]) || 2609 NextToken[0].is(AsmToken::Pipe) || 2610 isId(NextToken[0], "abs"))) { 2611 lex(); 2612 return true; 2613 } 2614 2615 return false; 2616 } 2617 2618 OperandMatchResultTy 2619 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2620 bool AllowImm) { 2621 bool Neg, SP3Neg; 2622 bool Abs, SP3Abs; 2623 SMLoc Loc; 2624 2625 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2626 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2627 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2628 return MatchOperand_ParseFail; 2629 } 2630 2631 SP3Neg = parseSP3NegModifier(); 2632 2633 Loc = getLoc(); 2634 Neg = trySkipId("neg"); 2635 if (Neg && SP3Neg) { 2636 Error(Loc, "expected register or immediate"); 2637 return MatchOperand_ParseFail; 2638 } 2639 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2640 return MatchOperand_ParseFail; 2641 2642 Abs = trySkipId("abs"); 2643 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2644 return MatchOperand_ParseFail; 2645 2646 Loc = getLoc(); 2647 SP3Abs = trySkipToken(AsmToken::Pipe); 2648 if (Abs && SP3Abs) { 2649 Error(Loc, "expected register or immediate"); 2650 return MatchOperand_ParseFail; 2651 } 2652 2653 OperandMatchResultTy Res; 2654 if (AllowImm) { 2655 Res = parseRegOrImm(Operands, SP3Abs); 2656 } else { 2657 Res = parseReg(Operands); 2658 } 2659 if (Res != MatchOperand_Success) { 2660 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2661 } 2662 2663 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2664 return MatchOperand_ParseFail; 2665 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2666 return MatchOperand_ParseFail; 2667 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2668 return MatchOperand_ParseFail; 2669 2670 AMDGPUOperand::Modifiers Mods; 2671 Mods.Abs = Abs || SP3Abs; 2672 Mods.Neg = Neg || SP3Neg; 2673 2674 if (Mods.hasFPModifiers()) { 2675 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2676 if (Op.isExpr()) { 2677 Error(Op.getStartLoc(), "expected an absolute expression"); 2678 return MatchOperand_ParseFail; 2679 } 2680 Op.setModifiers(Mods); 2681 } 2682 return MatchOperand_Success; 2683 } 2684 2685 OperandMatchResultTy 2686 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2687 bool AllowImm) { 2688 bool Sext = trySkipId("sext"); 2689 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2690 return MatchOperand_ParseFail; 2691 2692 OperandMatchResultTy Res; 2693 if (AllowImm) { 2694 Res = parseRegOrImm(Operands); 2695 } else { 2696 Res = parseReg(Operands); 2697 } 2698 if (Res != MatchOperand_Success) { 2699 return Sext? MatchOperand_ParseFail : Res; 2700 } 2701 2702 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2703 return MatchOperand_ParseFail; 2704 2705 AMDGPUOperand::Modifiers Mods; 2706 Mods.Sext = Sext; 2707 2708 if (Mods.hasIntModifiers()) { 2709 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2710 if (Op.isExpr()) { 2711 Error(Op.getStartLoc(), "expected an absolute expression"); 2712 return MatchOperand_ParseFail; 2713 } 2714 Op.setModifiers(Mods); 2715 } 2716 2717 return MatchOperand_Success; 2718 } 2719 2720 OperandMatchResultTy 2721 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2722 return parseRegOrImmWithFPInputMods(Operands, false); 2723 } 2724 2725 OperandMatchResultTy 2726 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2727 return parseRegOrImmWithIntInputMods(Operands, false); 2728 } 2729 2730 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2731 auto Loc = getLoc(); 2732 if (trySkipId("off")) { 2733 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2734 AMDGPUOperand::ImmTyOff, false)); 2735 return MatchOperand_Success; 2736 } 2737 2738 if (!isRegister()) 2739 return MatchOperand_NoMatch; 2740 2741 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2742 if (Reg) { 2743 Operands.push_back(std::move(Reg)); 2744 return MatchOperand_Success; 2745 } 2746 2747 return MatchOperand_ParseFail; 2748 2749 } 2750 2751 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2752 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2753 2754 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2755 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2756 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2757 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2758 return Match_InvalidOperand; 2759 2760 if ((TSFlags & SIInstrFlags::VOP3) && 2761 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2762 getForcedEncodingSize() != 64) 2763 return Match_PreferE32; 2764 2765 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2766 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2767 // v_mac_f32/16 allow only dst_sel == DWORD; 2768 auto OpNum = 2769 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2770 const auto &Op = Inst.getOperand(OpNum); 2771 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2772 return Match_InvalidOperand; 2773 } 2774 } 2775 2776 return Match_Success; 2777 } 2778 2779 // Which asm variants we should check 2780 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2781 if (getForcedEncodingSize() == 32) { 2782 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2783 return makeArrayRef(Variants); 2784 } 2785 2786 if (isForcedVOP3()) { 2787 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2788 return makeArrayRef(Variants); 2789 } 2790 2791 if (isForcedSDWA()) { 2792 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2793 AMDGPUAsmVariants::SDWA9}; 2794 return makeArrayRef(Variants); 2795 } 2796 2797 if (isForcedDPP()) { 2798 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2799 return makeArrayRef(Variants); 2800 } 2801 2802 static const unsigned Variants[] = { 2803 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2804 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2805 }; 2806 2807 return makeArrayRef(Variants); 2808 } 2809 2810 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2811 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2812 const unsigned Num = Desc.getNumImplicitUses(); 2813 for (unsigned i = 0; i < Num; ++i) { 2814 unsigned Reg = Desc.ImplicitUses[i]; 2815 switch (Reg) { 2816 case AMDGPU::FLAT_SCR: 2817 case AMDGPU::VCC: 2818 case AMDGPU::VCC_LO: 2819 case AMDGPU::VCC_HI: 2820 case AMDGPU::M0: 2821 return Reg; 2822 default: 2823 break; 2824 } 2825 } 2826 return AMDGPU::NoRegister; 2827 } 2828 2829 // NB: This code is correct only when used to check constant 2830 // bus limitations because GFX7 supports no f16 inline constants. 2831 // Note that there are no cases in which a GFX7 opcode violates 2832 // constant bus limitations due to the use of an f16 constant.
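// For illustration (assuming the usual inline-constant encodings): for a 4-byte
// operand, 0x3f800000 (1.0f) is an inline constant and does not occupy the
// constant bus, whereas an arbitrary value such as 0x3f000001 is a literal and
// counts toward the limit enforced in validateConstantBusLimitations() below.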
2833 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2834 unsigned OpIdx) const { 2835 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2836 2837 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2838 return false; 2839 } 2840 2841 const MCOperand &MO = Inst.getOperand(OpIdx); 2842 2843 int64_t Val = MO.getImm(); 2844 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2845 2846 switch (OpSize) { // expected operand size 2847 case 8: 2848 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2849 case 4: 2850 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2851 case 2: { 2852 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2853 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2854 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2855 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2856 return AMDGPU::isInlinableIntLiteral(Val); 2857 2858 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2859 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2860 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2861 return AMDGPU::isInlinableIntLiteralV216(Val); 2862 2863 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2864 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2865 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2866 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2867 2868 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2869 } 2870 default: 2871 llvm_unreachable("invalid operand size"); 2872 } 2873 } 2874 2875 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2876 if (!isGFX10()) 2877 return 1; 2878 2879 switch (Opcode) { 2880 // 64-bit shift instructions can use only one scalar value input 2881 case AMDGPU::V_LSHLREV_B64: 2882 case AMDGPU::V_LSHLREV_B64_gfx10: 2883 case AMDGPU::V_LSHL_B64: 2884 case AMDGPU::V_LSHRREV_B64: 2885 case AMDGPU::V_LSHRREV_B64_gfx10: 2886 case AMDGPU::V_LSHR_B64: 2887 case AMDGPU::V_ASHRREV_I64: 2888 case AMDGPU::V_ASHRREV_I64_gfx10: 2889 case AMDGPU::V_ASHR_I64: 2890 return 1; 2891 default: 2892 return 2; 2893 } 2894 } 2895 2896 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2897 const MCOperand &MO = Inst.getOperand(OpIdx); 2898 if (MO.isImm()) { 2899 return !isInlineConstant(Inst, OpIdx); 2900 } else if (MO.isReg()) { 2901 auto Reg = MO.getReg(); 2902 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2903 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2904 } else { 2905 return true; 2906 } 2907 } 2908 2909 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2910 const unsigned Opcode = Inst.getOpcode(); 2911 const MCInstrDesc &Desc = MII.get(Opcode); 2912 unsigned ConstantBusUseCount = 0; 2913 unsigned NumLiterals = 0; 2914 unsigned LiteralSize; 2915 2916 if (Desc.TSFlags & 2917 (SIInstrFlags::VOPC | 2918 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2919 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2920 SIInstrFlags::SDWA)) { 2921 // Check special imm operands (used by madmk, etc) 2922 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2923 ++ConstantBusUseCount; 2924 } 2925 2926 SmallDenseSet<unsigned> SGPRsUsed; 2927 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2928 if (SGPRUsed != AMDGPU::NoRegister) { 2929 SGPRsUsed.insert(SGPRUsed); 2930 ++ConstantBusUseCount; 2931 } 2932 2933 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2934 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, 
AMDGPU::OpName::src1); 2935 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2936 2937 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2938 2939 for (int OpIdx : OpIndices) { 2940 if (OpIdx == -1) break; 2941 2942 const MCOperand &MO = Inst.getOperand(OpIdx); 2943 if (usesConstantBus(Inst, OpIdx)) { 2944 if (MO.isReg()) { 2945 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2946 // Pairs of registers with a partial intersections like these 2947 // s0, s[0:1] 2948 // flat_scratch_lo, flat_scratch 2949 // flat_scratch_lo, flat_scratch_hi 2950 // are theoretically valid but they are disabled anyway. 2951 // Note that this code mimics SIInstrInfo::verifyInstruction 2952 if (!SGPRsUsed.count(Reg)) { 2953 SGPRsUsed.insert(Reg); 2954 ++ConstantBusUseCount; 2955 } 2956 } else { // Expression or a literal 2957 2958 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2959 continue; // special operand like VINTERP attr_chan 2960 2961 // An instruction may use only one literal. 2962 // This has been validated on the previous step. 2963 // See validateVOP3Literal. 2964 // This literal may be used as more than one operand. 2965 // If all these operands are of the same size, 2966 // this literal counts as one scalar value. 2967 // Otherwise it counts as 2 scalar values. 2968 // See "GFX10 Shader Programming", section 3.6.2.3. 2969 2970 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2971 if (Size < 4) Size = 4; 2972 2973 if (NumLiterals == 0) { 2974 NumLiterals = 1; 2975 LiteralSize = Size; 2976 } else if (LiteralSize != Size) { 2977 NumLiterals = 2; 2978 } 2979 } 2980 } 2981 } 2982 } 2983 ConstantBusUseCount += NumLiterals; 2984 2985 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2986 } 2987 2988 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2989 const unsigned Opcode = Inst.getOpcode(); 2990 const MCInstrDesc &Desc = MII.get(Opcode); 2991 2992 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2993 if (DstIdx == -1 || 2994 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2995 return true; 2996 } 2997 2998 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2999 3000 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3001 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3002 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3003 3004 assert(DstIdx != -1); 3005 const MCOperand &Dst = Inst.getOperand(DstIdx); 3006 assert(Dst.isReg()); 3007 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3008 3009 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3010 3011 for (int SrcIdx : SrcIndices) { 3012 if (SrcIdx == -1) break; 3013 const MCOperand &Src = Inst.getOperand(SrcIdx); 3014 if (Src.isReg()) { 3015 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3016 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3017 return false; 3018 } 3019 } 3020 } 3021 3022 return true; 3023 } 3024 3025 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3026 3027 const unsigned Opc = Inst.getOpcode(); 3028 const MCInstrDesc &Desc = MII.get(Opc); 3029 3030 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3031 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3032 assert(ClampIdx != -1); 3033 return Inst.getOperand(ClampIdx).getImm() == 0; 3034 } 3035 3036 return true; 3037 } 3038 3039 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst 
&Inst) { 3040 3041 const unsigned Opc = Inst.getOpcode(); 3042 const MCInstrDesc &Desc = MII.get(Opc); 3043 3044 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3045 return true; 3046 3047 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3048 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3049 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3050 3051 assert(VDataIdx != -1); 3052 assert(DMaskIdx != -1); 3053 assert(TFEIdx != -1); 3054 3055 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3056 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3057 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3058 if (DMask == 0) 3059 DMask = 1; 3060 3061 unsigned DataSize = 3062 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3063 if (hasPackedD16()) { 3064 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3065 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3066 DataSize = (DataSize + 1) / 2; 3067 } 3068 3069 return (VDataSize / 4) == DataSize + TFESize; 3070 } 3071 3072 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3073 const unsigned Opc = Inst.getOpcode(); 3074 const MCInstrDesc &Desc = MII.get(Opc); 3075 3076 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3077 return true; 3078 3079 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3080 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3081 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3082 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3083 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3084 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3085 3086 assert(VAddr0Idx != -1); 3087 assert(SrsrcIdx != -1); 3088 assert(DimIdx != -1); 3089 assert(SrsrcIdx > VAddr0Idx); 3090 3091 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3092 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3093 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3094 unsigned VAddrSize = 3095 IsNSA ? SrsrcIdx - VAddr0Idx 3096 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3097 3098 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3099 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3100 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3101 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3102 if (!IsNSA) { 3103 if (AddrSize > 8) 3104 AddrSize = 16; 3105 else if (AddrSize > 4) 3106 AddrSize = 8; 3107 } 3108 3109 return VAddrSize == AddrSize; 3110 } 3111 3112 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3113 3114 const unsigned Opc = Inst.getOpcode(); 3115 const MCInstrDesc &Desc = MII.get(Opc); 3116 3117 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3118 return true; 3119 if (!Desc.mayLoad() || !Desc.mayStore()) 3120 return true; // Not atomic 3121 3122 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3123 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3124 3125 // This is an incomplete check because image_atomic_cmpswap 3126 // may only use 0x3 and 0xf while other atomic operations 3127 // may use 0x1 and 0x3. However these limitations are 3128 // verified when we check that dmask matches dst size. 
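// For illustration: a 32-bit atomic such as image_atomic_add typically uses
// dmask 0x1, its 64-bit form uses 0x3, and a 64-bit image_atomic_cmpswap
// (compare + swap pair) uses 0xf; all of these pass the check below.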
3129 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3130 } 3131 3132 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3133 3134 const unsigned Opc = Inst.getOpcode(); 3135 const MCInstrDesc &Desc = MII.get(Opc); 3136 3137 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3138 return true; 3139 3140 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3141 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3142 3143 // GATHER4 instructions use dmask in a different fashion compared to 3144 // other MIMG instructions. The only useful DMASK values are 3145 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3146 // (red,red,red,red) etc.) The ISA document doesn't mention 3147 // this. 3148 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3149 } 3150 3151 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3152 { 3153 switch (Opcode) { 3154 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3155 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3156 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3157 return true; 3158 default: 3159 return false; 3160 } 3161 } 3162 3163 // movrels* opcodes should only allow VGPRS as src0. 3164 // This is specified in .td description for vop1/vop3, 3165 // but sdwa is handled differently. See isSDWAOperand. 3166 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3167 3168 const unsigned Opc = Inst.getOpcode(); 3169 const MCInstrDesc &Desc = MII.get(Opc); 3170 3171 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3172 return true; 3173 3174 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3175 assert(Src0Idx != -1); 3176 3177 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3178 if (!Src0.isReg()) 3179 return false; 3180 3181 auto Reg = Src0.getReg(); 3182 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3183 return !isSGPR(mc2PseudoReg(Reg), TRI); 3184 } 3185 3186 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3187 3188 const unsigned Opc = Inst.getOpcode(); 3189 3190 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3191 return true; 3192 3193 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3194 assert(Src0Idx != -1); 3195 3196 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3197 if (!Src0.isReg()) 3198 return true; 3199 3200 auto Reg = Src0.getReg(); 3201 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3202 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3203 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3204 return false; 3205 } 3206 3207 return true; 3208 } 3209 3210 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3211 3212 const unsigned Opc = Inst.getOpcode(); 3213 const MCInstrDesc &Desc = MII.get(Opc); 3214 3215 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3216 return true; 3217 3218 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3219 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3220 if (isCI() || isSI()) 3221 return false; 3222 } 3223 3224 return true; 3225 } 3226 3227 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3228 const unsigned Opc = Inst.getOpcode(); 3229 const MCInstrDesc &Desc = MII.get(Opc); 3230 3231 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3232 return true; 3233 3234 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3235 if (DimIdx < 0) 3236 return true; 3237 3238 long Imm = Inst.getOperand(DimIdx).getImm(); 3239 if (Imm < 0 || Imm >= 8) 3240 return false; 3241 3242 return 
true; 3243 } 3244 3245 static bool IsRevOpcode(const unsigned Opcode) 3246 { 3247 switch (Opcode) { 3248 case AMDGPU::V_SUBREV_F32_e32: 3249 case AMDGPU::V_SUBREV_F32_e64: 3250 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3251 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3252 case AMDGPU::V_SUBREV_F32_e32_vi: 3253 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3254 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3255 case AMDGPU::V_SUBREV_F32_e64_vi: 3256 3257 case AMDGPU::V_SUBREV_CO_U32_e32: 3258 case AMDGPU::V_SUBREV_CO_U32_e64: 3259 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3260 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3261 3262 case AMDGPU::V_SUBBREV_U32_e32: 3263 case AMDGPU::V_SUBBREV_U32_e64: 3264 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3265 case AMDGPU::V_SUBBREV_U32_e32_vi: 3266 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3267 case AMDGPU::V_SUBBREV_U32_e64_vi: 3268 3269 case AMDGPU::V_SUBREV_U32_e32: 3270 case AMDGPU::V_SUBREV_U32_e64: 3271 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3272 case AMDGPU::V_SUBREV_U32_e32_vi: 3273 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3274 case AMDGPU::V_SUBREV_U32_e64_vi: 3275 3276 case AMDGPU::V_SUBREV_F16_e32: 3277 case AMDGPU::V_SUBREV_F16_e64: 3278 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3279 case AMDGPU::V_SUBREV_F16_e32_vi: 3280 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3281 case AMDGPU::V_SUBREV_F16_e64_vi: 3282 3283 case AMDGPU::V_SUBREV_U16_e32: 3284 case AMDGPU::V_SUBREV_U16_e64: 3285 case AMDGPU::V_SUBREV_U16_e32_vi: 3286 case AMDGPU::V_SUBREV_U16_e64_vi: 3287 3288 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3289 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3290 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3291 3292 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3293 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3294 3295 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3296 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3297 3298 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3299 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3300 3301 case AMDGPU::V_LSHRREV_B32_e32: 3302 case AMDGPU::V_LSHRREV_B32_e64: 3303 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3304 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3305 case AMDGPU::V_LSHRREV_B32_e32_vi: 3306 case AMDGPU::V_LSHRREV_B32_e64_vi: 3307 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3308 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3309 3310 case AMDGPU::V_ASHRREV_I32_e32: 3311 case AMDGPU::V_ASHRREV_I32_e64: 3312 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3313 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3314 case AMDGPU::V_ASHRREV_I32_e32_vi: 3315 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3316 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3317 case AMDGPU::V_ASHRREV_I32_e64_vi: 3318 3319 case AMDGPU::V_LSHLREV_B32_e32: 3320 case AMDGPU::V_LSHLREV_B32_e64: 3321 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3322 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3323 case AMDGPU::V_LSHLREV_B32_e32_vi: 3324 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3325 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3326 case AMDGPU::V_LSHLREV_B32_e64_vi: 3327 3328 case AMDGPU::V_LSHLREV_B16_e32: 3329 case AMDGPU::V_LSHLREV_B16_e64: 3330 case AMDGPU::V_LSHLREV_B16_e32_vi: 3331 case AMDGPU::V_LSHLREV_B16_e64_vi: 3332 case AMDGPU::V_LSHLREV_B16_gfx10: 3333 3334 case AMDGPU::V_LSHRREV_B16_e32: 3335 case AMDGPU::V_LSHRREV_B16_e64: 3336 case AMDGPU::V_LSHRREV_B16_e32_vi: 3337 case AMDGPU::V_LSHRREV_B16_e64_vi: 3338 case AMDGPU::V_LSHRREV_B16_gfx10: 3339 3340 case AMDGPU::V_ASHRREV_I16_e32: 3341 case AMDGPU::V_ASHRREV_I16_e64: 3342 case AMDGPU::V_ASHRREV_I16_e32_vi: 3343 case AMDGPU::V_ASHRREV_I16_e64_vi: 3344 case AMDGPU::V_ASHRREV_I16_gfx10: 3345 3346 case 
AMDGPU::V_LSHLREV_B64: 3347 case AMDGPU::V_LSHLREV_B64_gfx10: 3348 case AMDGPU::V_LSHLREV_B64_vi: 3349 3350 case AMDGPU::V_LSHRREV_B64: 3351 case AMDGPU::V_LSHRREV_B64_gfx10: 3352 case AMDGPU::V_LSHRREV_B64_vi: 3353 3354 case AMDGPU::V_ASHRREV_I64: 3355 case AMDGPU::V_ASHRREV_I64_gfx10: 3356 case AMDGPU::V_ASHRREV_I64_vi: 3357 3358 case AMDGPU::V_PK_LSHLREV_B16: 3359 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3360 case AMDGPU::V_PK_LSHLREV_B16_vi: 3361 3362 case AMDGPU::V_PK_LSHRREV_B16: 3363 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3364 case AMDGPU::V_PK_LSHRREV_B16_vi: 3365 case AMDGPU::V_PK_ASHRREV_I16: 3366 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3367 case AMDGPU::V_PK_ASHRREV_I16_vi: 3368 return true; 3369 default: 3370 return false; 3371 } 3372 } 3373 3374 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3375 3376 using namespace SIInstrFlags; 3377 const unsigned Opcode = Inst.getOpcode(); 3378 const MCInstrDesc &Desc = MII.get(Opcode); 3379 3380 // lds_direct register is defined so that it can be used 3381 // with 9-bit operands only. Ignore encodings which do not accept these. 3382 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3383 return true; 3384 3385 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3386 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3387 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3388 3389 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3390 3391 // lds_direct cannot be specified as either src1 or src2. 3392 for (int SrcIdx : SrcIndices) { 3393 if (SrcIdx == -1) break; 3394 const MCOperand &Src = Inst.getOperand(SrcIdx); 3395 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3396 return false; 3397 } 3398 } 3399 3400 if (Src0Idx == -1) 3401 return true; 3402 3403 const MCOperand &Src = Inst.getOperand(Src0Idx); 3404 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3405 return true; 3406 3407 // lds_direct is specified as src0. Check additional limitations. 3408 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3409 } 3410 3411 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3412 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3413 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3414 if (Op.isFlatOffset()) 3415 return Op.getStartLoc(); 3416 } 3417 return getLoc(); 3418 } 3419 3420 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3421 const OperandVector &Operands) { 3422 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3423 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3424 return true; 3425 3426 auto Opcode = Inst.getOpcode(); 3427 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3428 assert(OpNum != -1); 3429 3430 const auto &Op = Inst.getOperand(OpNum); 3431 if (!hasFlatOffsets() && Op.getImm() != 0) { 3432 Error(getFlatOffsetLoc(Operands), 3433 "flat offset modifier is not supported on this GPU"); 3434 return false; 3435 } 3436 3437 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3438 // For FLAT segment the offset must be positive; 3439 // MSB is ignored and forced to zero. 3440 unsigned OffsetSize = isGFX9() ? 13 : 12; 3441 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3442 if (!isIntN(OffsetSize, Op.getImm())) { 3443 Error(getFlatOffsetLoc(Operands), 3444 isGFX9() ? 
"expected a 13-bit signed offset" : 3445 "expected a 12-bit signed offset"); 3446 return false; 3447 } 3448 } else { 3449 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3450 Error(getFlatOffsetLoc(Operands), 3451 isGFX9() ? "expected a 12-bit unsigned offset" : 3452 "expected an 11-bit unsigned offset"); 3453 return false; 3454 } 3455 } 3456 3457 return true; 3458 } 3459 3460 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3461 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3462 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3463 if (Op.isSMEMOffset()) 3464 return Op.getStartLoc(); 3465 } 3466 return getLoc(); 3467 } 3468 3469 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3470 const OperandVector &Operands) { 3471 if (isCI() || isSI()) 3472 return true; 3473 3474 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3475 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3476 return true; 3477 3478 auto Opcode = Inst.getOpcode(); 3479 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3480 if (OpNum == -1) 3481 return true; 3482 3483 const auto &Op = Inst.getOperand(OpNum); 3484 if (!Op.isImm()) 3485 return true; 3486 3487 uint64_t Offset = Op.getImm(); 3488 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3489 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3490 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3491 return true; 3492 3493 Error(getSMEMOffsetLoc(Operands), 3494 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3495 "expected a 21-bit signed offset"); 3496 3497 return false; 3498 } 3499 3500 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3501 unsigned Opcode = Inst.getOpcode(); 3502 const MCInstrDesc &Desc = MII.get(Opcode); 3503 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3504 return true; 3505 3506 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3507 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3508 3509 const int OpIndices[] = { Src0Idx, Src1Idx }; 3510 3511 unsigned NumExprs = 0; 3512 unsigned NumLiterals = 0; 3513 uint32_t LiteralValue; 3514 3515 for (int OpIdx : OpIndices) { 3516 if (OpIdx == -1) break; 3517 3518 const MCOperand &MO = Inst.getOperand(OpIdx); 3519 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3520 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3521 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3522 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3523 if (NumLiterals == 0 || LiteralValue != Value) { 3524 LiteralValue = Value; 3525 ++NumLiterals; 3526 } 3527 } else if (MO.isExpr()) { 3528 ++NumExprs; 3529 } 3530 } 3531 } 3532 3533 return NumLiterals + NumExprs <= 1; 3534 } 3535 3536 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3537 const unsigned Opc = Inst.getOpcode(); 3538 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3539 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3540 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3541 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3542 3543 if (OpSel & ~3) 3544 return false; 3545 } 3546 return true; 3547 } 3548 3549 // Check if VCC register matches wavefront size 3550 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3551 auto FB = getFeatureBits(); 3552 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3553 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3554 } 3555 3556 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3557 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3558 unsigned Opcode = Inst.getOpcode(); 3559 const MCInstrDesc &Desc = MII.get(Opcode); 3560 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3561 return true; 3562 3563 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3564 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3565 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3566 3567 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3568 3569 unsigned NumExprs = 0; 3570 unsigned NumLiterals = 0; 3571 uint32_t LiteralValue; 3572 3573 for (int OpIdx : OpIndices) { 3574 if (OpIdx == -1) break; 3575 3576 const MCOperand &MO = Inst.getOperand(OpIdx); 3577 if (!MO.isImm() && !MO.isExpr()) 3578 continue; 3579 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3580 continue; 3581 3582 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3583 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3584 return false; 3585 3586 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3587 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3588 if (NumLiterals == 0 || LiteralValue != Value) { 3589 LiteralValue = Value; 3590 ++NumLiterals; 3591 } 3592 } else if (MO.isExpr()) { 3593 ++NumExprs; 3594 } 3595 } 3596 NumLiterals += NumExprs; 3597 3598 return !NumLiterals || 3599 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3600 } 3601 3602 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3603 const SMLoc &IDLoc, 3604 const OperandVector &Operands) { 3605 if (!validateLdsDirect(Inst)) { 3606 Error(IDLoc, 3607 "invalid use of lds_direct"); 3608 return false; 3609 } 3610 if (!validateSOPLiteral(Inst)) { 3611 Error(IDLoc, 3612 "only one literal operand is allowed"); 3613 return false; 3614 } 3615 if (!validateVOP3Literal(Inst)) { 3616 Error(IDLoc, 3617 "invalid literal operand"); 3618 return false; 3619 } 3620 if (!validateConstantBusLimitations(Inst)) { 3621 Error(IDLoc, 3622 "invalid operand (violates constant bus restrictions)"); 3623 return false; 3624 } 3625 if (!validateEarlyClobberLimitations(Inst)) { 3626 Error(IDLoc, 3627 "destination must be different than all sources"); 3628 return false; 3629 } 3630 if (!validateIntClampSupported(Inst)) { 3631 Error(IDLoc, 3632 "integer clamping is not supported on this GPU"); 3633 return false; 3634 } 3635 if (!validateOpSel(Inst)) { 3636 Error(IDLoc, 3637 "invalid op_sel operand"); 3638 return false; 3639 } 3640 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
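// (For MIMG, however, d16 is a distinct operand, so validateMIMGD16 below
// rejects it on subtargets that lack the feature.)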
3641 if (!validateMIMGD16(Inst)) { 3642 Error(IDLoc, 3643 "d16 modifier is not supported on this GPU"); 3644 return false; 3645 } 3646 if (!validateMIMGDim(Inst)) { 3647 Error(IDLoc, "dim modifier is required on this GPU"); 3648 return false; 3649 } 3650 if (!validateMIMGDataSize(Inst)) { 3651 Error(IDLoc, 3652 "image data size does not match dmask and tfe"); 3653 return false; 3654 } 3655 if (!validateMIMGAddrSize(Inst)) { 3656 Error(IDLoc, 3657 "image address size does not match dim and a16"); 3658 return false; 3659 } 3660 if (!validateMIMGAtomicDMask(Inst)) { 3661 Error(IDLoc, 3662 "invalid atomic image dmask"); 3663 return false; 3664 } 3665 if (!validateMIMGGatherDMask(Inst)) { 3666 Error(IDLoc, 3667 "invalid image_gather dmask: only one bit must be set"); 3668 return false; 3669 } 3670 if (!validateMovrels(Inst)) { 3671 Error(IDLoc, "source operand must be a VGPR"); 3672 return false; 3673 } 3674 if (!validateFlatOffset(Inst, Operands)) { 3675 return false; 3676 } 3677 if (!validateSMEMOffset(Inst, Operands)) { 3678 return false; 3679 } 3680 if (!validateMAIAccWrite(Inst)) { 3681 return false; 3682 } 3683 3684 return true; 3685 } 3686 3687 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3688 const FeatureBitset &FBS, 3689 unsigned VariantID = 0); 3690 3691 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3692 OperandVector &Operands, 3693 MCStreamer &Out, 3694 uint64_t &ErrorInfo, 3695 bool MatchingInlineAsm) { 3696 MCInst Inst; 3697 unsigned Result = Match_Success; 3698 for (auto Variant : getMatchedVariants()) { 3699 uint64_t EI; 3700 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3701 Variant); 3702 // We order match statuses from least to most specific. We use most specific 3703 // status as resulting 3704 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3705 if ((R == Match_Success) || 3706 (R == Match_PreferE32) || 3707 (R == Match_MissingFeature && Result != Match_PreferE32) || 3708 (R == Match_InvalidOperand && Result != Match_MissingFeature 3709 && Result != Match_PreferE32) || 3710 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3711 && Result != Match_MissingFeature 3712 && Result != Match_PreferE32)) { 3713 Result = R; 3714 ErrorInfo = EI; 3715 } 3716 if (R == Match_Success) 3717 break; 3718 } 3719 3720 switch (Result) { 3721 default: break; 3722 case Match_Success: 3723 if (!validateInstruction(Inst, IDLoc, Operands)) { 3724 return true; 3725 } 3726 Inst.setLoc(IDLoc); 3727 Out.emitInstruction(Inst, getSTI()); 3728 return false; 3729 3730 case Match_MissingFeature: 3731 return Error(IDLoc, "instruction not supported on this GPU"); 3732 3733 case Match_MnemonicFail: { 3734 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3735 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3736 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3737 return Error(IDLoc, "invalid instruction" + Suggestion, 3738 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3739 } 3740 3741 case Match_InvalidOperand: { 3742 SMLoc ErrorLoc = IDLoc; 3743 if (ErrorInfo != ~0ULL) { 3744 if (ErrorInfo >= Operands.size()) { 3745 return Error(IDLoc, "too few operands for instruction"); 3746 } 3747 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3748 if (ErrorLoc == SMLoc()) 3749 ErrorLoc = IDLoc; 3750 } 3751 return Error(ErrorLoc, "invalid operand for instruction"); 3752 } 3753 3754 case Match_PreferE32: 3755 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 3756 "should be encoded as e32"); 3757 } 3758 llvm_unreachable("Implement any new match types added!"); 3759 } 3760 3761 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3762 int64_t Tmp = -1; 3763 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3764 return true; 3765 } 3766 if (getParser().parseAbsoluteExpression(Tmp)) { 3767 return true; 3768 } 3769 Ret = static_cast<uint32_t>(Tmp); 3770 return false; 3771 } 3772 3773 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3774 uint32_t &Minor) { 3775 if (ParseAsAbsoluteExpression(Major)) 3776 return TokError("invalid major version"); 3777 3778 if (getLexer().isNot(AsmToken::Comma)) 3779 return TokError("minor version number required, comma expected"); 3780 Lex(); 3781 3782 if (ParseAsAbsoluteExpression(Minor)) 3783 return TokError("invalid minor version"); 3784 3785 return false; 3786 } 3787 3788 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3789 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3790 return TokError("directive only supported for amdgcn architecture"); 3791 3792 std::string Target; 3793 3794 SMLoc TargetStart = getTok().getLoc(); 3795 if (getParser().parseEscapedString(Target)) 3796 return true; 3797 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3798 3799 std::string ExpectedTarget; 3800 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3801 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3802 3803 if (Target != ExpectedTargetOS.str()) 3804 return getParser().Error(TargetRange.Start, "target must match options", 3805 TargetRange); 3806 3807 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3808 return false; 3809 } 3810 3811 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3812 return getParser().Error(Range.Start, "value out of range", Range); 3813 } 3814 3815 bool AMDGPUAsmParser::calculateGPRBlocks( 3816 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3817 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3818 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3819 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3820 // TODO(scott.linder): These calculations are duplicated from 3821 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
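// Illustrative example (not from the original source; exact allocation
// granule sizes are per-subtarget): on a wave64 target with a 4-VGPR
// granule, a kernel using 17 VGPRs is rounded up to 20, and the granulated
// count recorded in the descriptor is 20/4 - 1 = 4. The IsaInfo helpers
// below encapsulate the exact per-target rules.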
3822 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3823 3824 unsigned NumVGPRs = NextFreeVGPR; 3825 unsigned NumSGPRs = NextFreeSGPR; 3826 3827 if (Version.Major >= 10) 3828 NumSGPRs = 0; 3829 else { 3830 unsigned MaxAddressableNumSGPRs = 3831 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3832 3833 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3834 NumSGPRs > MaxAddressableNumSGPRs) 3835 return OutOfRangeError(SGPRRange); 3836 3837 NumSGPRs += 3838 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3839 3840 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3841 NumSGPRs > MaxAddressableNumSGPRs) 3842 return OutOfRangeError(SGPRRange); 3843 3844 if (Features.test(FeatureSGPRInitBug)) 3845 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3846 } 3847 3848 VGPRBlocks = 3849 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3850 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3851 3852 return false; 3853 } 3854 3855 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3856 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3857 return TokError("directive only supported for amdgcn architecture"); 3858 3859 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3860 return TokError("directive only supported for amdhsa OS"); 3861 3862 StringRef KernelName; 3863 if (getParser().parseIdentifier(KernelName)) 3864 return true; 3865 3866 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3867 3868 StringSet<> Seen; 3869 3870 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3871 3872 SMRange VGPRRange; 3873 uint64_t NextFreeVGPR = 0; 3874 SMRange SGPRRange; 3875 uint64_t NextFreeSGPR = 0; 3876 unsigned UserSGPRCount = 0; 3877 bool ReserveVCC = true; 3878 bool ReserveFlatScr = true; 3879 bool ReserveXNACK = hasXNACK(); 3880 Optional<bool> EnableWavefrontSize32; 3881 3882 while (true) { 3883 while (getLexer().is(AsmToken::EndOfStatement)) 3884 Lex(); 3885 3886 if (getLexer().isNot(AsmToken::Identifier)) 3887 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3888 3889 StringRef ID = getTok().getIdentifier(); 3890 SMRange IDRange = getTok().getLocRange(); 3891 Lex(); 3892 3893 if (ID == ".end_amdhsa_kernel") 3894 break; 3895 3896 if (Seen.find(ID) != Seen.end()) 3897 return TokError(".amdhsa_ directives cannot be repeated"); 3898 Seen.insert(ID); 3899 3900 SMLoc ValStart = getTok().getLoc(); 3901 int64_t IVal; 3902 if (getParser().parseAbsoluteExpression(IVal)) 3903 return true; 3904 SMLoc ValEnd = getTok().getLoc(); 3905 SMRange ValRange = SMRange(ValStart, ValEnd); 3906 3907 if (IVal < 0) 3908 return OutOfRangeError(ValRange); 3909 3910 uint64_t Val = IVal; 3911 3912 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3913 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3914 return OutOfRangeError(RANGE); \ 3915 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3916 3917 if (ID == ".amdhsa_group_segment_fixed_size") { 3918 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3919 return OutOfRangeError(ValRange); 3920 KD.group_segment_fixed_size = Val; 3921 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3922 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3923 return OutOfRangeError(ValRange); 3924 KD.private_segment_fixed_size = Val; 3925 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3926 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3927 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3928 Val, ValRange); 
3929 if (Val) 3930 UserSGPRCount += 4; 3931 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3932 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3933 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3934 ValRange); 3935 if (Val) 3936 UserSGPRCount += 2; 3937 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3938 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3939 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3940 ValRange); 3941 if (Val) 3942 UserSGPRCount += 2; 3943 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3944 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3945 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3946 Val, ValRange); 3947 if (Val) 3948 UserSGPRCount += 2; 3949 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3950 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3951 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3952 ValRange); 3953 if (Val) 3954 UserSGPRCount += 2; 3955 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3956 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3957 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3958 ValRange); 3959 if (Val) 3960 UserSGPRCount += 2; 3961 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3962 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3963 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3964 Val, ValRange); 3965 if (Val) 3966 UserSGPRCount += 1; 3967 } else if (ID == ".amdhsa_wavefront_size32") { 3968 if (IVersion.Major < 10) 3969 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3970 IDRange); 3971 EnableWavefrontSize32 = Val; 3972 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3973 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3974 Val, ValRange); 3975 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3976 PARSE_BITS_ENTRY( 3977 KD.compute_pgm_rsrc2, 3978 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3979 ValRange); 3980 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3981 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3982 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3983 ValRange); 3984 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3985 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3986 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3987 ValRange); 3988 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3989 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3990 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3991 ValRange); 3992 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3993 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3994 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3995 ValRange); 3996 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3997 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3998 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3999 ValRange); 4000 } else if (ID == ".amdhsa_next_free_vgpr") { 4001 VGPRRange = ValRange; 4002 NextFreeVGPR = Val; 4003 } else if (ID == ".amdhsa_next_free_sgpr") { 4004 SGPRRange = ValRange; 4005 NextFreeSGPR = Val; 4006 } else if (ID == ".amdhsa_reserve_vcc") { 4007 if (!isUInt<1>(Val)) 4008 return OutOfRangeError(ValRange); 4009 ReserveVCC = Val; 4010 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4011 if (IVersion.Major < 7) 4012 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4013 IDRange); 4014 if (!isUInt<1>(Val)) 4015 return OutOfRangeError(ValRange); 4016 ReserveFlatScr = Val; 4017 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4018 if (IVersion.Major < 8) 4019 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4020 IDRange); 4021 if (!isUInt<1>(Val)) 4022 return OutOfRangeError(ValRange); 4023 ReserveXNACK = Val; 4024 } else if (ID == ".amdhsa_float_round_mode_32") { 4025 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4026 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4027 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4028 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4029 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4030 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4031 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4032 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4033 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4035 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4036 ValRange); 4037 } else if (ID == ".amdhsa_dx10_clamp") { 4038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4039 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4040 } else if (ID == ".amdhsa_ieee_mode") { 4041 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4042 Val, ValRange); 4043 } else if (ID == ".amdhsa_fp16_overflow") { 4044 if (IVersion.Major < 9) 4045 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4046 IDRange); 4047 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4048 ValRange); 4049 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4050 if (IVersion.Major < 10) 4051 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4052 IDRange); 4053 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4054 ValRange); 4055 } else if (ID == ".amdhsa_memory_ordered") { 4056 if (IVersion.Major < 10) 4057 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4058 IDRange); 4059 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4060 ValRange); 4061 } else if (ID == ".amdhsa_forward_progress") { 4062 if (IVersion.Major < 10) 4063 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4064 IDRange); 4065 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4066 ValRange); 4067 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4068 PARSE_BITS_ENTRY( 4069 KD.compute_pgm_rsrc2, 4070 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4071 ValRange); 4072 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4073 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4074 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4075 Val, ValRange); 4076 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4077 PARSE_BITS_ENTRY( 4078 KD.compute_pgm_rsrc2, 4079 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4080 ValRange); 4081 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4082 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4083 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4084 Val, ValRange); 4085 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4086 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4087 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4088 Val, ValRange); 4089 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4091 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4092 Val, ValRange); 4093 } else if (ID == ".amdhsa_exception_int_div_zero") { 4094 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4095 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4096 Val, ValRange); 4097 } else { 4098 return getParser().Error(IDRange.Start, 
4099 "unknown .amdhsa_kernel directive", IDRange); 4100 } 4101 4102 #undef PARSE_BITS_ENTRY 4103 } 4104 4105 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4106 return TokError(".amdhsa_next_free_vgpr directive is required"); 4107 4108 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4109 return TokError(".amdhsa_next_free_sgpr directive is required"); 4110 4111 unsigned VGPRBlocks; 4112 unsigned SGPRBlocks; 4113 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4114 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4115 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4116 SGPRBlocks)) 4117 return true; 4118 4119 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4120 VGPRBlocks)) 4121 return OutOfRangeError(VGPRRange); 4122 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4123 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4124 4125 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4126 SGPRBlocks)) 4127 return OutOfRangeError(SGPRRange); 4128 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4129 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4130 SGPRBlocks); 4131 4132 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4133 return TokError("too many user SGPRs enabled"); 4134 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4135 UserSGPRCount); 4136 4137 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4138 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4139 ReserveFlatScr, ReserveXNACK); 4140 return false; 4141 } 4142 4143 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4144 uint32_t Major; 4145 uint32_t Minor; 4146 4147 if (ParseDirectiveMajorMinor(Major, Minor)) 4148 return true; 4149 4150 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4151 return false; 4152 } 4153 4154 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4155 uint32_t Major; 4156 uint32_t Minor; 4157 uint32_t Stepping; 4158 StringRef VendorName; 4159 StringRef ArchName; 4160 4161 // If this directive has no arguments, then use the ISA version for the 4162 // targeted GPU. 
4163 if (getLexer().is(AsmToken::EndOfStatement)) { 4164 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4165 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4166 ISA.Stepping, 4167 "AMD", "AMDGPU"); 4168 return false; 4169 } 4170 4171 if (ParseDirectiveMajorMinor(Major, Minor)) 4172 return true; 4173 4174 if (getLexer().isNot(AsmToken::Comma)) 4175 return TokError("stepping version number required, comma expected"); 4176 Lex(); 4177 4178 if (ParseAsAbsoluteExpression(Stepping)) 4179 return TokError("invalid stepping version"); 4180 4181 if (getLexer().isNot(AsmToken::Comma)) 4182 return TokError("vendor name required, comma expected"); 4183 Lex(); 4184 4185 if (getLexer().isNot(AsmToken::String)) 4186 return TokError("invalid vendor name"); 4187 4188 VendorName = getLexer().getTok().getStringContents(); 4189 Lex(); 4190 4191 if (getLexer().isNot(AsmToken::Comma)) 4192 return TokError("arch name required, comma expected"); 4193 Lex(); 4194 4195 if (getLexer().isNot(AsmToken::String)) 4196 return TokError("invalid arch name"); 4197 4198 ArchName = getLexer().getTok().getStringContents(); 4199 Lex(); 4200 4201 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4202 VendorName, ArchName); 4203 return false; 4204 } 4205 4206 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4207 amd_kernel_code_t &Header) { 4208 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4209 // assembly for backwards compatibility. 4210 if (ID == "max_scratch_backing_memory_byte_size") { 4211 Parser.eatToEndOfStatement(); 4212 return false; 4213 } 4214 4215 SmallString<40> ErrStr; 4216 raw_svector_ostream Err(ErrStr); 4217 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4218 return TokError(Err.str()); 4219 } 4220 Lex(); 4221 4222 if (ID == "enable_wavefront_size32") { 4223 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4224 if (!isGFX10()) 4225 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4226 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4227 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4228 } else { 4229 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4230 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4231 } 4232 } 4233 4234 if (ID == "wavefront_size") { 4235 if (Header.wavefront_size == 5) { 4236 if (!isGFX10()) 4237 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4238 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4239 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4240 } else if (Header.wavefront_size == 6) { 4241 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4242 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4243 } 4244 } 4245 4246 if (ID == "enable_wgp_mode") { 4247 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4248 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4249 } 4250 4251 if (ID == "enable_mem_ordered") { 4252 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4253 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4254 } 4255 4256 if (ID == "enable_fwd_progress") { 4257 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4258 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4259 } 4260 4261 return false; 4262 } 4263 4264 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4265 amd_kernel_code_t Header; 4266 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4267 4268 while (true) { 4269 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4270 // will set the current token to EndOfStatement. 4271 while(getLexer().is(AsmToken::EndOfStatement)) 4272 Lex(); 4273 4274 if (getLexer().isNot(AsmToken::Identifier)) 4275 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4276 4277 StringRef ID = getLexer().getTok().getIdentifier(); 4278 Lex(); 4279 4280 if (ID == ".end_amd_kernel_code_t") 4281 break; 4282 4283 if (ParseAMDKernelCodeTValue(ID, Header)) 4284 return true; 4285 } 4286 4287 getTargetStreamer().EmitAMDKernelCodeT(Header); 4288 4289 return false; 4290 } 4291 4292 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4293 if (getLexer().isNot(AsmToken::Identifier)) 4294 return TokError("expected symbol name"); 4295 4296 StringRef KernelName = Parser.getTok().getString(); 4297 4298 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4299 ELF::STT_AMDGPU_HSA_KERNEL); 4300 Lex(); 4301 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4302 KernelScope.initialize(getContext()); 4303 return false; 4304 } 4305 4306 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4307 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4308 return Error(getParser().getTok().getLoc(), 4309 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4310 "architectures"); 4311 } 4312 4313 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4314 4315 std::string ISAVersionStringFromSTI; 4316 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4317 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4318 4319 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4320 return Error(getParser().getTok().getLoc(), 4321 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4322 "arguments specified through the command line"); 4323 } 4324 4325 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4326 Lex(); 4327 4328 return false; 4329 } 4330 4331 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4332 const char *AssemblerDirectiveBegin; 4333 const char *AssemblerDirectiveEnd; 4334 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4335 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4336 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4337 HSAMD::V3::AssemblerDirectiveEnd) 4338 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4339 HSAMD::AssemblerDirectiveEnd); 4340 4341 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4342 return Error(getParser().getTok().getLoc(), 4343 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4344 "not available on non-amdhsa OSes")).str()); 4345 } 4346 4347 std::string HSAMetadataString; 4348 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4349 HSAMetadataString)) 4350 return true; 4351 4352 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4353 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4354 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4355 } else { 4356 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4357 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4358 } 4359 4360 return false; 4361 } 4362 4363 /// Common code to parse out a block of text (typically YAML) between start and 4364 /// end directives. 
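/// Whitespace and statement separators are preserved, so multi-line metadata
/// (e.g. YAML) keeps its line structure in the collected string.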
4365 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4366 const char *AssemblerDirectiveEnd, 4367 std::string &CollectString) { 4368 4369 raw_string_ostream CollectStream(CollectString); 4370 4371 getLexer().setSkipSpace(false); 4372 4373 bool FoundEnd = false; 4374 while (!getLexer().is(AsmToken::Eof)) { 4375 while (getLexer().is(AsmToken::Space)) { 4376 CollectStream << getLexer().getTok().getString(); 4377 Lex(); 4378 } 4379 4380 if (getLexer().is(AsmToken::Identifier)) { 4381 StringRef ID = getLexer().getTok().getIdentifier(); 4382 if (ID == AssemblerDirectiveEnd) { 4383 Lex(); 4384 FoundEnd = true; 4385 break; 4386 } 4387 } 4388 4389 CollectStream << Parser.parseStringToEndOfStatement() 4390 << getContext().getAsmInfo()->getSeparatorString(); 4391 4392 Parser.eatToEndOfStatement(); 4393 } 4394 4395 getLexer().setSkipSpace(true); 4396 4397 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4398 return TokError(Twine("expected directive ") + 4399 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4400 } 4401 4402 CollectStream.flush(); 4403 return false; 4404 } 4405 4406 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4407 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4408 std::string String; 4409 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4410 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4411 return true; 4412 4413 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4414 if (!PALMetadata->setFromString(String)) 4415 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4416 return false; 4417 } 4418 4419 /// Parse the assembler directive for old linear-format PAL metadata. 4420 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4421 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4422 return Error(getParser().getTok().getLoc(), 4423 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4424 "not available on non-amdpal OSes")).str()); 4425 } 4426 4427 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4428 PALMetadata->setLegacy(); 4429 for (;;) { 4430 uint32_t Key, Value; 4431 if (ParseAsAbsoluteExpression(Key)) { 4432 return TokError(Twine("invalid value in ") + 4433 Twine(PALMD::AssemblerDirective)); 4434 } 4435 if (getLexer().isNot(AsmToken::Comma)) { 4436 return TokError(Twine("expected an even number of values in ") + 4437 Twine(PALMD::AssemblerDirective)); 4438 } 4439 Lex(); 4440 if (ParseAsAbsoluteExpression(Value)) { 4441 return TokError(Twine("invalid value in ") + 4442 Twine(PALMD::AssemblerDirective)); 4443 } 4444 PALMetadata->setRegister(Key, Value); 4445 if (getLexer().isNot(AsmToken::Comma)) 4446 break; 4447 Lex(); 4448 } 4449 return false; 4450 } 4451 4452 /// ParseDirectiveAMDGPULDS 4453 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4454 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4455 if (getParser().checkForValidSection()) 4456 return true; 4457 4458 StringRef Name; 4459 SMLoc NameLoc = getLexer().getLoc(); 4460 if (getParser().parseIdentifier(Name)) 4461 return TokError("expected identifier in directive"); 4462 4463 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4464 if (parseToken(AsmToken::Comma, "expected ','")) 4465 return true; 4466 4467 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4468 4469 int64_t Size; 4470 SMLoc SizeLoc = getLexer().getLoc(); 4471 if (getParser().parseAbsoluteExpression(Size)) 4472 return true; 4473 if (Size < 0) 4474 return 
Error(SizeLoc, "size must be non-negative");
4475 if (Size > LocalMemorySize)
4476 return Error(SizeLoc, "size is too large");
4477
4478 int64_t Alignment = 4;
4479 if (getLexer().is(AsmToken::Comma)) {
4480 Lex();
4481 SMLoc AlignLoc = getLexer().getLoc();
4482 if (getParser().parseAbsoluteExpression(Alignment))
4483 return true;
4484 if (Alignment < 0 || !isPowerOf2_64(Alignment))
4485 return Error(AlignLoc, "alignment must be a power of two");
4486
4487 // Alignment larger than the size of LDS is possible in theory, as long
4488 // as the linker manages to place the symbol at address 0, but we do want
4489 // to make sure the alignment fits nicely into a 32-bit integer.
4490 if (Alignment >= 1u << 31)
4491 return Error(AlignLoc, "alignment is too large");
4492 }
4493
4494 if (parseToken(AsmToken::EndOfStatement,
4495 "unexpected token in '.amdgpu_lds' directive"))
4496 return true;
4497
4498 Symbol->redefineIfPossible();
4499 if (!Symbol->isUndefined())
4500 return Error(NameLoc, "invalid symbol redefinition");
4501
4502 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4503 return false;
4504 }
4505
4506 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4507 StringRef IDVal = DirectiveID.getString();
4508
4509 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4510 if (IDVal == ".amdgcn_target")
4511 return ParseDirectiveAMDGCNTarget();
4512
4513 if (IDVal == ".amdhsa_kernel")
4514 return ParseDirectiveAMDHSAKernel();
4515
4516 // TODO: Restructure/combine with PAL metadata directive.
4517 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4518 return ParseDirectiveHSAMetadata();
4519 } else {
4520 if (IDVal == ".hsa_code_object_version")
4521 return ParseDirectiveHSACodeObjectVersion();
4522
4523 if (IDVal == ".hsa_code_object_isa")
4524 return ParseDirectiveHSACodeObjectISA();
4525
4526 if (IDVal == ".amd_kernel_code_t")
4527 return ParseDirectiveAMDKernelCodeT();
4528
4529 if (IDVal == ".amdgpu_hsa_kernel")
4530 return ParseDirectiveAMDGPUHsaKernel();
4531
4532 if (IDVal == ".amd_amdgpu_isa")
4533 return ParseDirectiveISAVersion();
4534
4535 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4536 return ParseDirectiveHSAMetadata();
4537 }
4538
4539 if (IDVal == ".amdgpu_lds")
4540 return ParseDirectiveAMDGPULDS();
4541
4542 if (IDVal == PALMD::AssemblerDirectiveBegin)
4543 return ParseDirectivePALMetadataBegin();
4544
4545 if (IDVal == PALMD::AssemblerDirective)
4546 return ParseDirectivePALMetadata();
4547
4548 return true;
4549 }
4550
4551 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4552 unsigned RegNo) const {
4553
4554 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4555 R.isValid(); ++R) {
4556 if (*R == RegNo)
4557 return isGFX9() || isGFX10();
4558 }
4559
4560 // GFX10 has 2 more SGPRs 104 and 105.
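// (So s104/s105, and any register that aliases them, are accepted only when
// hasSGPR104_SGPR105() is true.)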
4561 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4562 R.isValid(); ++R) {
4563 if (*R == RegNo)
4564 return hasSGPR104_SGPR105();
4565 }
4566
4567 switch (RegNo) {
4568 case AMDGPU::SRC_SHARED_BASE:
4569 case AMDGPU::SRC_SHARED_LIMIT:
4570 case AMDGPU::SRC_PRIVATE_BASE:
4571 case AMDGPU::SRC_PRIVATE_LIMIT:
4572 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4573 return !isCI() && !isSI() && !isVI();
4574 case AMDGPU::TBA:
4575 case AMDGPU::TBA_LO:
4576 case AMDGPU::TBA_HI:
4577 case AMDGPU::TMA:
4578 case AMDGPU::TMA_LO:
4579 case AMDGPU::TMA_HI:
4580 return !isGFX9() && !isGFX10();
4581 case AMDGPU::XNACK_MASK:
4582 case AMDGPU::XNACK_MASK_LO:
4583 case AMDGPU::XNACK_MASK_HI:
4584 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4585 case AMDGPU::SGPR_NULL:
4586 return isGFX10();
4587 default:
4588 break;
4589 }
4590
4591 if (isCI())
4592 return true;
4593
4594 if (isSI() || isGFX10()) {
4595 // No flat_scr on SI.
4596 // On GFX10 flat scratch is not a valid register operand and can only be
4597 // accessed with s_setreg/s_getreg.
4598 switch (RegNo) {
4599 case AMDGPU::FLAT_SCR:
4600 case AMDGPU::FLAT_SCR_LO:
4601 case AMDGPU::FLAT_SCR_HI:
4602 return false;
4603 default:
4604 return true;
4605 }
4606 }
4607
4608 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4609 // SI/CI have.
4610 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4611 R.isValid(); ++R) {
4612 if (*R == RegNo)
4613 return hasSGPR102_SGPR103();
4614 }
4615
4616 return true;
4617 }
4618
4619 OperandMatchResultTy
4620 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4621 OperandMode Mode) {
4622 // Try to parse with a custom parser
4623 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4624
4625 // If we successfully parsed the operand or if there was an error parsing,
4626 // we are done.
4627 //
4628 // If we are parsing after we reach EndOfStatement then this means we
4629 // are appending default values to the Operands list. This is only done
4630 // by a custom parser, so we shouldn't continue on to the generic parsing.
4631 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4632 getLexer().is(AsmToken::EndOfStatement))
4633 return ResTy;
4634
4635 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4636 unsigned Prefix = Operands.size();
4637 SMLoc LBraceLoc = getTok().getLoc();
4638 Parser.Lex(); // eat the '['
4639
4640 for (;;) {
4641 ResTy = parseReg(Operands);
4642 if (ResTy != MatchOperand_Success)
4643 return ResTy;
4644
4645 if (getLexer().is(AsmToken::RBrac))
4646 break;
4647
4648 if (getLexer().isNot(AsmToken::Comma))
4649 return MatchOperand_ParseFail;
4650 Parser.Lex();
4651 }
4652
4653 if (Operands.size() - Prefix > 1) {
4654 Operands.insert(Operands.begin() + Prefix,
4655 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4656 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4657 getTok().getLoc()));
4658 }
4659
4660 Parser.Lex(); // eat the ']'
4661 return MatchOperand_Success;
4662 }
4663
4664 return parseRegOrImm(Operands);
4665 }
4666
4667 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4668 // Clear any forced encodings from the previous instruction.
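// Illustrative examples of the suffix handling below:
//   "v_add_f32_e64"  -> forced 64-bit encoding, mnemonic "v_add_f32"
//   "v_add_f32_sdwa" -> forced SDWA encoding,   mnemonic "v_add_f32"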
4669 setForcedEncodingSize(0); 4670 setForcedDPP(false); 4671 setForcedSDWA(false); 4672 4673 if (Name.endswith("_e64")) { 4674 setForcedEncodingSize(64); 4675 return Name.substr(0, Name.size() - 4); 4676 } else if (Name.endswith("_e32")) { 4677 setForcedEncodingSize(32); 4678 return Name.substr(0, Name.size() - 4); 4679 } else if (Name.endswith("_dpp")) { 4680 setForcedDPP(true); 4681 return Name.substr(0, Name.size() - 4); 4682 } else if (Name.endswith("_sdwa")) { 4683 setForcedSDWA(true); 4684 return Name.substr(0, Name.size() - 5); 4685 } 4686 return Name; 4687 } 4688 4689 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4690 StringRef Name, 4691 SMLoc NameLoc, OperandVector &Operands) { 4692 // Add the instruction mnemonic 4693 Name = parseMnemonicSuffix(Name); 4694 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4695 4696 bool IsMIMG = Name.startswith("image_"); 4697 4698 while (!getLexer().is(AsmToken::EndOfStatement)) { 4699 OperandMode Mode = OperandMode_Default; 4700 if (IsMIMG && isGFX10() && Operands.size() == 2) 4701 Mode = OperandMode_NSA; 4702 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4703 4704 // Eat the comma or space if there is one. 4705 if (getLexer().is(AsmToken::Comma)) 4706 Parser.Lex(); 4707 4708 switch (Res) { 4709 case MatchOperand_Success: break; 4710 case MatchOperand_ParseFail: 4711 // FIXME: use real operand location rather than the current location. 4712 Error(getLexer().getLoc(), "failed parsing operand."); 4713 while (!getLexer().is(AsmToken::EndOfStatement)) { 4714 Parser.Lex(); 4715 } 4716 return true; 4717 case MatchOperand_NoMatch: 4718 // FIXME: use real operand location rather than the current location. 4719 Error(getLexer().getLoc(), "not a valid operand."); 4720 while (!getLexer().is(AsmToken::EndOfStatement)) { 4721 Parser.Lex(); 4722 } 4723 return true; 4724 } 4725 } 4726 4727 return false; 4728 } 4729 4730 //===----------------------------------------------------------------------===// 4731 // Utility functions 4732 //===----------------------------------------------------------------------===// 4733 4734 OperandMatchResultTy 4735 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4736 4737 if (!trySkipId(Prefix, AsmToken::Colon)) 4738 return MatchOperand_NoMatch; 4739 4740 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4741 } 4742 4743 OperandMatchResultTy 4744 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4745 AMDGPUOperand::ImmTy ImmTy, 4746 bool (*ConvertResult)(int64_t&)) { 4747 SMLoc S = getLoc(); 4748 int64_t Value = 0; 4749 4750 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4751 if (Res != MatchOperand_Success) 4752 return Res; 4753 4754 if (ConvertResult && !ConvertResult(Value)) { 4755 Error(S, "invalid " + StringRef(Prefix) + " value."); 4756 } 4757 4758 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4759 return MatchOperand_Success; 4760 } 4761 4762 OperandMatchResultTy 4763 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4764 OperandVector &Operands, 4765 AMDGPUOperand::ImmTy ImmTy, 4766 bool (*ConvertResult)(int64_t&)) { 4767 SMLoc S = getLoc(); 4768 if (!trySkipId(Prefix, AsmToken::Colon)) 4769 return MatchOperand_NoMatch; 4770 4771 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4772 return MatchOperand_ParseFail; 4773 4774 unsigned Val = 0; 4775 const unsigned MaxSize = 4; 4776 4777 // FIXME: How to verify the number of elements matches the number of src 4778 // operands? 4779 for (int I = 0; ; ++I) { 4780 int64_t Op; 4781 SMLoc Loc = getLoc(); 4782 if (!parseExpr(Op)) 4783 return MatchOperand_ParseFail; 4784 4785 if (Op != 0 && Op != 1) { 4786 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4787 return MatchOperand_ParseFail; 4788 } 4789 4790 Val |= (Op << I); 4791 4792 if (trySkipToken(AsmToken::RBrac)) 4793 break; 4794 4795 if (I + 1 == MaxSize) { 4796 Error(getLoc(), "expected a closing square bracket"); 4797 return MatchOperand_ParseFail; 4798 } 4799 4800 if (!skipToken(AsmToken::Comma, "expected a comma")) 4801 return MatchOperand_ParseFail; 4802 } 4803 4804 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4805 return MatchOperand_Success; 4806 } 4807 4808 OperandMatchResultTy 4809 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4810 AMDGPUOperand::ImmTy ImmTy) { 4811 int64_t Bit = 0; 4812 SMLoc S = Parser.getTok().getLoc(); 4813 4814 // We are at the end of the statement, and this is a default argument, so 4815 // use a default value. 
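// For a named bit such as "gds": the token "gds" sets the bit, "nogds"
// clears it, and anything else is left for other parsers to handle.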
4816 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4817 switch(getLexer().getKind()) { 4818 case AsmToken::Identifier: { 4819 StringRef Tok = Parser.getTok().getString(); 4820 if (Tok == Name) { 4821 if (Tok == "r128" && !hasMIMG_R128()) 4822 Error(S, "r128 modifier is not supported on this GPU"); 4823 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4824 Error(S, "a16 modifier is not supported on this GPU"); 4825 Bit = 1; 4826 Parser.Lex(); 4827 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4828 Bit = 0; 4829 Parser.Lex(); 4830 } else { 4831 return MatchOperand_NoMatch; 4832 } 4833 break; 4834 } 4835 default: 4836 return MatchOperand_NoMatch; 4837 } 4838 } 4839 4840 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4841 return MatchOperand_ParseFail; 4842 4843 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4844 ImmTy = AMDGPUOperand::ImmTyR128A16; 4845 4846 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4847 return MatchOperand_Success; 4848 } 4849 4850 static void addOptionalImmOperand( 4851 MCInst& Inst, const OperandVector& Operands, 4852 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4853 AMDGPUOperand::ImmTy ImmT, 4854 int64_t Default = 0) { 4855 auto i = OptionalIdx.find(ImmT); 4856 if (i != OptionalIdx.end()) { 4857 unsigned Idx = i->second; 4858 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4859 } else { 4860 Inst.addOperand(MCOperand::createImm(Default)); 4861 } 4862 } 4863 4864 OperandMatchResultTy 4865 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4866 if (getLexer().isNot(AsmToken::Identifier)) { 4867 return MatchOperand_NoMatch; 4868 } 4869 StringRef Tok = Parser.getTok().getString(); 4870 if (Tok != Prefix) { 4871 return MatchOperand_NoMatch; 4872 } 4873 4874 Parser.Lex(); 4875 if (getLexer().isNot(AsmToken::Colon)) { 4876 return MatchOperand_ParseFail; 4877 } 4878 4879 Parser.Lex(); 4880 if (getLexer().isNot(AsmToken::Identifier)) { 4881 return MatchOperand_ParseFail; 4882 } 4883 4884 Value = Parser.getTok().getString(); 4885 return MatchOperand_Success; 4886 } 4887 4888 //===----------------------------------------------------------------------===// 4889 // MTBUF format 4890 //===----------------------------------------------------------------------===// 4891 4892 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 4893 int64_t MaxVal, 4894 int64_t &Fmt) { 4895 int64_t Val; 4896 SMLoc Loc = getLoc(); 4897 4898 auto Res = parseIntWithPrefix(Pref, Val); 4899 if (Res == MatchOperand_ParseFail) 4900 return false; 4901 if (Res == MatchOperand_NoMatch) 4902 return true; 4903 4904 if (Val < 0 || Val > MaxVal) { 4905 Error(Loc, Twine("out of range ", StringRef(Pref))); 4906 return false; 4907 } 4908 4909 Fmt = Val; 4910 return true; 4911 } 4912 4913 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4914 // values to live in a joint format operand in the MCInst encoding. 4915 OperandMatchResultTy 4916 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 4917 using namespace llvm::AMDGPU::MTBUFFormat; 4918 4919 int64_t Dfmt = DFMT_UNDEF; 4920 int64_t Nfmt = NFMT_UNDEF; 4921 4922 // dfmt and nfmt can appear in either order, and each is optional. 
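// Illustrative: "dfmt:1, nfmt:2", "nfmt:2, dfmt:1" and "dfmt:1" alone are
// all accepted; a missing half keeps its default value.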
4923 for (int I = 0; I < 2; ++I) { 4924 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 4925 return MatchOperand_ParseFail; 4926 4927 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 4928 return MatchOperand_ParseFail; 4929 } 4930 // Skip optional comma between dfmt/nfmt 4931 // but guard against 2 commas following each other. 4932 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 4933 !peekToken().is(AsmToken::Comma)) { 4934 trySkipToken(AsmToken::Comma); 4935 } 4936 } 4937 4938 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 4939 return MatchOperand_NoMatch; 4940 4941 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 4942 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 4943 4944 Format = encodeDfmtNfmt(Dfmt, Nfmt); 4945 return MatchOperand_Success; 4946 } 4947 4948 OperandMatchResultTy 4949 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 4950 using namespace llvm::AMDGPU::MTBUFFormat; 4951 4952 int64_t Fmt = UFMT_UNDEF; 4953 4954 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 4955 return MatchOperand_ParseFail; 4956 4957 if (Fmt == UFMT_UNDEF) 4958 return MatchOperand_NoMatch; 4959 4960 Format = Fmt; 4961 return MatchOperand_Success; 4962 } 4963 4964 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 4965 int64_t &Nfmt, 4966 StringRef FormatStr, 4967 SMLoc Loc) { 4968 using namespace llvm::AMDGPU::MTBUFFormat; 4969 int64_t Format; 4970 4971 Format = getDfmt(FormatStr); 4972 if (Format != DFMT_UNDEF) { 4973 Dfmt = Format; 4974 return true; 4975 } 4976 4977 Format = getNfmt(FormatStr, getSTI()); 4978 if (Format != NFMT_UNDEF) { 4979 Nfmt = Format; 4980 return true; 4981 } 4982 4983 Error(Loc, "unsupported format"); 4984 return false; 4985 } 4986 4987 OperandMatchResultTy 4988 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 4989 SMLoc FormatLoc, 4990 int64_t &Format) { 4991 using namespace llvm::AMDGPU::MTBUFFormat; 4992 4993 int64_t Dfmt = DFMT_UNDEF; 4994 int64_t Nfmt = NFMT_UNDEF; 4995 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 4996 return MatchOperand_ParseFail; 4997 4998 if (trySkipToken(AsmToken::Comma)) { 4999 StringRef Str; 5000 SMLoc Loc = getLoc(); 5001 if (!parseId(Str, "expected a format string") || 5002 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5003 return MatchOperand_ParseFail; 5004 } 5005 if (Dfmt == DFMT_UNDEF) { 5006 Error(Loc, "duplicate numeric format"); 5007 } else if (Nfmt == NFMT_UNDEF){ 5008 Error(Loc, "duplicate data format"); 5009 } 5010 } 5011 5012 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5013 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5014 5015 if (isGFX10()) { 5016 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5017 if (Ufmt == UFMT_UNDEF) 5018 Error(FormatLoc, "unsupported format"); 5019 Format = Ufmt; 5020 } else { 5021 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5022 } 5023 5024 return MatchOperand_Success; 5025 } 5026 5027 OperandMatchResultTy 5028 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5029 SMLoc Loc, 5030 int64_t &Format) { 5031 using namespace llvm::AMDGPU::MTBUFFormat; 5032 5033 auto Id = getUnifiedFormat(FormatStr); 5034 if (Id == UFMT_UNDEF) 5035 return MatchOperand_NoMatch; 5036 5037 if (!isGFX10()) { 5038 Error(Loc, "unified format is not supported on this GPU"); 5039 return MatchOperand_ParseFail; 5040 } 5041 5042 Format = Id; 5043 return MatchOperand_Success; 5044 } 5045 5046 OperandMatchResultTy 5047 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5048 using namespace llvm::AMDGPU::MTBUFFormat; 5049 SMLoc Loc = getLoc(); 5050 5051 if (!parseExpr(Format)) 5052 return MatchOperand_ParseFail; 5053 if (!isValidFormatEncoding(Format, getSTI())) { 5054 Error(Loc, "out of range format"); 5055 return MatchOperand_ParseFail; 5056 } 5057 5058 return MatchOperand_Success; 5059 } 5060 5061 OperandMatchResultTy 5062 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5063 using namespace llvm::AMDGPU::MTBUFFormat; 5064 5065 if (!trySkipId("format", AsmToken::Colon)) 5066 return MatchOperand_NoMatch; 5067 5068 if (trySkipToken(AsmToken::LBrac)) { 5069 StringRef FormatStr; 5070 SMLoc Loc = getLoc(); 5071 if (!parseId(FormatStr, "expected a format string")) 5072 return MatchOperand_ParseFail; 5073 5074 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5075 if (Res == MatchOperand_NoMatch) 5076 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5077 if (Res != MatchOperand_Success) 5078 return Res; 5079 5080 skipToken(AsmToken::RBrac, "expected a closing square bracket"); 5081 return MatchOperand_Success; 5082 } 5083 5084 return parseNumericFormat(Format); 5085 } 5086 5087 OperandMatchResultTy 5088 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5089 using namespace llvm::AMDGPU::MTBUFFormat; 5090 5091 int64_t Format = getDefaultFormatEncoding(getSTI()); 5092 OperandMatchResultTy Res; 5093 SMLoc Loc = getLoc(); 5094 5095 // Parse legacy format syntax. 5096 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5097 if (Res == MatchOperand_ParseFail) 5098 return Res; 5099 5100 bool FormatFound = (Res == MatchOperand_Success); 5101 5102 Operands.push_back( 5103 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5104 5105 if (FormatFound) 5106 trySkipToken(AsmToken::Comma); 5107 5108 if (isToken(AsmToken::EndOfStatement)) { 5109 // We are expecting an soffset operand, 5110 // but let matcher handle the error. 5111 return MatchOperand_Success; 5112 } 5113 5114 // Parse soffset. 
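// (It may be given either as a register or as an immediate expression.)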
5115 Res = parseRegOrImm(Operands); 5116 if (Res != MatchOperand_Success) 5117 return Res; 5118 5119 trySkipToken(AsmToken::Comma); 5120 5121 if (!FormatFound) { 5122 if (parseSymbolicOrNumericFormat(Format) == MatchOperand_Success) { 5123 auto Size = Operands.size(); 5124 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5125 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5126 Op.setImm(Format); 5127 } 5128 return MatchOperand_Success; 5129 } 5130 5131 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5132 Error(getLoc(), "duplicate format"); 5133 return MatchOperand_ParseFail; 5134 } 5135 return MatchOperand_Success; 5136 } 5137 5138 //===----------------------------------------------------------------------===// 5139 // ds 5140 //===----------------------------------------------------------------------===// 5141 5142 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5143 const OperandVector &Operands) { 5144 OptionalImmIndexMap OptionalIdx; 5145 5146 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5147 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5148 5149 // Add the register arguments 5150 if (Op.isReg()) { 5151 Op.addRegOperands(Inst, 1); 5152 continue; 5153 } 5154 5155 // Handle optional arguments 5156 OptionalIdx[Op.getImmTy()] = i; 5157 } 5158 5159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5162 5163 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5164 } 5165 5166 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5167 bool IsGdsHardcoded) { 5168 OptionalImmIndexMap OptionalIdx; 5169 5170 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5171 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5172 5173 // Add the register arguments 5174 if (Op.isReg()) { 5175 Op.addRegOperands(Inst, 1); 5176 continue; 5177 } 5178 5179 if (Op.isToken() && Op.getToken() == "gds") { 5180 IsGdsHardcoded = true; 5181 continue; 5182 } 5183 5184 // Handle optional arguments 5185 OptionalIdx[Op.getImmTy()] = i; 5186 } 5187 5188 AMDGPUOperand::ImmTy OffsetType = 5189 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5190 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5191 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5192 AMDGPUOperand::ImmTyOffset; 5193 5194 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5195 5196 if (!IsGdsHardcoded) { 5197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5198 } 5199 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5200 } 5201 5202 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5203 OptionalImmIndexMap OptionalIdx; 5204 5205 unsigned OperandIdx[4]; 5206 unsigned EnMask = 0; 5207 int SrcIdx = 0; 5208 5209 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5210 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5211 5212 // Add the register arguments 5213 if (Op.isReg()) { 5214 assert(SrcIdx < 4); 5215 OperandIdx[SrcIdx] = Inst.size(); 5216 Op.addRegOperands(Inst, 1); 5217 ++SrcIdx; 5218 continue; 5219 } 5220 5221 if (Op.isOff()) { 5222 assert(SrcIdx < 4); 5223 OperandIdx[SrcIdx] = Inst.size(); 5224 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5225 ++SrcIdx; 5226 continue; 5227 } 5228 5229 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5230 Op.addImmOperands(Inst, 1); 5231 continue; 5232 } 5233 5234 if (Op.isToken() && Op.getToken() == "done") 5235 continue; 5236 5237 // Handle optional arguments 5238 OptionalIdx[Op.getImmTy()] = i; 5239 } 5240 5241 assert(SrcIdx == 4); 5242 5243 bool Compr = false; 5244 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5245 Compr = true; 5246 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5247 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5248 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5249 } 5250 5251 for (auto i = 0; i < SrcIdx; ++i) { 5252 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5253 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5254 } 5255 } 5256 5257 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5258 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5259 5260 Inst.addOperand(MCOperand::createImm(EnMask)); 5261 } 5262 5263 //===----------------------------------------------------------------------===// 5264 // s_waitcnt 5265 //===----------------------------------------------------------------------===// 5266 5267 static bool 5268 encodeCnt( 5269 const AMDGPU::IsaVersion ISA, 5270 int64_t &IntVal, 5271 int64_t CntVal, 5272 bool Saturate, 5273 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5274 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5275 { 5276 bool Failed = false; 5277 5278 IntVal = encode(ISA, IntVal, CntVal); 5279 if (CntVal != decode(ISA, IntVal)) { 5280 if (Saturate) { 5281 IntVal = encode(ISA, IntVal, -1); 5282 } else { 5283 Failed = true; 5284 } 5285 } 5286 return Failed; 5287 } 5288 5289 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5290 5291 SMLoc CntLoc = getLoc(); 5292 StringRef CntName = getTokenStr(); 5293 5294 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5295 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5296 return false; 5297 5298 int64_t CntVal; 5299 SMLoc ValLoc = getLoc(); 5300 if (!parseExpr(CntVal)) 5301 return false; 5302 5303 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5304 5305 bool Failed = true; 5306 bool Sat = CntName.endswith("_sat"); 5307 5308 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5309 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5310 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5311 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5312 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5313 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5314 } else { 5315 Error(CntLoc, "invalid counter name " + CntName); 5316 return false; 5317 } 5318 5319 if (Failed) { 5320 Error(ValLoc, "too large value for " + CntName); 5321 return false; 5322 } 5323 5324 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5325 return false; 5326 5327 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5328 if (isToken(AsmToken::EndOfStatement)) { 5329 Error(getLoc(), "expected a counter name"); 5330 return false; 5331 } 5332 } 5333 5334 return true; 5335 } 5336 5337 OperandMatchResultTy 5338 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5339 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5340 int64_t Waitcnt = getWaitcntBitMask(ISA); 5341 SMLoc S = getLoc(); 5342 5343 // If parse failed, do not return error code 5344 // to avoid excessive error messages. 
5345 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5346 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5347 } else { 5348 parseExpr(Waitcnt); 5349 } 5350 5351 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5352 return MatchOperand_Success; 5353 } 5354 5355 bool 5356 AMDGPUOperand::isSWaitCnt() const { 5357 return isImm(); 5358 } 5359 5360 //===----------------------------------------------------------------------===// 5361 // hwreg 5362 //===----------------------------------------------------------------------===// 5363 5364 bool 5365 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5366 int64_t &Offset, 5367 int64_t &Width) { 5368 using namespace llvm::AMDGPU::Hwreg; 5369 5370 // The register may be specified by name or using a numeric code 5371 if (isToken(AsmToken::Identifier) && 5372 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5373 HwReg.IsSymbolic = true; 5374 lex(); // skip register name 5375 } else if (!parseExpr(HwReg.Id)) { 5376 return false; 5377 } 5378 5379 if (trySkipToken(AsmToken::RParen)) 5380 return true; 5381 5382 // parse optional params 5383 return 5384 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5385 parseExpr(Offset) && 5386 skipToken(AsmToken::Comma, "expected a comma") && 5387 parseExpr(Width) && 5388 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5389 } 5390 5391 bool 5392 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5393 const int64_t Offset, 5394 const int64_t Width, 5395 const SMLoc Loc) { 5396 5397 using namespace llvm::AMDGPU::Hwreg; 5398 5399 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5400 Error(Loc, "specified hardware register is not supported on this GPU"); 5401 return false; 5402 } else if (!isValidHwreg(HwReg.Id)) { 5403 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5404 return false; 5405 } else if (!isValidHwregOffset(Offset)) { 5406 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5407 return false; 5408 } else if (!isValidHwregWidth(Width)) { 5409 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5410 return false; 5411 } 5412 return true; 5413 } 5414 5415 OperandMatchResultTy 5416 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5417 using namespace llvm::AMDGPU::Hwreg; 5418 5419 int64_t ImmVal = 0; 5420 SMLoc Loc = getLoc(); 5421 5422 // If parse failed, do not return error code 5423 // to avoid excessive error messages.
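// For illustration: this accepts both a raw 16-bit immediate and the symbolic form, e.g. "s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 32)"; the offset and width may be omitted, in which case the defaults are used.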
5424 if (trySkipId("hwreg", AsmToken::LParen)) { 5425 OperandInfoTy HwReg(ID_UNKNOWN_); 5426 int64_t Offset = OFFSET_DEFAULT_; 5427 int64_t Width = WIDTH_DEFAULT_; 5428 if (parseHwregBody(HwReg, Offset, Width) && 5429 validateHwreg(HwReg, Offset, Width, Loc)) { 5430 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5431 } 5432 } else if (parseExpr(ImmVal)) { 5433 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5434 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5435 } 5436 5437 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5438 return MatchOperand_Success; 5439 } 5440 5441 bool AMDGPUOperand::isHwreg() const { 5442 return isImmTy(ImmTyHwreg); 5443 } 5444 5445 //===----------------------------------------------------------------------===// 5446 // sendmsg 5447 //===----------------------------------------------------------------------===// 5448 5449 bool 5450 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5451 OperandInfoTy &Op, 5452 OperandInfoTy &Stream) { 5453 using namespace llvm::AMDGPU::SendMsg; 5454 5455 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5456 Msg.IsSymbolic = true; 5457 lex(); // skip message name 5458 } else if (!parseExpr(Msg.Id)) { 5459 return false; 5460 } 5461 5462 if (trySkipToken(AsmToken::Comma)) { 5463 Op.IsDefined = true; 5464 if (isToken(AsmToken::Identifier) && 5465 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5466 lex(); // skip operation name 5467 } else if (!parseExpr(Op.Id)) { 5468 return false; 5469 } 5470 5471 if (trySkipToken(AsmToken::Comma)) { 5472 Stream.IsDefined = true; 5473 if (!parseExpr(Stream.Id)) 5474 return false; 5475 } 5476 } 5477 5478 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5479 } 5480 5481 bool 5482 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5483 const OperandInfoTy &Op, 5484 const OperandInfoTy &Stream, 5485 const SMLoc S) { 5486 using namespace llvm::AMDGPU::SendMsg; 5487 5488 // Validation strictness depends on whether the message is specified 5489 // in a symbolic or in a numeric form. In the latter case 5490 // only the encoding possibility is checked. 5491 bool Strict = Msg.IsSymbolic; 5492 5493 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5494 Error(S, "invalid message id"); 5495 return false; 5496 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5497 Error(S, Op.IsDefined ? 5498 "message does not support operations" : 5499 "missing message operation"); 5500 return false; 5501 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5502 Error(S, "invalid operation id"); 5503 return false; 5504 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5505 Error(S, "message operation does not support streams"); 5506 return false; 5507 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5508 Error(S, "invalid message stream id"); 5509 return false; 5510 } 5511 return true; 5512 } 5513 5514 OperandMatchResultTy 5515 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5516 using namespace llvm::AMDGPU::SendMsg; 5517 5518 int64_t ImmVal = 0; 5519 SMLoc Loc = getLoc(); 5520 5521 // If parse failed, do not return error code 5522 // to avoid excessive error messages.
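// For illustration: this accepts both a raw 16-bit immediate and the symbolic form, e.g. "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)"; the operation and stream id may be omitted for messages that do not use them.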
5523 if (trySkipId("sendmsg", AsmToken::LParen)) { 5524 OperandInfoTy Msg(ID_UNKNOWN_); 5525 OperandInfoTy Op(OP_NONE_); 5526 OperandInfoTy Stream(STREAM_ID_NONE_); 5527 if (parseSendMsgBody(Msg, Op, Stream) && 5528 validateSendMsg(Msg, Op, Stream, Loc)) { 5529 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5530 } 5531 } else if (parseExpr(ImmVal)) { 5532 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5533 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5534 } 5535 5536 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5537 return MatchOperand_Success; 5538 } 5539 5540 bool AMDGPUOperand::isSendMsg() const { 5541 return isImmTy(ImmTySendMsg); 5542 } 5543 5544 //===----------------------------------------------------------------------===// 5545 // v_interp 5546 //===----------------------------------------------------------------------===// 5547 5548 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5549 if (getLexer().getKind() != AsmToken::Identifier) 5550 return MatchOperand_NoMatch; 5551 5552 StringRef Str = Parser.getTok().getString(); 5553 int Slot = StringSwitch<int>(Str) 5554 .Case("p10", 0) 5555 .Case("p20", 1) 5556 .Case("p0", 2) 5557 .Default(-1); 5558 5559 SMLoc S = Parser.getTok().getLoc(); 5560 if (Slot == -1) 5561 return MatchOperand_ParseFail; 5562 5563 Parser.Lex(); 5564 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5565 AMDGPUOperand::ImmTyInterpSlot)); 5566 return MatchOperand_Success; 5567 } 5568 5569 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5570 if (getLexer().getKind() != AsmToken::Identifier) 5571 return MatchOperand_NoMatch; 5572 5573 StringRef Str = Parser.getTok().getString(); 5574 if (!Str.startswith("attr")) 5575 return MatchOperand_NoMatch; 5576 5577 StringRef Chan = Str.take_back(2); 5578 int AttrChan = StringSwitch<int>(Chan) 5579 .Case(".x", 0) 5580 .Case(".y", 1) 5581 .Case(".z", 2) 5582 .Case(".w", 3) 5583 .Default(-1); 5584 if (AttrChan == -1) 5585 return MatchOperand_ParseFail; 5586 5587 Str = Str.drop_back(2).drop_front(4); 5588 5589 uint8_t Attr; 5590 if (Str.getAsInteger(10, Attr)) 5591 return MatchOperand_ParseFail; 5592 5593 SMLoc S = Parser.getTok().getLoc(); 5594 Parser.Lex(); 5595 if (Attr > 63) { 5596 Error(S, "out of bounds attr"); 5597 return MatchOperand_Success; 5598 } 5599 5600 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5601 5602 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5603 AMDGPUOperand::ImmTyInterpAttr)); 5604 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5605 AMDGPUOperand::ImmTyAttrChan)); 5606 return MatchOperand_Success; 5607 } 5608 5609 //===----------------------------------------------------------------------===// 5610 // exp 5611 //===----------------------------------------------------------------------===// 5612 5613 void AMDGPUAsmParser::errorExpTgt() { 5614 Error(Parser.getTok().getLoc(), "invalid exp target"); 5615 } 5616 5617 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5618 uint8_t &Val) { 5619 if (Str == "null") { 5620 Val = 9; 5621 return MatchOperand_Success; 5622 } 5623 5624 if (Str.startswith("mrt")) { 5625 Str = Str.drop_front(3); 5626 if (Str == "z") { // == mrtz 5627 Val = 8; 5628 return MatchOperand_Success; 5629 } 5630 5631 if (Str.getAsInteger(10, Val)) 5632 return MatchOperand_ParseFail; 5633 5634 if (Val > 7) 5635 errorExpTgt(); 5636 5637 return MatchOperand_Success; 5638 } 5639 5640 if (Str.startswith("pos")) 
{ 5641 Str = Str.drop_front(3); 5642 if (Str.getAsInteger(10, Val)) 5643 return MatchOperand_ParseFail; 5644 5645 if (Val > 4 || (Val == 4 && !isGFX10())) 5646 errorExpTgt(); 5647 5648 Val += 12; 5649 return MatchOperand_Success; 5650 } 5651 5652 if (isGFX10() && Str == "prim") { 5653 Val = 20; 5654 return MatchOperand_Success; 5655 } 5656 5657 if (Str.startswith("param")) { 5658 Str = Str.drop_front(5); 5659 if (Str.getAsInteger(10, Val)) 5660 return MatchOperand_ParseFail; 5661 5662 if (Val >= 32) 5663 errorExpTgt(); 5664 5665 Val += 32; 5666 return MatchOperand_Success; 5667 } 5668 5669 if (Str.startswith("invalid_target_")) { 5670 Str = Str.drop_front(15); 5671 if (Str.getAsInteger(10, Val)) 5672 return MatchOperand_ParseFail; 5673 5674 errorExpTgt(); 5675 return MatchOperand_Success; 5676 } 5677 5678 return MatchOperand_NoMatch; 5679 } 5680 5681 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5682 uint8_t Val; 5683 StringRef Str = Parser.getTok().getString(); 5684 5685 auto Res = parseExpTgtImpl(Str, Val); 5686 if (Res != MatchOperand_Success) 5687 return Res; 5688 5689 SMLoc S = Parser.getTok().getLoc(); 5690 Parser.Lex(); 5691 5692 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5693 AMDGPUOperand::ImmTyExpTgt)); 5694 return MatchOperand_Success; 5695 } 5696 5697 //===----------------------------------------------------------------------===// 5698 // parser helpers 5699 //===----------------------------------------------------------------------===// 5700 5701 bool 5702 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5703 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5704 } 5705 5706 bool 5707 AMDGPUAsmParser::isId(const StringRef Id) const { 5708 return isId(getToken(), Id); 5709 } 5710 5711 bool 5712 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5713 return getTokenKind() == Kind; 5714 } 5715 5716 bool 5717 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5718 if (isId(Id)) { 5719 lex(); 5720 return true; 5721 } 5722 return false; 5723 } 5724 5725 bool 5726 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5727 if (isId(Id) && peekToken().is(Kind)) { 5728 lex(); 5729 lex(); 5730 return true; 5731 } 5732 return false; 5733 } 5734 5735 bool 5736 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5737 if (isToken(Kind)) { 5738 lex(); 5739 return true; 5740 } 5741 return false; 5742 } 5743 5744 bool 5745 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5746 const StringRef ErrMsg) { 5747 if (!trySkipToken(Kind)) { 5748 Error(getLoc(), ErrMsg); 5749 return false; 5750 } 5751 return true; 5752 } 5753 5754 bool 5755 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5756 return !getParser().parseAbsoluteExpression(Imm); 5757 } 5758 5759 bool 5760 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5761 SMLoc S = getLoc(); 5762 5763 const MCExpr *Expr; 5764 if (Parser.parseExpression(Expr)) 5765 return false; 5766 5767 int64_t IntVal; 5768 if (Expr->evaluateAsAbsolute(IntVal)) { 5769 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5770 } else { 5771 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5772 } 5773 return true; 5774 } 5775 5776 bool 5777 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5778 if (isToken(AsmToken::String)) { 5779 Val = getToken().getStringContents(); 5780 lex(); 5781 return true; 5782 } else { 5783 Error(getLoc(), ErrMsg); 5784 return false; 5785 } 5786 } 5787 5788 
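// The helpers in this section are typically combined as follows when parsing a custom operand of the form "name(<expr>)" (illustrative sketch only, not an actual operand parser in this file):
//   if (trySkipId("name", AsmToken::LParen)) {
//     int64_t Val;
//     if (!parseExpr(Val) ||
//         !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
//       return false;
//   }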
bool 5789 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5790 if (isToken(AsmToken::Identifier)) { 5791 Val = getTokenStr(); 5792 lex(); 5793 return true; 5794 } else { 5795 Error(getLoc(), ErrMsg); 5796 return false; 5797 } 5798 } 5799 5800 AsmToken 5801 AMDGPUAsmParser::getToken() const { 5802 return Parser.getTok(); 5803 } 5804 5805 AsmToken 5806 AMDGPUAsmParser::peekToken() { 5807 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 5808 } 5809 5810 void 5811 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5812 auto TokCount = getLexer().peekTokens(Tokens); 5813 5814 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5815 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5816 } 5817 5818 AsmToken::TokenKind 5819 AMDGPUAsmParser::getTokenKind() const { 5820 return getLexer().getKind(); 5821 } 5822 5823 SMLoc 5824 AMDGPUAsmParser::getLoc() const { 5825 return getToken().getLoc(); 5826 } 5827 5828 StringRef 5829 AMDGPUAsmParser::getTokenStr() const { 5830 return getToken().getString(); 5831 } 5832 5833 void 5834 AMDGPUAsmParser::lex() { 5835 Parser.Lex(); 5836 } 5837 5838 //===----------------------------------------------------------------------===// 5839 // swizzle 5840 //===----------------------------------------------------------------------===// 5841 5842 LLVM_READNONE 5843 static unsigned 5844 encodeBitmaskPerm(const unsigned AndMask, 5845 const unsigned OrMask, 5846 const unsigned XorMask) { 5847 using namespace llvm::AMDGPU::Swizzle; 5848 5849 return BITMASK_PERM_ENC | 5850 (AndMask << BITMASK_AND_SHIFT) | 5851 (OrMask << BITMASK_OR_SHIFT) | 5852 (XorMask << BITMASK_XOR_SHIFT); 5853 } 5854 5855 bool 5856 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5857 const unsigned MinVal, 5858 const unsigned MaxVal, 5859 const StringRef ErrMsg) { 5860 for (unsigned i = 0; i < OpNum; ++i) { 5861 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5862 return false; 5863 } 5864 SMLoc ExprLoc = Parser.getTok().getLoc(); 5865 if (!parseExpr(Op[i])) { 5866 return false; 5867 } 5868 if (Op[i] < MinVal || Op[i] > MaxVal) { 5869 Error(ExprLoc, ErrMsg); 5870 return false; 5871 } 5872 } 5873 5874 return true; 5875 } 5876 5877 bool 5878 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5879 using namespace llvm::AMDGPU::Swizzle; 5880 5881 int64_t Lane[LANE_NUM]; 5882 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5883 "expected a 2-bit lane id")) { 5884 Imm = QUAD_PERM_ENC; 5885 for (unsigned I = 0; I < LANE_NUM; ++I) { 5886 Imm |= Lane[I] << (LANE_SHIFT * I); 5887 } 5888 return true; 5889 } 5890 return false; 5891 } 5892 5893 bool 5894 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5895 using namespace llvm::AMDGPU::Swizzle; 5896 5897 SMLoc S = Parser.getTok().getLoc(); 5898 int64_t GroupSize; 5899 int64_t LaneIdx; 5900 5901 if (!parseSwizzleOperands(1, &GroupSize, 5902 2, 32, 5903 "group size must be in the interval [2,32]")) { 5904 return false; 5905 } 5906 if (!isPowerOf2_64(GroupSize)) { 5907 Error(S, "group size must be a power of two"); 5908 return false; 5909 } 5910 if (parseSwizzleOperands(1, &LaneIdx, 5911 0, GroupSize - 1, 5912 "lane id must be in the interval [0,group size - 1]")) { 5913 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5914 return true; 5915 } 5916 return false; 5917 } 5918 5919 bool 5920 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5921 using namespace llvm::AMDGPU::Swizzle; 5922 5923 SMLoc S = Parser.getTok().getLoc(); 5924 int64_t GroupSize; 
5925 5926 if (!parseSwizzleOperands(1, &GroupSize, 5927 2, 32, "group size must be in the interval [2,32]")) { 5928 return false; 5929 } 5930 if (!isPowerOf2_64(GroupSize)) { 5931 Error(S, "group size must be a power of two"); 5932 return false; 5933 } 5934 5935 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5936 return true; 5937 } 5938 5939 bool 5940 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5941 using namespace llvm::AMDGPU::Swizzle; 5942 5943 SMLoc S = Parser.getTok().getLoc(); 5944 int64_t GroupSize; 5945 5946 if (!parseSwizzleOperands(1, &GroupSize, 5947 1, 16, "group size must be in the interval [1,16]")) { 5948 return false; 5949 } 5950 if (!isPowerOf2_64(GroupSize)) { 5951 Error(S, "group size must be a power of two"); 5952 return false; 5953 } 5954 5955 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5956 return true; 5957 } 5958 5959 bool 5960 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5961 using namespace llvm::AMDGPU::Swizzle; 5962 5963 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5964 return false; 5965 } 5966 5967 StringRef Ctl; 5968 SMLoc StrLoc = Parser.getTok().getLoc(); 5969 if (!parseString(Ctl)) { 5970 return false; 5971 } 5972 if (Ctl.size() != BITMASK_WIDTH) { 5973 Error(StrLoc, "expected a 5-character mask"); 5974 return false; 5975 } 5976 5977 unsigned AndMask = 0; 5978 unsigned OrMask = 0; 5979 unsigned XorMask = 0; 5980 5981 for (size_t i = 0; i < Ctl.size(); ++i) { 5982 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5983 switch(Ctl[i]) { 5984 default: 5985 Error(StrLoc, "invalid mask"); 5986 return false; 5987 case '0': 5988 break; 5989 case '1': 5990 OrMask |= Mask; 5991 break; 5992 case 'p': 5993 AndMask |= Mask; 5994 break; 5995 case 'i': 5996 AndMask |= Mask; 5997 XorMask |= Mask; 5998 break; 5999 } 6000 } 6001 6002 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6003 return true; 6004 } 6005 6006 bool 6007 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6008 6009 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6010 6011 if (!parseExpr(Imm)) { 6012 return false; 6013 } 6014 if (!isUInt<16>(Imm)) { 6015 Error(OffsetLoc, "expected a 16-bit offset"); 6016 return false; 6017 } 6018 return true; 6019 } 6020 6021 bool 6022 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6023 using namespace llvm::AMDGPU::Swizzle; 6024 6025 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6026 6027 SMLoc ModeLoc = Parser.getTok().getLoc(); 6028 bool Ok = false; 6029 6030 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6031 Ok = parseSwizzleQuadPerm(Imm); 6032 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6033 Ok = parseSwizzleBitmaskPerm(Imm); 6034 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6035 Ok = parseSwizzleBroadcast(Imm); 6036 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6037 Ok = parseSwizzleSwap(Imm); 6038 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6039 Ok = parseSwizzleReverse(Imm); 6040 } else { 6041 Error(ModeLoc, "expected a swizzle mode"); 6042 } 6043 6044 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6045 } 6046 6047 return false; 6048 } 6049 6050 OperandMatchResultTy 6051 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6052 SMLoc S = Parser.getTok().getLoc(); 6053 int64_t Imm = 0; 6054 6055 if (trySkipId("offset")) { 6056 6057 bool Ok = false; 6058 if (skipToken(AsmToken::Colon, "expected a colon")) { 6059 if (trySkipId("swizzle")) { 6060 Ok = parseSwizzleMacro(Imm); 6061 } else { 6062 Ok = parseSwizzleOffset(Imm); 6063 } 6064 } 6065 6066 
Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6067 6068 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6069 } else { 6070 // Swizzle "offset" operand is optional. 6071 // If it is omitted, try parsing other optional operands. 6072 return parseOptionalOpr(Operands); 6073 } 6074 } 6075 6076 bool 6077 AMDGPUOperand::isSwizzle() const { 6078 return isImmTy(ImmTySwizzle); 6079 } 6080 6081 //===----------------------------------------------------------------------===// 6082 // VGPR Index Mode 6083 //===----------------------------------------------------------------------===// 6084 6085 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6086 6087 using namespace llvm::AMDGPU::VGPRIndexMode; 6088 6089 if (trySkipToken(AsmToken::RParen)) { 6090 return OFF; 6091 } 6092 6093 int64_t Imm = 0; 6094 6095 while (true) { 6096 unsigned Mode = 0; 6097 SMLoc S = Parser.getTok().getLoc(); 6098 6099 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6100 if (trySkipId(IdSymbolic[ModeId])) { 6101 Mode = 1 << ModeId; 6102 break; 6103 } 6104 } 6105 6106 if (Mode == 0) { 6107 Error(S, (Imm == 0)? 6108 "expected a VGPR index mode or a closing parenthesis" : 6109 "expected a VGPR index mode"); 6110 break; 6111 } 6112 6113 if (Imm & Mode) { 6114 Error(S, "duplicate VGPR index mode"); 6115 break; 6116 } 6117 Imm |= Mode; 6118 6119 if (trySkipToken(AsmToken::RParen)) 6120 break; 6121 if (!skipToken(AsmToken::Comma, 6122 "expected a comma or a closing parenthesis")) 6123 break; 6124 } 6125 6126 return Imm; 6127 } 6128 6129 OperandMatchResultTy 6130 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6131 6132 int64_t Imm = 0; 6133 SMLoc S = Parser.getTok().getLoc(); 6134 6135 if (getLexer().getKind() == AsmToken::Identifier && 6136 Parser.getTok().getString() == "gpr_idx" && 6137 getLexer().peekTok().is(AsmToken::LParen)) { 6138 6139 Parser.Lex(); 6140 Parser.Lex(); 6141 6142 // If parse failed, trigger an error but do not return error code 6143 // to avoid excessive error messages. 6144 Imm = parseGPRIdxMacro(); 6145 6146 } else { 6147 if (getParser().parseAbsoluteExpression(Imm)) 6148 return MatchOperand_NoMatch; 6149 if (Imm < 0 || !isUInt<4>(Imm)) { 6150 Error(S, "invalid immediate: only 4-bit values are legal"); 6151 } 6152 } 6153 6154 Operands.push_back( 6155 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6156 return MatchOperand_Success; 6157 } 6158 6159 bool AMDGPUOperand::isGPRIdxMode() const { 6160 return isImmTy(ImmTyGprIdxMode); 6161 } 6162 6163 //===----------------------------------------------------------------------===// 6164 // sopp branch targets 6165 //===----------------------------------------------------------------------===// 6166 6167 OperandMatchResultTy 6168 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6169 6170 // Make sure we are not parsing something 6171 // that looks like a label or an expression but is not. 6172 // This will improve error messages. 6173 if (isRegister() || isModifier()) 6174 return MatchOperand_NoMatch; 6175 6176 if (parseExpr(Operands)) { 6177 6178 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6179 assert(Opr.isImm() || Opr.isExpr()); 6180 SMLoc Loc = Opr.getStartLoc(); 6181 6182 // Currently we do not support arbitrary expressions as branch targets. 6183 // Only labels and absolute expressions are accepted. 
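// E.g. "s_branch label" and "s_branch 4" are accepted here, while a relocatable expression such as "s_branch label+4" is rejected below.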
6184 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6185 Error(Loc, "expected an absolute expression or a label"); 6186 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6187 Error(Loc, "expected a 16-bit signed jump offset"); 6188 } 6189 } 6190 6191 return MatchOperand_Success; // avoid excessive error messages 6192 } 6193 6194 //===----------------------------------------------------------------------===// 6195 // Boolean holding registers 6196 //===----------------------------------------------------------------------===// 6197 6198 OperandMatchResultTy 6199 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6200 return parseReg(Operands); 6201 } 6202 6203 //===----------------------------------------------------------------------===// 6204 // mubuf 6205 //===----------------------------------------------------------------------===// 6206 6207 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6208 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6209 } 6210 6211 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6212 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6213 } 6214 6215 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6216 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6217 } 6218 6219 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6220 const OperandVector &Operands, 6221 bool IsAtomic, 6222 bool IsAtomicReturn, 6223 bool IsLds) { 6224 bool IsLdsOpcode = IsLds; 6225 bool HasLdsModifier = false; 6226 OptionalImmIndexMap OptionalIdx; 6227 assert(IsAtomicReturn ? IsAtomic : true); 6228 unsigned FirstOperandIdx = 1; 6229 6230 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6231 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6232 6233 // Add the register arguments 6234 if (Op.isReg()) { 6235 Op.addRegOperands(Inst, 1); 6236 // Insert a tied src for atomic return dst. 6237 // This cannot be postponed as subsequent calls to 6238 // addImmOperands rely on correct number of MC operands. 6239 if (IsAtomicReturn && i == FirstOperandIdx) 6240 Op.addRegOperands(Inst, 1); 6241 continue; 6242 } 6243 6244 // Handle the case where soffset is an immediate 6245 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6246 Op.addImmOperands(Inst, 1); 6247 continue; 6248 } 6249 6250 HasLdsModifier |= Op.isLDS(); 6251 6252 // Handle tokens like 'offen' which are sometimes hard-coded into the 6253 // asm string. There are no MCInst operands for these. 6254 if (Op.isToken()) { 6255 continue; 6256 } 6257 assert(Op.isImm()); 6258 6259 // Handle optional arguments 6260 OptionalIdx[Op.getImmTy()] = i; 6261 } 6262 6263 // This is a workaround for an llvm quirk which may result in an 6264 // incorrect instruction selection. Lds and non-lds versions of 6265 // MUBUF instructions are identical except that lds versions 6266 // have mandatory 'lds' modifier. However this modifier follows 6267 // optional modifiers and llvm asm matcher regards this 'lds' 6268 // modifier as an optional one. As a result, an lds version 6269 // of opcode may be selected even if it has no 'lds' modifier. 6270 if (IsLdsOpcode && !HasLdsModifier) { 6271 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6272 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6273 Inst.setOpcode(NoLdsOpcode); 6274 IsLdsOpcode = false; 6275 } 6276 } 6277 6278 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6279 if (!IsAtomic) { // glc is hard-coded. 
6280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6281 } 6282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6283 6284 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6286 } 6287 6288 if (isGFX10()) 6289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6290 } 6291 6292 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6293 OptionalImmIndexMap OptionalIdx; 6294 6295 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6296 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6297 6298 // Add the register arguments 6299 if (Op.isReg()) { 6300 Op.addRegOperands(Inst, 1); 6301 continue; 6302 } 6303 6304 // Handle the case where soffset is an immediate 6305 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6306 Op.addImmOperands(Inst, 1); 6307 continue; 6308 } 6309 6310 // Handle tokens like 'offen' which are sometimes hard-coded into the 6311 // asm string. There are no MCInst operands for these. 6312 if (Op.isToken()) { 6313 continue; 6314 } 6315 assert(Op.isImm()); 6316 6317 // Handle optional arguments 6318 OptionalIdx[Op.getImmTy()] = i; 6319 } 6320 6321 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6322 AMDGPUOperand::ImmTyOffset); 6323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6325 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6326 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6327 6328 if (isGFX10()) 6329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6330 } 6331 6332 //===----------------------------------------------------------------------===// 6333 // mimg 6334 //===----------------------------------------------------------------------===// 6335 6336 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6337 bool IsAtomic) { 6338 unsigned I = 1; 6339 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6340 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6341 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6342 } 6343 6344 if (IsAtomic) { 6345 // Add src, same as dst 6346 assert(Desc.getNumDefs() == 1); 6347 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6348 } 6349 6350 OptionalImmIndexMap OptionalIdx; 6351 6352 for (unsigned E = Operands.size(); I != E; ++I) { 6353 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6354 6355 // Add the register arguments 6356 if (Op.isReg()) { 6357 Op.addRegOperands(Inst, 1); 6358 } else if (Op.isImmModifier()) { 6359 OptionalIdx[Op.getImmTy()] = I; 6360 } else if (!Op.isToken()) { 6361 llvm_unreachable("unexpected operand type"); 6362 } 6363 } 6364 6365 bool IsGFX10 = isGFX10(); 6366 6367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6368 if (IsGFX10) 6369 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6370 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6371 if (IsGFX10) 6372 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6373 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6375 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6376 if (IsGFX10) 6377 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6378 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6380 if (!IsGFX10) 6381 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6382 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6383 } 6384 6385 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6386 cvtMIMG(Inst, Operands, true); 6387 } 6388 6389 //===----------------------------------------------------------------------===// 6390 // smrd 6391 //===----------------------------------------------------------------------===// 6392 6393 bool AMDGPUOperand::isSMRDOffset8() const { 6394 return isImm() && isUInt<8>(getImm()); 6395 } 6396 6397 bool AMDGPUOperand::isSMEMOffset() const { 6398 return isImm(); // Offset range is checked later by validator. 6399 } 6400 6401 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6402 // 32-bit literals are only supported on CI and we only want to use them 6403 // when the offset is > 8-bits. 6404 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6405 } 6406 6407 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6408 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6409 } 6410 6411 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6412 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6413 } 6414 6415 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6416 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6417 } 6418 6419 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6420 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6421 } 6422 6423 //===----------------------------------------------------------------------===// 6424 // vop3 6425 //===----------------------------------------------------------------------===// 6426 6427 static bool ConvertOmodMul(int64_t &Mul) { 6428 if (Mul != 1 && Mul != 2 && Mul != 4) 6429 return false; 6430 6431 Mul >>= 1; 6432 return true; 6433 } 6434 6435 static bool ConvertOmodDiv(int64_t &Div) { 6436 if (Div == 1) { 6437 Div = 0; 6438 return true; 6439 } 6440 6441 if (Div == 2) { 6442 Div = 3; 6443 return true; 6444 } 6445 6446 return false; 6447 } 6448 6449 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6450 if (BoundCtrl == 0) { 6451 BoundCtrl = 1; 6452 return true; 6453 } 6454 6455 if (BoundCtrl == -1) { 6456 BoundCtrl = 0; 6457 return true; 6458 } 6459 6460 return false; 6461 } 6462 6463 // Note: the order in this table matches the order of operands in AsmString. 
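// Each entry below gives the operand name as written in assembly, its immediate type, whether it is a bare flag (e.g. "glc") or takes a value (e.g. "offset:16"), and an optional converter for the parsed value (e.g. ConvertOmodMul for "mul:2").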
6464 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6465 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6466 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6467 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6468 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6469 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6470 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6471 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6472 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6473 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6474 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6475 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6476 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6477 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6478 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6479 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6480 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6481 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6482 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6483 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6484 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6485 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6486 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6487 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6488 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6489 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6490 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6491 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6492 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6493 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6494 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6495 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6496 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6497 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6498 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6499 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6500 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6501 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6502 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6503 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6504 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6505 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6506 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6507 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6508 }; 6509 6510 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6511 6512 OperandMatchResultTy res = parseOptionalOpr(Operands); 6513 6514 // This is a hack to enable hardcoded mandatory operands which follow 6515 // optional operands. 6516 // 6517 // Current design assumes that all operands after the first optional operand 6518 // are also optional. However implementation of some instructions violates 6519 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6520 // 6521 // To alleviate this problem, we have to (implicitly) parse extra operands 6522 // to make sure autogenerated parser of custom operands never hit hardcoded 6523 // mandatory operands. 
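// For example, "flat_atomic_swap v0, v[1:2], v3 glc" uses an opcode whose asm string hardcodes the trailing "glc"; the lookahead below parses ahead so that the autogenerated parser of custom (optional) operands never reaches such a hardcoded mandatory token.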
6524 6525 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6526 if (res != MatchOperand_Success || 6527 isToken(AsmToken::EndOfStatement)) 6528 break; 6529 6530 trySkipToken(AsmToken::Comma); 6531 res = parseOptionalOpr(Operands); 6532 } 6533 6534 return res; 6535 } 6536 6537 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6538 OperandMatchResultTy res; 6539 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6540 // try to parse any optional operand here 6541 if (Op.IsBit) { 6542 res = parseNamedBit(Op.Name, Operands, Op.Type); 6543 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6544 res = parseOModOperand(Operands); 6545 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6546 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6547 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6548 res = parseSDWASel(Operands, Op.Name, Op.Type); 6549 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6550 res = parseSDWADstUnused(Operands); 6551 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6552 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6553 Op.Type == AMDGPUOperand::ImmTyNegLo || 6554 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6555 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6556 Op.ConvertResult); 6557 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6558 res = parseDim(Operands); 6559 } else { 6560 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6561 } 6562 if (res != MatchOperand_NoMatch) { 6563 return res; 6564 } 6565 } 6566 return MatchOperand_NoMatch; 6567 } 6568 6569 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6570 StringRef Name = Parser.getTok().getString(); 6571 if (Name == "mul") { 6572 return parseIntWithPrefix("mul", Operands, 6573 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6574 } 6575 6576 if (Name == "div") { 6577 return parseIntWithPrefix("div", Operands, 6578 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6579 } 6580 6581 return MatchOperand_NoMatch; 6582 } 6583 6584 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6585 cvtVOP3P(Inst, Operands); 6586 6587 int Opc = Inst.getOpcode(); 6588 6589 int SrcNum; 6590 const int Ops[] = { AMDGPU::OpName::src0, 6591 AMDGPU::OpName::src1, 6592 AMDGPU::OpName::src2 }; 6593 for (SrcNum = 0; 6594 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6595 ++SrcNum); 6596 assert(SrcNum > 0); 6597 6598 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6599 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6600 6601 if ((OpSel & (1 << SrcNum)) != 0) { 6602 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6603 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6604 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6605 } 6606 } 6607 6608 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6609 // 1. This operand is input modifiers 6610 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6611 // 2. This is not last operand 6612 && Desc.NumOperands > (OpNum + 1) 6613 // 3. Next operand is register class 6614 && Desc.OpInfo[OpNum + 1].RegClass != -1 6615 // 4. 
Next register is not tied to any other operand 6616 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6617 } 6618 6619 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6620 { 6621 OptionalImmIndexMap OptionalIdx; 6622 unsigned Opc = Inst.getOpcode(); 6623 6624 unsigned I = 1; 6625 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6626 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6627 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6628 } 6629 6630 for (unsigned E = Operands.size(); I != E; ++I) { 6631 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6632 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6633 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6634 } else if (Op.isInterpSlot() || 6635 Op.isInterpAttr() || 6636 Op.isAttrChan()) { 6637 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6638 } else if (Op.isImmModifier()) { 6639 OptionalIdx[Op.getImmTy()] = I; 6640 } else { 6641 llvm_unreachable("unhandled operand type"); 6642 } 6643 } 6644 6645 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6646 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6647 } 6648 6649 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6651 } 6652 6653 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6654 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6655 } 6656 } 6657 6658 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6659 OptionalImmIndexMap &OptionalIdx) { 6660 unsigned Opc = Inst.getOpcode(); 6661 6662 unsigned I = 1; 6663 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6664 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6665 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6666 } 6667 6668 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6669 // This instruction has src modifiers 6670 for (unsigned E = Operands.size(); I != E; ++I) { 6671 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6672 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6673 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6674 } else if (Op.isImmModifier()) { 6675 OptionalIdx[Op.getImmTy()] = I; 6676 } else if (Op.isRegOrImm()) { 6677 Op.addRegOrImmOperands(Inst, 1); 6678 } else { 6679 llvm_unreachable("unhandled operand type"); 6680 } 6681 } 6682 } else { 6683 // No src modifiers 6684 for (unsigned E = Operands.size(); I != E; ++I) { 6685 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6686 if (Op.isMod()) { 6687 OptionalIdx[Op.getImmTy()] = I; 6688 } else { 6689 Op.addRegOrImmOperands(Inst, 1); 6690 } 6691 } 6692 } 6693 6694 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6695 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6696 } 6697 6698 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6700 } 6701 6702 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6703 // it has src2 register operand that is tied to dst operand 6704 // we don't allow modifiers for this operand in assembler so src2_modifiers 6705 // should be 0. 
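// E.g. for "v_mac_f32_e64 v0, v1, v2" only dst, src0 and src1 are written; the code below appends src2_modifiers = 0 and a src2 operand that duplicates dst (the tied operand) so that the resulting MCInst has the full VOP3 operand list.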
6706 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6707 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6708 Opc == AMDGPU::V_MAC_F32_e64_vi || 6709 Opc == AMDGPU::V_MAC_F16_e64_vi || 6710 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6711 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6712 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6713 auto it = Inst.begin(); 6714 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6715 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6716 ++it; 6717 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6718 } 6719 } 6720 6721 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6722 OptionalImmIndexMap OptionalIdx; 6723 cvtVOP3(Inst, Operands, OptionalIdx); 6724 } 6725 6726 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6727 const OperandVector &Operands) { 6728 OptionalImmIndexMap OptIdx; 6729 const int Opc = Inst.getOpcode(); 6730 const MCInstrDesc &Desc = MII.get(Opc); 6731 6732 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6733 6734 cvtVOP3(Inst, Operands, OptIdx); 6735 6736 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6737 assert(!IsPacked); 6738 Inst.addOperand(Inst.getOperand(0)); 6739 } 6740 6741 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6742 // instruction, and then figure out where to actually put the modifiers 6743 6744 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6745 6746 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6747 if (OpSelHiIdx != -1) { 6748 int DefaultVal = IsPacked ? -1 : 0; 6749 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6750 DefaultVal); 6751 } 6752 6753 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6754 if (NegLoIdx != -1) { 6755 assert(IsPacked); 6756 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6757 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6758 } 6759 6760 const int Ops[] = { AMDGPU::OpName::src0, 6761 AMDGPU::OpName::src1, 6762 AMDGPU::OpName::src2 }; 6763 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6764 AMDGPU::OpName::src1_modifiers, 6765 AMDGPU::OpName::src2_modifiers }; 6766 6767 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6768 6769 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6770 unsigned OpSelHi = 0; 6771 unsigned NegLo = 0; 6772 unsigned NegHi = 0; 6773 6774 if (OpSelHiIdx != -1) { 6775 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6776 } 6777 6778 if (NegLoIdx != -1) { 6779 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6780 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6781 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6782 } 6783 6784 for (int J = 0; J < 3; ++J) { 6785 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6786 if (OpIdx == -1) 6787 break; 6788 6789 uint32_t ModVal = 0; 6790 6791 if ((OpSel & (1 << J)) != 0) 6792 ModVal |= SISrcMods::OP_SEL_0; 6793 6794 if ((OpSelHi & (1 << J)) != 0) 6795 ModVal |= SISrcMods::OP_SEL_1; 6796 6797 if ((NegLo & (1 << J)) != 0) 6798 ModVal |= SISrcMods::NEG; 6799 6800 if ((NegHi & (1 << J)) != 0) 6801 ModVal |= SISrcMods::NEG_HI; 6802 6803 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6804 6805 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6806 } 6807 } 6808 6809 //===----------------------------------------------------------------------===// 6810 // dpp 6811 
//===----------------------------------------------------------------------===// 6812 6813 bool AMDGPUOperand::isDPP8() const { 6814 return isImmTy(ImmTyDPP8); 6815 } 6816 6817 bool AMDGPUOperand::isDPPCtrl() const { 6818 using namespace AMDGPU::DPP; 6819 6820 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6821 if (result) { 6822 int64_t Imm = getImm(); 6823 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6824 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6825 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6826 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6827 (Imm == DppCtrl::WAVE_SHL1) || 6828 (Imm == DppCtrl::WAVE_ROL1) || 6829 (Imm == DppCtrl::WAVE_SHR1) || 6830 (Imm == DppCtrl::WAVE_ROR1) || 6831 (Imm == DppCtrl::ROW_MIRROR) || 6832 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6833 (Imm == DppCtrl::BCAST15) || 6834 (Imm == DppCtrl::BCAST31) || 6835 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6836 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6837 } 6838 return false; 6839 } 6840 6841 //===----------------------------------------------------------------------===// 6842 // mAI 6843 //===----------------------------------------------------------------------===// 6844 6845 bool AMDGPUOperand::isBLGP() const { 6846 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6847 } 6848 6849 bool AMDGPUOperand::isCBSZ() const { 6850 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6851 } 6852 6853 bool AMDGPUOperand::isABID() const { 6854 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6855 } 6856 6857 bool AMDGPUOperand::isS16Imm() const { 6858 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6859 } 6860 6861 bool AMDGPUOperand::isU16Imm() const { 6862 return isImm() && isUInt<16>(getImm()); 6863 } 6864 6865 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6866 if (!isGFX10()) 6867 return MatchOperand_NoMatch; 6868 6869 SMLoc S = Parser.getTok().getLoc(); 6870 6871 if (getLexer().isNot(AsmToken::Identifier)) 6872 return MatchOperand_NoMatch; 6873 if (getLexer().getTok().getString() != "dim") 6874 return MatchOperand_NoMatch; 6875 6876 Parser.Lex(); 6877 if (getLexer().isNot(AsmToken::Colon)) 6878 return MatchOperand_ParseFail; 6879 6880 Parser.Lex(); 6881 6882 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6883 // integer. 
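// E.g. "dim:2D" is lexed as the integer "2" followed by the identifier "D"; the two tokens are glued back together below (they must be adjacent), and the long form "dim:SQ_RSRC_IMG_2D" is accepted as well.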
6884 std::string Token; 6885 if (getLexer().is(AsmToken::Integer)) { 6886 SMLoc Loc = getLexer().getTok().getEndLoc(); 6887 Token = std::string(getLexer().getTok().getString()); 6888 Parser.Lex(); 6889 if (getLexer().getTok().getLoc() != Loc) 6890 return MatchOperand_ParseFail; 6891 } 6892 if (getLexer().isNot(AsmToken::Identifier)) 6893 return MatchOperand_ParseFail; 6894 Token += getLexer().getTok().getString(); 6895 6896 StringRef DimId = Token; 6897 if (DimId.startswith("SQ_RSRC_IMG_")) 6898 DimId = DimId.substr(12); 6899 6900 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6901 if (!DimInfo) 6902 return MatchOperand_ParseFail; 6903 6904 Parser.Lex(); 6905 6906 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6907 AMDGPUOperand::ImmTyDim)); 6908 return MatchOperand_Success; 6909 } 6910 6911 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6912 SMLoc S = Parser.getTok().getLoc(); 6913 StringRef Prefix; 6914 6915 if (getLexer().getKind() == AsmToken::Identifier) { 6916 Prefix = Parser.getTok().getString(); 6917 } else { 6918 return MatchOperand_NoMatch; 6919 } 6920 6921 if (Prefix != "dpp8") 6922 return parseDPPCtrl(Operands); 6923 if (!isGFX10()) 6924 return MatchOperand_NoMatch; 6925 6926 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6927 6928 int64_t Sels[8]; 6929 6930 Parser.Lex(); 6931 if (getLexer().isNot(AsmToken::Colon)) 6932 return MatchOperand_ParseFail; 6933 6934 Parser.Lex(); 6935 if (getLexer().isNot(AsmToken::LBrac)) 6936 return MatchOperand_ParseFail; 6937 6938 Parser.Lex(); 6939 if (getParser().parseAbsoluteExpression(Sels[0])) 6940 return MatchOperand_ParseFail; 6941 if (0 > Sels[0] || 7 < Sels[0]) 6942 return MatchOperand_ParseFail; 6943 6944 for (size_t i = 1; i < 8; ++i) { 6945 if (getLexer().isNot(AsmToken::Comma)) 6946 return MatchOperand_ParseFail; 6947 6948 Parser.Lex(); 6949 if (getParser().parseAbsoluteExpression(Sels[i])) 6950 return MatchOperand_ParseFail; 6951 if (0 > Sels[i] || 7 < Sels[i]) 6952 return MatchOperand_ParseFail; 6953 } 6954 6955 if (getLexer().isNot(AsmToken::RBrac)) 6956 return MatchOperand_ParseFail; 6957 Parser.Lex(); 6958 6959 unsigned DPP8 = 0; 6960 for (size_t i = 0; i < 8; ++i) 6961 DPP8 |= (Sels[i] << (i * 3)); 6962 6963 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6964 return MatchOperand_Success; 6965 } 6966 6967 OperandMatchResultTy 6968 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6969 using namespace AMDGPU::DPP; 6970 6971 SMLoc S = Parser.getTok().getLoc(); 6972 StringRef Prefix; 6973 int64_t Int; 6974 6975 if (getLexer().getKind() == AsmToken::Identifier) { 6976 Prefix = Parser.getTok().getString(); 6977 } else { 6978 return MatchOperand_NoMatch; 6979 } 6980 6981 if (Prefix == "row_mirror") { 6982 Int = DppCtrl::ROW_MIRROR; 6983 Parser.Lex(); 6984 } else if (Prefix == "row_half_mirror") { 6985 Int = DppCtrl::ROW_HALF_MIRROR; 6986 Parser.Lex(); 6987 } else { 6988 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6989 if (Prefix != "quad_perm" 6990 && Prefix != "row_shl" 6991 && Prefix != "row_shr" 6992 && Prefix != "row_ror" 6993 && Prefix != "wave_shl" 6994 && Prefix != "wave_rol" 6995 && Prefix != "wave_shr" 6996 && Prefix != "wave_ror" 6997 && Prefix != "row_bcast" 6998 && Prefix != "row_share" 6999 && Prefix != "row_xmask") { 7000 return MatchOperand_NoMatch; 7001 } 7002 7003 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 7004 return MatchOperand_NoMatch; 7005 7006 if 
(!isVI() && !isGFX9() && 7007 (Prefix == "wave_shl" || Prefix == "wave_shr" || 7008 Prefix == "wave_rol" || Prefix == "wave_ror" || 7009 Prefix == "row_bcast")) 7010 return MatchOperand_NoMatch; 7011 7012 Parser.Lex(); 7013 if (getLexer().isNot(AsmToken::Colon)) 7014 return MatchOperand_ParseFail; 7015 7016 if (Prefix == "quad_perm") { 7017 // quad_perm:[%d,%d,%d,%d] 7018 Parser.Lex(); 7019 if (getLexer().isNot(AsmToken::LBrac)) 7020 return MatchOperand_ParseFail; 7021 Parser.Lex(); 7022 7023 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 7024 return MatchOperand_ParseFail; 7025 7026 for (int i = 0; i < 3; ++i) { 7027 if (getLexer().isNot(AsmToken::Comma)) 7028 return MatchOperand_ParseFail; 7029 Parser.Lex(); 7030 7031 int64_t Temp; 7032 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 7033 return MatchOperand_ParseFail; 7034 const int shift = i*2 + 2; 7035 Int += (Temp << shift); 7036 } 7037 7038 if (getLexer().isNot(AsmToken::RBrac)) 7039 return MatchOperand_ParseFail; 7040 Parser.Lex(); 7041 } else { 7042 // sel:%d 7043 Parser.Lex(); 7044 if (getParser().parseAbsoluteExpression(Int)) 7045 return MatchOperand_ParseFail; 7046 7047 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 7048 Int |= DppCtrl::ROW_SHL0; 7049 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 7050 Int |= DppCtrl::ROW_SHR0; 7051 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 7052 Int |= DppCtrl::ROW_ROR0; 7053 } else if (Prefix == "wave_shl" && 1 == Int) { 7054 Int = DppCtrl::WAVE_SHL1; 7055 } else if (Prefix == "wave_rol" && 1 == Int) { 7056 Int = DppCtrl::WAVE_ROL1; 7057 } else if (Prefix == "wave_shr" && 1 == Int) { 7058 Int = DppCtrl::WAVE_SHR1; 7059 } else if (Prefix == "wave_ror" && 1 == Int) { 7060 Int = DppCtrl::WAVE_ROR1; 7061 } else if (Prefix == "row_bcast") { 7062 if (Int == 15) { 7063 Int = DppCtrl::BCAST15; 7064 } else if (Int == 31) { 7065 Int = DppCtrl::BCAST31; 7066 } else { 7067 return MatchOperand_ParseFail; 7068 } 7069 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 7070 Int |= DppCtrl::ROW_SHARE_FIRST; 7071 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 7072 Int |= DppCtrl::ROW_XMASK_FIRST; 7073 } else { 7074 return MatchOperand_ParseFail; 7075 } 7076 } 7077 } 7078 7079 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 7080 return MatchOperand_Success; 7081 } 7082 7083 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7084 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7085 } 7086 7087 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7088 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7089 } 7090 7091 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7092 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7093 } 7094 7095 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7096 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7097 } 7098 7099 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7100 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7101 } 7102 7103 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7104 OptionalImmIndexMap OptionalIdx; 7105 7106 unsigned I = 1; 7107 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7108 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7109 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7110 } 7111 7112 int Fi = 0; 7113 for (unsigned E = Operands.size(); I != E; ++I) { 7114 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7115 MCOI::TIED_TO); 7116 if (TiedTo != -1) { 7117 assert((unsigned)TiedTo < Inst.getNumOperands()); 7118 // handle tied old or src2 for MAC instructions 7119 Inst.addOperand(Inst.getOperand(TiedTo)); 7120 } 7121 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7122 // Add the register arguments 7123 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7124 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7125 // Skip it. 7126 continue; 7127 } 7128 7129 if (IsDPP8) { 7130 if (Op.isDPP8()) { 7131 Op.addImmOperands(Inst, 1); 7132 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7133 Op.addRegWithFPInputModsOperands(Inst, 2); 7134 } else if (Op.isFI()) { 7135 Fi = Op.getImm(); 7136 } else if (Op.isReg()) { 7137 Op.addRegOperands(Inst, 1); 7138 } else { 7139 llvm_unreachable("Invalid operand type"); 7140 } 7141 } else { 7142 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7143 Op.addRegWithFPInputModsOperands(Inst, 2); 7144 } else if (Op.isDPPCtrl()) { 7145 Op.addImmOperands(Inst, 1); 7146 } else if (Op.isImm()) { 7147 // Handle optional arguments 7148 OptionalIdx[Op.getImmTy()] = I; 7149 } else { 7150 llvm_unreachable("Invalid operand type"); 7151 } 7152 } 7153 } 7154 7155 if (IsDPP8) { 7156 using namespace llvm::AMDGPU::DPP; 7157 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7158 } else { 7159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7162 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7164 } 7165 } 7166 } 7167 7168 //===----------------------------------------------------------------------===// 7169 // sdwa 7170 //===----------------------------------------------------------------------===// 7171 7172 OperandMatchResultTy 7173 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7174 AMDGPUOperand::ImmTy Type) { 7175 using namespace llvm::AMDGPU::SDWA; 7176 7177 SMLoc S = Parser.getTok().getLoc(); 7178 StringRef Value; 7179 OperandMatchResultTy res; 7180 7181 res = parseStringWithPrefix(Prefix, Value); 7182 if (res != MatchOperand_Success) { 7183 return res; 7184 } 7185 7186 int64_t Int; 7187 Int = StringSwitch<int64_t>(Value) 7188 .Case("BYTE_0", SdwaSel::BYTE_0) 7189 .Case("BYTE_1", SdwaSel::BYTE_1) 7190 .Case("BYTE_2", SdwaSel::BYTE_2) 7191 .Case("BYTE_3", SdwaSel::BYTE_3) 7192 .Case("WORD_0", SdwaSel::WORD_0) 7193 .Case("WORD_1", SdwaSel::WORD_1) 7194 .Case("DWORD", SdwaSel::DWORD) 7195 .Default(0xffffffff); 7196 Parser.Lex(); // eat last token 7197 7198 if (Int == 0xffffffff) { 7199 return MatchOperand_ParseFail; 7200 } 7201 7202 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7203 return MatchOperand_Success; 7204 } 7205 7206 OperandMatchResultTy 7207 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 7208 using namespace llvm::AMDGPU::SDWA; 7209 7210 SMLoc S = Parser.getTok().getLoc(); 7211 StringRef Value; 7212 OperandMatchResultTy res; 7213 7214 res = parseStringWithPrefix("dst_unused", Value); 7215 if (res != 

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
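      // Concretely: when the MCInst so far holds only the vdst (1 operand),
      // this "vcc" is the carry-out written as the asm dst; when it holds
      // vdst plus src0 and src1 with their modifiers (5 operands), it is the
      // carry-in source (the trailing "vcc").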
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
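  // (Illustrative) In "v_mac_f32_sdwa v0, v1, v2 ..." no src2 is written in
  // the assembly, so the destination register is re-inserted at the src2
  // position to satisfy the tied-operand constraint.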
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
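
// Illustrative syntax accepted by parseEndpgmOp below (assuming the usual
// s_endpgm forms): "s_endpgm" (immediate defaults to 0) or "s_endpgm 3".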

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }