1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Classification of a parsed register.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed assembly operand. An operand is exactly one of a token, an
/// immediate, a register or an expression (see KindTy); the matching payload
/// lives in the anonymous union below. Instances are built through the static
/// Create* factories at the end of the class.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator selecting the active member of the payload union.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Parser that created this operand (stored as passed to the constructor).
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers attached to a register or untyped immediate operand.
  /// Abs and Neg are grouped as "FP" modifiers, Sext as the "int" modifier.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    /// Encodes Abs/Neg as the SISrcMods::ABS / SISrcMods::NEG bits.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    /// Encodes Sext as the SISrcMods::SEXT bit.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    /// Returns the FP encoding when any FP modifier is set, otherwise the
    /// int encoding when Sext is set, otherwise 0. Having both kinds set at
    /// once is asserted against.
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// Tag stored in an Immediate operand. ImmTyNone marks an untagged
  /// immediate; every other value makes isImmModifier() return true.
  /// printImmTy() must be kept in sync with this list.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  // Token payload: a non-owning pointer/length pair (set from a StringRef in
  // CreateToken).
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Immediate payload: value, tag, FP flag and source modifiers.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Register payload: register number and source modifiers.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Payload storage; which member is valid is determined by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  /// True for Token operands and for Expression operands that are a plain
  /// symbol reference (see the comment in the body).
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  /// True for any Register operand, with or without modifiers.
  bool isRegKind() const {
    return Kind == Register;
  }

  /// True only for a Register operand that carries no modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  /// Accepts a register of class RCID, or an inlinable or literal immediate
  /// of the given type. Unlike isRegOrInlineNoMods(), modifiers are allowed.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  /// True if the operand belongs to any of the listed VGPR/VReg classes
  /// (32 through 512 bits).
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  /// True for an Immediate operand tagged with exactly ImmT.
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  /// True for an Immediate operand with any tag other than ImmTyNone.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // Tag predicates. Most only test the ImmTy tag; the offset and FORMAT
  // predicates additionally range-check the value with isUInt<N>.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  // Accepts either offset tag; no range check is applied here.
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  /// Accepts a register of class RCID or an inlinable immediate of the given
  /// type, provided the operand carries no modifiers. This is the building
  /// block for the SCSrc*/VCSrc*/VISrc*/AISrc* predicates below.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: SReg_32 / SReg_64 register or inline constant.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: the SCSrc check widened to also accept a literal immediate
  // (and, for the 32-bit forms, an expression).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // Marked unreachable; the return statement after llvm_unreachable is never
  // executed.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  // NOTE(review): the F-typed SSrc predicates below build on the B-typed
  // (integer) inline check rather than isSCSrcF*; confirm this is intended.
  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  // Marked unreachable; the return statement after llvm_unreachable is never
  // executed.
  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: VS_32 / VS_64 register or inline constant.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: the VCSrc check widened to also accept a literal immediate (and,
  // for the 32-bit forms, an expression).
  // NOTE(review): the B-typed VSrc predicates build on the F-typed inline
  // check (isVCSrcF*); confirm this is intended.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // VISrc*: VGPR_32 register or inline constant.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  // AISrc*: AGPR_32 register or inline constant.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  // AISrc_128*: AReg_128 register or inline constant.
  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  // AISrc_512*: AReg_512 register or inline constant.
  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  // AISrc_1024*: AReg_1024 register or inline constant.
  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // This operand class never reports itself as a memory operand.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  /// Returns the name of the symbol referenced by an Expression operand.
  /// The cast<> requires Expr to be an MCSymbolRefExpr.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  /// Returns the token text; for an Expression operand accepted by isToken()
  /// this is the referenced symbol's name.
  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  // Asserts on isRegKind(), not isReg(), so registers with modifiers are
  // accepted here.
  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  /// Returns the modifiers of a register or untagged-immediate operand; any
  /// other operand kind trips the assert.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  /// Stores Mods into the register or untagged-immediate payload; any other
  /// operand kind trips the assert.
  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  /// Appends this operand to Inst as a register, an expression or an
  /// immediate, depending on its kind.
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  /// Appends two operands to Inst: first the encoded modifiers as an
  /// immediate, then the register or the immediate value itself. The
  /// immediate is added with ApplyModifiers = false.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  /// Register-only variant of addRegOrImmWithInputModsOperands: appends the
  /// encoded modifiers, then the register. Note the modifiers operand is
  /// added before the isRegKind() assert is evaluated.
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  /// Appends a branch target, which is either an immediate or an expression.
  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  /// Writes a short name for Type to OS. There is no default case, so each
  /// ImmTy enumerator needs its own entry here.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  /// Prints a human-readable description of the operand to OS; the format
  /// depends on the operand kind.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  /// Creates an Immediate operand with default (empty) modifiers. Both the
  /// start and end locations are set to Loc.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  /// Creates a Token operand that points at Str's storage without copying
  /// it. HasExplicitEncodingSize is not used in this body.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  /// Creates a Register operand spanning [S, E] with default (empty)
  /// modifiers.
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  /// Creates an Expression operand. Both the start and end locations are
  /// set to S.
  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Streams the three modifier flags to OS (used by AMDGPUOperand::print).
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
943 class KernelScopeInfo { 944 int SgprIndexUnusedMin = -1; 945 int VgprIndexUnusedMin = -1; 946 MCContext *Ctx = nullptr; 947 948 void usesSgprAt(int i) { 949 if (i >= SgprIndexUnusedMin) { 950 SgprIndexUnusedMin = ++i; 951 if (Ctx) { 952 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 953 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 954 } 955 } 956 } 957 958 void usesVgprAt(int i) { 959 if (i >= VgprIndexUnusedMin) { 960 VgprIndexUnusedMin = ++i; 961 if (Ctx) { 962 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 963 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 964 } 965 } 966 } 967 968 public: 969 KernelScopeInfo() = default; 970 971 void initialize(MCContext &Context) { 972 Ctx = &Context; 973 usesSgprAt(SgprIndexUnusedMin = -1); 974 usesVgprAt(VgprIndexUnusedMin = -1); 975 } 976 977 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 978 switch (RegKind) { 979 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 980 case IS_AGPR: // fall through 981 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 982 default: break; 983 } 984 } 985 }; 986 987 class AMDGPUAsmParser : public MCTargetAsmParser { 988 MCAsmParser &Parser; 989 990 // Number of extra operands parsed after the first optional operand. 991 // This may be necessary to skip hardcoded mandatory operands. 
992 static const unsigned MAX_OPR_LOOKAHEAD = 8; 993 994 unsigned ForcedEncodingSize = 0; 995 bool ForcedDPP = false; 996 bool ForcedSDWA = false; 997 KernelScopeInfo KernelScope; 998 999 /// @name Auto-generated Match Functions 1000 /// { 1001 1002 #define GET_ASSEMBLER_HEADER 1003 #include "AMDGPUGenAsmMatcher.inc" 1004 1005 /// } 1006 1007 private: 1008 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1009 bool OutOfRangeError(SMRange Range); 1010 /// Calculate VGPR/SGPR blocks required for given target, reserved 1011 /// registers, and user-specified NextFreeXGPR values. 1012 /// 1013 /// \param Features [in] Target features, used for bug corrections. 1014 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1015 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1016 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1017 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1018 /// descriptor field, if valid. 1019 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1020 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1021 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1022 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1023 /// \param VGPRBlocks [out] Result VGPR block count. 1024 /// \param SGPRBlocks [out] Result SGPR block count. 
// Member declarations of AMDGPUAsmParser (the class header is earlier in the file).
  // Computes the VGPR/SGPR block counts reported through \p VGPRBlocks and
  // \p SGPRBlocks. NOTE(review): the meaning of the bool result is not visible
  // from this declaration - confirm against the definition.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);

  // Assembler directive handlers (declarations only).
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register parsing helpers and GPR-count symbol bookkeeping.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);

  // Shared implementations behind the public cvtMubuf*/cvtDS* wrappers below.
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  // Target-specific match result codes, numbered after the generic ones.
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  // Mode selector accepted by parseOperand().
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  /// Sets up the parser: registers it as a parser extension, toggles the
  /// "southern-islands" feature when no feature bits are set, computes the
  /// available features, and pre-defines the ISA version symbols.
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      // Code object v3 targets (ISA major >= 6) get the ".amdgcn.*" symbol
      // names; everything else gets the ".option.machine_version_*" names.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      // Same condition as above: v3 targets initialize the GPR count symbols,
      // all others initialize the kernel scope instead.
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget queries. Each one forwards to the AMDGPU:: helper of the same
  // name, or tests a single feature bit, for this parser's subtarget.
  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  // True on every subtarget except VI and GFX9.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  // True only on GFX10.
  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  // Returns the streamer's target streamer downcast to the AMDGPU type. The
  // pointer is dereferenced unconditionally, so a target streamer must exist.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Accessors for the forced-encoding state.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  // VOP3 counts as forced when the forced encoding size is 64.
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  // Main parsing entry points, including the MCTargetAsmParser overrides.
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Parsers for prefixed and named operands.
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Modifier detection and register/immediate operand parsers.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  // DS/EXP converters. cvtDS and cvtDSGds differ only in the IsGdsHardcoded
  // flag they pass to cvtDSImpl.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // A parsed id together with two flags that both default to false.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  // Instruction validation checks.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Token-level helpers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  // Swizzle operand parsers.
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF converters. Each wrapper fixes the (IsAtomic, IsAtomicReturn, IsLds)
  // flags passed to cvtMubufImpl.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Factories for default optional operands.
  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

1374 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1375 OptionalImmIndexMap &OptionalIdx); 1376 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1377 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1378 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1379 1380 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1381 1382 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1383 bool IsAtomic = false); 1384 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1385 1386 OperandMatchResultTy parseDim(OperandVector &Operands); 1387 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1388 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1389 AMDGPUOperand::Ptr defaultRowMask() const; 1390 AMDGPUOperand::Ptr defaultBankMask() const; 1391 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1392 AMDGPUOperand::Ptr defaultFI() const; 1393 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1394 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1395 1396 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1397 AMDGPUOperand::ImmTy Type); 1398 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1399 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1400 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1401 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1402 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1403 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1404 uint64_t BasicInstType, bool skipVcc = false); 1405 1406 AMDGPUOperand::Ptr defaultBLGP() const; 1407 AMDGPUOperand::Ptr defaultCBSZ() const; 1408 AMDGPUOperand::Ptr defaultABID() const; 1409 1410 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1411 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1412 }; 1413 1414 struct OptionalOperand { 
1415 const char *Name; 1416 AMDGPUOperand::ImmTy Type; 1417 bool IsBit; 1418 bool (*ConvertResult)(int64_t&); 1419 }; 1420 1421 } // end anonymous namespace 1422 1423 // May be called with integer type with equivalent bitwidth. 1424 static const fltSemantics *getFltSemantics(unsigned Size) { 1425 switch (Size) { 1426 case 4: 1427 return &APFloat::IEEEsingle(); 1428 case 8: 1429 return &APFloat::IEEEdouble(); 1430 case 2: 1431 return &APFloat::IEEEhalf(); 1432 default: 1433 llvm_unreachable("unsupported fp type"); 1434 } 1435 } 1436 1437 static const fltSemantics *getFltSemantics(MVT VT) { 1438 return getFltSemantics(VT.getSizeInBits() / 8); 1439 } 1440 1441 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1442 switch (OperandType) { 1443 case AMDGPU::OPERAND_REG_IMM_INT32: 1444 case AMDGPU::OPERAND_REG_IMM_FP32: 1445 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1446 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1447 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1448 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1449 return &APFloat::IEEEsingle(); 1450 case AMDGPU::OPERAND_REG_IMM_INT64: 1451 case AMDGPU::OPERAND_REG_IMM_FP64: 1452 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1453 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1454 return &APFloat::IEEEdouble(); 1455 case AMDGPU::OPERAND_REG_IMM_INT16: 1456 case AMDGPU::OPERAND_REG_IMM_FP16: 1457 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1458 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1459 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1460 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1461 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1462 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1463 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1464 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1465 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1466 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1467 return &APFloat::IEEEhalf(); 1468 default: 1469 llvm_unreachable("unsupported fp type"); 1470 } 1471 } 1472 1473 
//===----------------------------------------------------------------------===// 1474 // Operand 1475 //===----------------------------------------------------------------------===// 1476 1477 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1478 bool Lost; 1479 1480 // Convert literal to single precision 1481 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1482 APFloat::rmNearestTiesToEven, 1483 &Lost); 1484 // We allow precision lost but not overflow or underflow 1485 if (Status != APFloat::opOK && 1486 Lost && 1487 ((Status & APFloat::opOverflow) != 0 || 1488 (Status & APFloat::opUnderflow) != 0)) { 1489 return false; 1490 } 1491 1492 return true; 1493 } 1494 1495 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1496 return isUIntN(Size, Val) || isIntN(Size, Val); 1497 } 1498 1499 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1500 1501 // This is a hack to enable named inline values like 1502 // shared_base with both 32-bit and 64-bit operands. 1503 // Note that these values are defined as 1504 // 32-bit operands only. 1505 if (isInlineValue()) { 1506 return true; 1507 } 1508 1509 if (!isImmTy(ImmTyNone)) { 1510 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1511 return false; 1512 } 1513 // TODO: We should avoid using host float here. It would be better to 1514 // check the float bit values which is what a few other places do. 1515 // We've had bot failures before due to weird NaN support on mips hosts. 
1516 1517 APInt Literal(64, Imm.Val); 1518 1519 if (Imm.IsFPImm) { // We got fp literal token 1520 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1521 return AMDGPU::isInlinableLiteral64(Imm.Val, 1522 AsmParser->hasInv2PiInlineImm()); 1523 } 1524 1525 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1526 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1527 return false; 1528 1529 if (type.getScalarSizeInBits() == 16) { 1530 return AMDGPU::isInlinableLiteral16( 1531 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1532 AsmParser->hasInv2PiInlineImm()); 1533 } 1534 1535 // Check if single precision literal is inlinable 1536 return AMDGPU::isInlinableLiteral32( 1537 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1538 AsmParser->hasInv2PiInlineImm()); 1539 } 1540 1541 // We got int literal token. 1542 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1543 return AMDGPU::isInlinableLiteral64(Imm.Val, 1544 AsmParser->hasInv2PiInlineImm()); 1545 } 1546 1547 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1548 return false; 1549 } 1550 1551 if (type.getScalarSizeInBits() == 16) { 1552 return AMDGPU::isInlinableLiteral16( 1553 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1554 AsmParser->hasInv2PiInlineImm()); 1555 } 1556 1557 return AMDGPU::isInlinableLiteral32( 1558 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1559 AsmParser->hasInv2PiInlineImm()); 1560 } 1561 1562 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1563 // Check that this immediate can be added as literal 1564 if (!isImmTy(ImmTyNone)) { 1565 return false; 1566 } 1567 1568 if (!Imm.IsFPImm) { 1569 // We got int literal token. 1570 1571 if (type == MVT::f64 && hasFPModifiers()) { 1572 // Cannot apply fp modifiers to int literals preserving the same semantics 1573 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1574 // disable these cases. 
1575 return false; 1576 } 1577 1578 unsigned Size = type.getSizeInBits(); 1579 if (Size == 64) 1580 Size = 32; 1581 1582 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1583 // types. 1584 return isSafeTruncation(Imm.Val, Size); 1585 } 1586 1587 // We got fp literal token 1588 if (type == MVT::f64) { // Expected 64-bit fp operand 1589 // We would set low 64-bits of literal to zeroes but we accept this literals 1590 return true; 1591 } 1592 1593 if (type == MVT::i64) { // Expected 64-bit int operand 1594 // We don't allow fp literals in 64-bit integer instructions. It is 1595 // unclear how we should encode them. 1596 return false; 1597 } 1598 1599 // We allow fp literals with f16x2 operands assuming that the specified 1600 // literal goes into the lower half and the upper half is zero. We also 1601 // require that the literal may be losslesly converted to f16. 1602 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1603 (type == MVT::v2i16)? MVT::i16 : type; 1604 1605 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1606 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1607 } 1608 1609 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1610 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1611 } 1612 1613 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1614 if (AsmParser->isVI()) 1615 return isVReg32(); 1616 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1617 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1618 else 1619 return false; 1620 } 1621 1622 bool AMDGPUOperand::isSDWAFP16Operand() const { 1623 return isSDWAOperand(MVT::f16); 1624 } 1625 1626 bool AMDGPUOperand::isSDWAFP32Operand() const { 1627 return isSDWAOperand(MVT::f32); 1628 } 1629 1630 bool AMDGPUOperand::isSDWAInt16Operand() const { 1631 return isSDWAOperand(MVT::i16); 1632 } 1633 1634 bool AMDGPUOperand::isSDWAInt32Operand() const { 1635 return isSDWAOperand(MVT::i32); 1636 } 
1637 1638 bool AMDGPUOperand::isBoolReg() const { 1639 return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? 1640 isSCSrcB64() : isSCSrcB32(); 1641 } 1642 1643 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1644 { 1645 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1646 assert(Size == 2 || Size == 4 || Size == 8); 1647 1648 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1649 1650 if (Imm.Mods.Abs) { 1651 Val &= ~FpSignMask; 1652 } 1653 if (Imm.Mods.Neg) { 1654 Val ^= FpSignMask; 1655 } 1656 1657 return Val; 1658 } 1659 1660 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1661 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1662 Inst.getNumOperands())) { 1663 addLiteralImmOperand(Inst, Imm.Val, 1664 ApplyModifiers & 1665 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1666 } else { 1667 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1668 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1669 } 1670 } 1671 1672 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1673 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1674 auto OpNum = Inst.getNumOperands(); 1675 // Check that this operand accepts literals 1676 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1677 1678 if (ApplyModifiers) { 1679 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1680 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1681 Val = applyInputFPModifiers(Val, Size); 1682 } 1683 1684 APInt Literal(64, Val); 1685 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1686 1687 if (Imm.IsFPImm) { // We got fp literal token 1688 switch (OpTy) { 1689 case AMDGPU::OPERAND_REG_IMM_INT64: 1690 case AMDGPU::OPERAND_REG_IMM_FP64: 1691 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1692 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1693 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1694 AsmParser->hasInv2PiInlineImm())) { 1695 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1696 return; 1697 } 1698 1699 // Non-inlineable 1700 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1701 // For fp operands we check if low 32 bits are zeros 1702 if (Literal.getLoBits(32) != 0) { 1703 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1704 "Can't encode literal as exact 64-bit floating-point operand. " 1705 "Low 32-bits will be set to zero"); 1706 } 1707 1708 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1709 return; 1710 } 1711 1712 // We don't allow fp literals in 64-bit integer instructions. It is 1713 // unclear how we should encode them. 
This case should be checked earlier 1714 // in predicate methods (isLiteralImm()) 1715 llvm_unreachable("fp literal in 64-bit integer instruction."); 1716 1717 case AMDGPU::OPERAND_REG_IMM_INT32: 1718 case AMDGPU::OPERAND_REG_IMM_FP32: 1719 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1720 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1721 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1722 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1723 case AMDGPU::OPERAND_REG_IMM_INT16: 1724 case AMDGPU::OPERAND_REG_IMM_FP16: 1725 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1726 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1727 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1728 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1729 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1730 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1731 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1732 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1733 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1734 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1735 bool lost; 1736 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1737 // Convert literal to single precision 1738 FPLiteral.convert(*getOpFltSemantics(OpTy), 1739 APFloat::rmNearestTiesToEven, &lost); 1740 // We allow precision lost but not overflow or underflow. This should be 1741 // checked earlier in isLiteralImm() 1742 1743 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1744 Inst.addOperand(MCOperand::createImm(ImmVal)); 1745 return; 1746 } 1747 default: 1748 llvm_unreachable("invalid operand size"); 1749 } 1750 1751 return; 1752 } 1753 1754 // We got int literal token. 1755 // Only sign extend inline immediates. 
1756 switch (OpTy) { 1757 case AMDGPU::OPERAND_REG_IMM_INT32: 1758 case AMDGPU::OPERAND_REG_IMM_FP32: 1759 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1760 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1761 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1762 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1763 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1764 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1765 if (isSafeTruncation(Val, 32) && 1766 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1767 AsmParser->hasInv2PiInlineImm())) { 1768 Inst.addOperand(MCOperand::createImm(Val)); 1769 return; 1770 } 1771 1772 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1773 return; 1774 1775 case AMDGPU::OPERAND_REG_IMM_INT64: 1776 case AMDGPU::OPERAND_REG_IMM_FP64: 1777 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1778 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1779 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1780 Inst.addOperand(MCOperand::createImm(Val)); 1781 return; 1782 } 1783 1784 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1785 return; 1786 1787 case AMDGPU::OPERAND_REG_IMM_INT16: 1788 case AMDGPU::OPERAND_REG_IMM_FP16: 1789 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1790 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1791 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1792 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1793 if (isSafeTruncation(Val, 16) && 1794 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1795 AsmParser->hasInv2PiInlineImm())) { 1796 Inst.addOperand(MCOperand::createImm(Val)); 1797 return; 1798 } 1799 1800 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1801 return; 1802 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1804 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1807 assert(isSafeTruncation(Val, 16)); 1808 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1809 AsmParser->hasInv2PiInlineImm())); 1810 1811 
Inst.addOperand(MCOperand::createImm(Val)); 1812 return; 1813 } 1814 default: 1815 llvm_unreachable("invalid operand size"); 1816 } 1817 } 1818 1819 template <unsigned Bitwidth> 1820 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1821 APInt Literal(64, Imm.Val); 1822 1823 if (!Imm.IsFPImm) { 1824 // We got int literal token. 1825 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1826 return; 1827 } 1828 1829 bool Lost; 1830 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1831 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1832 APFloat::rmNearestTiesToEven, &Lost); 1833 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1834 } 1835 1836 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1837 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1838 } 1839 1840 static bool isInlineValue(unsigned Reg) { 1841 switch (Reg) { 1842 case AMDGPU::SRC_SHARED_BASE: 1843 case AMDGPU::SRC_SHARED_LIMIT: 1844 case AMDGPU::SRC_PRIVATE_BASE: 1845 case AMDGPU::SRC_PRIVATE_LIMIT: 1846 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1847 return true; 1848 case AMDGPU::SRC_VCCZ: 1849 case AMDGPU::SRC_EXECZ: 1850 case AMDGPU::SRC_SCC: 1851 return true; 1852 default: 1853 return false; 1854 } 1855 } 1856 1857 bool AMDGPUOperand::isInlineValue() const { 1858 return isRegKind() && ::isInlineValue(getReg()); 1859 } 1860 1861 //===----------------------------------------------------------------------===// 1862 // AsmParser 1863 //===----------------------------------------------------------------------===// 1864 1865 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1866 if (Is == IS_VGPR) { 1867 switch (RegWidth) { 1868 default: return -1; 1869 case 1: return AMDGPU::VGPR_32RegClassID; 1870 case 2: return AMDGPU::VReg_64RegClassID; 1871 case 3: return AMDGPU::VReg_96RegClassID; 1872 case 4: return AMDGPU::VReg_128RegClassID; 1873 case 8: 
return AMDGPU::VReg_256RegClassID; 1874 case 16: return AMDGPU::VReg_512RegClassID; 1875 } 1876 } else if (Is == IS_TTMP) { 1877 switch (RegWidth) { 1878 default: return -1; 1879 case 1: return AMDGPU::TTMP_32RegClassID; 1880 case 2: return AMDGPU::TTMP_64RegClassID; 1881 case 4: return AMDGPU::TTMP_128RegClassID; 1882 case 8: return AMDGPU::TTMP_256RegClassID; 1883 case 16: return AMDGPU::TTMP_512RegClassID; 1884 } 1885 } else if (Is == IS_SGPR) { 1886 switch (RegWidth) { 1887 default: return -1; 1888 case 1: return AMDGPU::SGPR_32RegClassID; 1889 case 2: return AMDGPU::SGPR_64RegClassID; 1890 case 4: return AMDGPU::SGPR_128RegClassID; 1891 case 8: return AMDGPU::SGPR_256RegClassID; 1892 case 16: return AMDGPU::SGPR_512RegClassID; 1893 } 1894 } else if (Is == IS_AGPR) { 1895 switch (RegWidth) { 1896 default: return -1; 1897 case 1: return AMDGPU::AGPR_32RegClassID; 1898 case 2: return AMDGPU::AReg_64RegClassID; 1899 case 4: return AMDGPU::AReg_128RegClassID; 1900 case 16: return AMDGPU::AReg_512RegClassID; 1901 case 32: return AMDGPU::AReg_1024RegClassID; 1902 } 1903 } 1904 return -1; 1905 } 1906 1907 static unsigned getSpecialRegForName(StringRef RegName) { 1908 return StringSwitch<unsigned>(RegName) 1909 .Case("exec", AMDGPU::EXEC) 1910 .Case("vcc", AMDGPU::VCC) 1911 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1912 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1913 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1914 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1915 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1916 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1917 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1918 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1919 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1920 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1921 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1922 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1923 .Case("lds_direct", 
AMDGPU::LDS_DIRECT) 1924 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1925 .Case("m0", AMDGPU::M0) 1926 .Case("vccz", AMDGPU::SRC_VCCZ) 1927 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1928 .Case("execz", AMDGPU::SRC_EXECZ) 1929 .Case("src_execz", AMDGPU::SRC_EXECZ) 1930 .Case("scc", AMDGPU::SRC_SCC) 1931 .Case("src_scc", AMDGPU::SRC_SCC) 1932 .Case("tba", AMDGPU::TBA) 1933 .Case("tma", AMDGPU::TMA) 1934 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1935 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1936 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1937 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1938 .Case("vcc_lo", AMDGPU::VCC_LO) 1939 .Case("vcc_hi", AMDGPU::VCC_HI) 1940 .Case("exec_lo", AMDGPU::EXEC_LO) 1941 .Case("exec_hi", AMDGPU::EXEC_HI) 1942 .Case("tma_lo", AMDGPU::TMA_LO) 1943 .Case("tma_hi", AMDGPU::TMA_HI) 1944 .Case("tba_lo", AMDGPU::TBA_LO) 1945 .Case("tba_hi", AMDGPU::TBA_HI) 1946 .Case("null", AMDGPU::SGPR_NULL) 1947 .Default(0); 1948 } 1949 1950 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1951 SMLoc &EndLoc) { 1952 auto R = parseRegister(); 1953 if (!R) return true; 1954 assert(R->isReg()); 1955 RegNo = R->getReg(); 1956 StartLoc = R->getStartLoc(); 1957 EndLoc = R->getEndLoc(); 1958 return false; 1959 } 1960 1961 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1962 RegisterKind RegKind, unsigned Reg1, 1963 unsigned RegNum) { 1964 switch (RegKind) { 1965 case IS_SPECIAL: 1966 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1967 Reg = AMDGPU::EXEC; 1968 RegWidth = 2; 1969 return true; 1970 } 1971 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1972 Reg = AMDGPU::FLAT_SCR; 1973 RegWidth = 2; 1974 return true; 1975 } 1976 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1977 Reg = AMDGPU::XNACK_MASK; 1978 RegWidth = 2; 1979 return true; 1980 } 1981 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1982 Reg = AMDGPU::VCC; 1983 RegWidth = 2; 1984 return 
true; 1985 } 1986 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1987 Reg = AMDGPU::TBA; 1988 RegWidth = 2; 1989 return true; 1990 } 1991 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1992 Reg = AMDGPU::TMA; 1993 RegWidth = 2; 1994 return true; 1995 } 1996 return false; 1997 case IS_VGPR: 1998 case IS_SGPR: 1999 case IS_AGPR: 2000 case IS_TTMP: 2001 if (Reg1 != Reg + RegWidth) { 2002 return false; 2003 } 2004 RegWidth++; 2005 return true; 2006 default: 2007 llvm_unreachable("unexpected register kind"); 2008 } 2009 } 2010 2011 static const StringRef Registers[] = { 2012 { "v" }, 2013 { "s" }, 2014 { "ttmp" }, 2015 { "acc" }, 2016 { "a" }, 2017 }; 2018 2019 bool 2020 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2021 const AsmToken &NextToken) const { 2022 2023 // A list of consecutive registers: [s0,s1,s2,s3] 2024 if (Token.is(AsmToken::LBrac)) 2025 return true; 2026 2027 if (!Token.is(AsmToken::Identifier)) 2028 return false; 2029 2030 // A single register like s0 or a range of registers like s[0:1] 2031 2032 StringRef RegName = Token.getString(); 2033 2034 for (StringRef Reg : Registers) { 2035 if (RegName.startswith(Reg)) { 2036 if (Reg.size() < RegName.size()) { 2037 unsigned RegNum; 2038 // A single register with an index: rXX 2039 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 2040 return true; 2041 } else { 2042 // A range of registers: r[XX:YY]. 
2043 if (NextToken.is(AsmToken::LBrac)) 2044 return true; 2045 } 2046 } 2047 } 2048 2049 return getSpecialRegForName(RegName); 2050 } 2051 2052 bool 2053 AMDGPUAsmParser::isRegister() 2054 { 2055 return isRegister(getToken(), peekToken()); 2056 } 2057 2058 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2059 unsigned &RegNum, unsigned &RegWidth, 2060 unsigned *DwordRegIndex) { 2061 if (DwordRegIndex) { *DwordRegIndex = 0; } 2062 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2063 if (getLexer().is(AsmToken::Identifier)) { 2064 StringRef RegName = Parser.getTok().getString(); 2065 if ((Reg = getSpecialRegForName(RegName))) { 2066 Parser.Lex(); 2067 RegKind = IS_SPECIAL; 2068 } else { 2069 unsigned RegNumIndex = 0; 2070 if (RegName[0] == 'v') { 2071 RegNumIndex = 1; 2072 RegKind = IS_VGPR; 2073 } else if (RegName[0] == 's') { 2074 RegNumIndex = 1; 2075 RegKind = IS_SGPR; 2076 } else if (RegName[0] == 'a') { 2077 RegNumIndex = RegName.startswith("acc") ? 3 : 1; 2078 RegKind = IS_AGPR; 2079 } else if (RegName.startswith("ttmp")) { 2080 RegNumIndex = strlen("ttmp"); 2081 RegKind = IS_TTMP; 2082 } else { 2083 return false; 2084 } 2085 if (RegName.size() > RegNumIndex) { 2086 // Single 32-bit register: vXX. 2087 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 2088 return false; 2089 Parser.Lex(); 2090 RegWidth = 1; 2091 } else { 2092 // Range of registers: v[XX:YY]. ":YY" is optional. 
2093 Parser.Lex(); 2094 int64_t RegLo, RegHi; 2095 if (getLexer().isNot(AsmToken::LBrac)) 2096 return false; 2097 Parser.Lex(); 2098 2099 if (getParser().parseAbsoluteExpression(RegLo)) 2100 return false; 2101 2102 const bool isRBrace = getLexer().is(AsmToken::RBrac); 2103 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 2104 return false; 2105 Parser.Lex(); 2106 2107 if (isRBrace) { 2108 RegHi = RegLo; 2109 } else { 2110 if (getParser().parseAbsoluteExpression(RegHi)) 2111 return false; 2112 2113 if (getLexer().isNot(AsmToken::RBrac)) 2114 return false; 2115 Parser.Lex(); 2116 } 2117 RegNum = (unsigned) RegLo; 2118 RegWidth = (RegHi - RegLo) + 1; 2119 } 2120 } 2121 } else if (getLexer().is(AsmToken::LBrac)) { 2122 // List of consecutive registers: [s0,s1,s2,s3] 2123 Parser.Lex(); 2124 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 2125 return false; 2126 if (RegWidth != 1) 2127 return false; 2128 RegisterKind RegKind1; 2129 unsigned Reg1, RegNum1, RegWidth1; 2130 do { 2131 if (getLexer().is(AsmToken::Comma)) { 2132 Parser.Lex(); 2133 } else if (getLexer().is(AsmToken::RBrac)) { 2134 Parser.Lex(); 2135 break; 2136 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 2137 if (RegWidth1 != 1) { 2138 return false; 2139 } 2140 if (RegKind1 != RegKind) { 2141 return false; 2142 } 2143 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 2144 return false; 2145 } 2146 } else { 2147 return false; 2148 } 2149 } while (true); 2150 } else { 2151 return false; 2152 } 2153 switch (RegKind) { 2154 case IS_SPECIAL: 2155 RegNum = 0; 2156 RegWidth = 1; 2157 break; 2158 case IS_VGPR: 2159 case IS_SGPR: 2160 case IS_AGPR: 2161 case IS_TTMP: 2162 { 2163 unsigned Size = 1; 2164 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2165 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
2166 Size = std::min(RegWidth, 4u); 2167 } 2168 if (RegNum % Size != 0) 2169 return false; 2170 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 2171 RegNum = RegNum / Size; 2172 int RCID = getRegClass(RegKind, RegWidth); 2173 if (RCID == -1) 2174 return false; 2175 const MCRegisterClass RC = TRI->getRegClass(RCID); 2176 if (RegNum >= RC.getNumRegs()) 2177 return false; 2178 Reg = RC.getRegister(RegNum); 2179 break; 2180 } 2181 2182 default: 2183 llvm_unreachable("unexpected register kind"); 2184 } 2185 2186 if (!subtargetHasRegister(*TRI, Reg)) 2187 return false; 2188 return true; 2189 } 2190 2191 Optional<StringRef> 2192 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2193 switch (RegKind) { 2194 case IS_VGPR: 2195 return StringRef(".amdgcn.next_free_vgpr"); 2196 case IS_SGPR: 2197 return StringRef(".amdgcn.next_free_sgpr"); 2198 default: 2199 return None; 2200 } 2201 } 2202 2203 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2204 auto SymbolName = getGprCountSymbolName(RegKind); 2205 assert(SymbolName && "initializing invalid register kind"); 2206 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2207 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2208 } 2209 2210 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2211 unsigned DwordRegIndex, 2212 unsigned RegWidth) { 2213 // Symbols are only defined for GCN targets 2214 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2215 return true; 2216 2217 auto SymbolName = getGprCountSymbolName(RegKind); 2218 if (!SymbolName) 2219 return true; 2220 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2221 2222 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2223 int64_t OldCount; 2224 2225 if (!Sym->isVariable()) 2226 return !Error(getParser().getTok().getLoc(), 2227 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2228 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2229 return !Error( 2230 
getParser().getTok().getLoc(), 2231 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2232 2233 if (OldCount <= NewMax) 2234 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2235 2236 return true; 2237 } 2238 2239 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2240 const auto &Tok = Parser.getTok(); 2241 SMLoc StartLoc = Tok.getLoc(); 2242 SMLoc EndLoc = Tok.getEndLoc(); 2243 RegisterKind RegKind; 2244 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2245 2246 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2247 //FIXME: improve error messages (bug 41303). 2248 Error(StartLoc, "not a valid operand."); 2249 return nullptr; 2250 } 2251 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2252 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2253 return nullptr; 2254 } else 2255 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2256 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2257 } 2258 2259 OperandMatchResultTy 2260 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2261 // TODO: add syntactic sugar for 1/(2*PI) 2262 2263 assert(!isRegister()); 2264 assert(!isModifier()); 2265 2266 const auto& Tok = getToken(); 2267 const auto& NextTok = peekToken(); 2268 bool IsReal = Tok.is(AsmToken::Real); 2269 SMLoc S = getLoc(); 2270 bool Negate = false; 2271 2272 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2273 lex(); 2274 IsReal = true; 2275 Negate = true; 2276 } 2277 2278 if (IsReal) { 2279 // Floating-point expressions are not supported. 2280 // Can only allow floating-point literals with an 2281 // optional sign. 
2282 2283 StringRef Num = getTokenStr(); 2284 lex(); 2285 2286 APFloat RealVal(APFloat::IEEEdouble()); 2287 auto roundMode = APFloat::rmNearestTiesToEven; 2288 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2289 return MatchOperand_ParseFail; 2290 } 2291 if (Negate) 2292 RealVal.changeSign(); 2293 2294 Operands.push_back( 2295 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2296 AMDGPUOperand::ImmTyNone, true)); 2297 2298 return MatchOperand_Success; 2299 2300 } else { 2301 int64_t IntVal; 2302 const MCExpr *Expr; 2303 SMLoc S = getLoc(); 2304 2305 if (HasSP3AbsModifier) { 2306 // This is a workaround for handling expressions 2307 // as arguments of SP3 'abs' modifier, for example: 2308 // |1.0| 2309 // |-1| 2310 // |1+x| 2311 // This syntax is not compatible with syntax of standard 2312 // MC expressions (due to the trailing '|'). 2313 SMLoc EndLoc; 2314 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2315 return MatchOperand_ParseFail; 2316 } else { 2317 if (Parser.parseExpression(Expr)) 2318 return MatchOperand_ParseFail; 2319 } 2320 2321 if (Expr->evaluateAsAbsolute(IntVal)) { 2322 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2323 } else { 2324 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2325 } 2326 2327 return MatchOperand_Success; 2328 } 2329 2330 return MatchOperand_NoMatch; 2331 } 2332 2333 OperandMatchResultTy 2334 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2335 if (!isRegister()) 2336 return MatchOperand_NoMatch; 2337 2338 if (auto R = parseRegister()) { 2339 assert(R->isReg()); 2340 Operands.push_back(std::move(R)); 2341 return MatchOperand_Success; 2342 } 2343 return MatchOperand_ParseFail; 2344 } 2345 2346 OperandMatchResultTy 2347 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2348 auto res = parseReg(Operands); 2349 if (res != MatchOperand_NoMatch) { 2350 return res; 2351 } else if (isModifier()) { 2352 return 
MatchOperand_NoMatch; 2353 } else { 2354 return parseImm(Operands, HasSP3AbsMod); 2355 } 2356 } 2357 2358 bool 2359 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2360 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2361 const auto &str = Token.getString(); 2362 return str == "abs" || str == "neg" || str == "sext"; 2363 } 2364 return false; 2365 } 2366 2367 bool 2368 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2369 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2370 } 2371 2372 bool 2373 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2374 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2375 } 2376 2377 bool 2378 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2379 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2380 } 2381 2382 // Check if this is an operand modifier or an opcode modifier 2383 // which may look like an expression but it is not. We should 2384 // avoid parsing these modifiers as expressions. Currently 2385 // recognized sequences are: 2386 // |...| 2387 // abs(...) 2388 // neg(...) 2389 // sext(...) 2390 // -reg 2391 // -|...| 2392 // -abs(...) 2393 // name:... 2394 // Note that simple opcode modifiers like 'gds' may be parsed as 2395 // expressions; this is a special case. See getExpressionAsToken. 2396 // 2397 bool 2398 AMDGPUAsmParser::isModifier() { 2399 2400 AsmToken Tok = getToken(); 2401 AsmToken NextToken[2]; 2402 peekTokens(NextToken); 2403 2404 return isOperandModifier(Tok, NextToken[0]) || 2405 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2406 isOpcodeModifierWithVal(Tok, NextToken[0]); 2407 } 2408 2409 // Check if the current token is an SP3 'neg' modifier. 
2410 // Currently this modifier is allowed in the following context: 2411 // 2412 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2413 // 2. Before an 'abs' modifier: -abs(...) 2414 // 3. Before an SP3 'abs' modifier: -|...| 2415 // 2416 // In all other cases "-" is handled as a part 2417 // of an expression that follows the sign. 2418 // 2419 // Note: When "-" is followed by an integer literal, 2420 // this is interpreted as integer negation rather 2421 // than a floating-point NEG modifier applied to N. 2422 // Beside being contr-intuitive, such use of floating-point 2423 // NEG modifier would have resulted in different meaning 2424 // of integer literals used with VOP1/2/C and VOP3, 2425 // for example: 2426 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2427 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2428 // Negative fp literals with preceding "-" are 2429 // handled likewise for unifomtity 2430 // 2431 bool 2432 AMDGPUAsmParser::parseSP3NegModifier() { 2433 2434 AsmToken NextToken[2]; 2435 peekTokens(NextToken); 2436 2437 if (isToken(AsmToken::Minus) && 2438 (isRegister(NextToken[0], NextToken[1]) || 2439 NextToken[0].is(AsmToken::Pipe) || 2440 isId(NextToken[0], "abs"))) { 2441 lex(); 2442 return true; 2443 } 2444 2445 return false; 2446 } 2447 2448 OperandMatchResultTy 2449 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2450 bool AllowImm) { 2451 bool Neg, SP3Neg; 2452 bool Abs, SP3Abs; 2453 SMLoc Loc; 2454 2455 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 
2456 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2457 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2458 return MatchOperand_ParseFail; 2459 } 2460 2461 SP3Neg = parseSP3NegModifier(); 2462 2463 Loc = getLoc(); 2464 Neg = trySkipId("neg"); 2465 if (Neg && SP3Neg) { 2466 Error(Loc, "expected register or immediate"); 2467 return MatchOperand_ParseFail; 2468 } 2469 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2470 return MatchOperand_ParseFail; 2471 2472 Abs = trySkipId("abs"); 2473 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2474 return MatchOperand_ParseFail; 2475 2476 Loc = getLoc(); 2477 SP3Abs = trySkipToken(AsmToken::Pipe); 2478 if (Abs && SP3Abs) { 2479 Error(Loc, "expected register or immediate"); 2480 return MatchOperand_ParseFail; 2481 } 2482 2483 OperandMatchResultTy Res; 2484 if (AllowImm) { 2485 Res = parseRegOrImm(Operands, SP3Abs); 2486 } else { 2487 Res = parseReg(Operands); 2488 } 2489 if (Res != MatchOperand_Success) { 2490 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 2491 } 2492 2493 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2494 return MatchOperand_ParseFail; 2495 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2496 return MatchOperand_ParseFail; 2497 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2498 return MatchOperand_ParseFail; 2499 2500 AMDGPUOperand::Modifiers Mods; 2501 Mods.Abs = Abs || SP3Abs; 2502 Mods.Neg = Neg || SP3Neg; 2503 2504 if (Mods.hasFPModifiers()) { 2505 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2506 if (Op.isExpr()) { 2507 Error(Op.getStartLoc(), "expected an absolute expression"); 2508 return MatchOperand_ParseFail; 2509 } 2510 Op.setModifiers(Mods); 2511 } 2512 return MatchOperand_Success; 2513 } 2514 2515 OperandMatchResultTy 2516 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2517 bool AllowImm) { 2518 bool Sext = trySkipId("sext"); 2519 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2520 return MatchOperand_ParseFail; 2521 2522 OperandMatchResultTy Res; 2523 if (AllowImm) { 2524 Res = parseRegOrImm(Operands); 2525 } else { 2526 Res = parseReg(Operands); 2527 } 2528 if (Res != MatchOperand_Success) { 2529 return Sext? 
MatchOperand_ParseFail : Res; 2530 } 2531 2532 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2533 return MatchOperand_ParseFail; 2534 2535 AMDGPUOperand::Modifiers Mods; 2536 Mods.Sext = Sext; 2537 2538 if (Mods.hasIntModifiers()) { 2539 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2540 if (Op.isExpr()) { 2541 Error(Op.getStartLoc(), "expected an absolute expression"); 2542 return MatchOperand_ParseFail; 2543 } 2544 Op.setModifiers(Mods); 2545 } 2546 2547 return MatchOperand_Success; 2548 } 2549 2550 OperandMatchResultTy 2551 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2552 return parseRegOrImmWithFPInputMods(Operands, false); 2553 } 2554 2555 OperandMatchResultTy 2556 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2557 return parseRegOrImmWithIntInputMods(Operands, false); 2558 } 2559 2560 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2561 auto Loc = getLoc(); 2562 if (trySkipId("off")) { 2563 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2564 AMDGPUOperand::ImmTyOff, false)); 2565 return MatchOperand_Success; 2566 } 2567 2568 if (!isRegister()) 2569 return MatchOperand_NoMatch; 2570 2571 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2572 if (Reg) { 2573 Operands.push_back(std::move(Reg)); 2574 return MatchOperand_Success; 2575 } 2576 2577 return MatchOperand_ParseFail; 2578 2579 } 2580 2581 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2582 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2583 2584 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2585 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2586 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2587 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2588 return Match_InvalidOperand; 2589 2590 if ((TSFlags & SIInstrFlags::VOP3) && 2591 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) 
&& 2592 getForcedEncodingSize() != 64) 2593 return Match_PreferE32; 2594 2595 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2596 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2597 // v_mac_f32/16 allow only dst_sel == DWORD; 2598 auto OpNum = 2599 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2600 const auto &Op = Inst.getOperand(OpNum); 2601 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2602 return Match_InvalidOperand; 2603 } 2604 } 2605 2606 return Match_Success; 2607 } 2608 2609 // What asm variants we should check 2610 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2611 if (getForcedEncodingSize() == 32) { 2612 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2613 return makeArrayRef(Variants); 2614 } 2615 2616 if (isForcedVOP3()) { 2617 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2618 return makeArrayRef(Variants); 2619 } 2620 2621 if (isForcedSDWA()) { 2622 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2623 AMDGPUAsmVariants::SDWA9}; 2624 return makeArrayRef(Variants); 2625 } 2626 2627 if (isForcedDPP()) { 2628 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2629 return makeArrayRef(Variants); 2630 } 2631 2632 static const unsigned Variants[] = { 2633 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2634 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2635 }; 2636 2637 return makeArrayRef(Variants); 2638 } 2639 2640 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2641 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2642 const unsigned Num = Desc.getNumImplicitUses(); 2643 for (unsigned i = 0; i < Num; ++i) { 2644 unsigned Reg = Desc.ImplicitUses[i]; 2645 switch (Reg) { 2646 case AMDGPU::FLAT_SCR: 2647 case AMDGPU::VCC: 2648 case AMDGPU::VCC_LO: 2649 case AMDGPU::VCC_HI: 2650 case AMDGPU::M0: 2651 case AMDGPU::SGPR_NULL: 2652 return Reg; 2653 default: 2654 
break; 2655 } 2656 } 2657 return AMDGPU::NoRegister; 2658 } 2659 2660 // NB: This code is correct only when used to check constant 2661 // bus limitations because GFX7 support no f16 inline constants. 2662 // Note that there are no cases when a GFX7 opcode violates 2663 // constant bus limitations due to the use of an f16 constant. 2664 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2665 unsigned OpIdx) const { 2666 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2667 2668 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2669 return false; 2670 } 2671 2672 const MCOperand &MO = Inst.getOperand(OpIdx); 2673 2674 int64_t Val = MO.getImm(); 2675 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2676 2677 switch (OpSize) { // expected operand size 2678 case 8: 2679 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2680 case 4: 2681 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2682 case 2: { 2683 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2684 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2685 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2686 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2687 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2688 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2689 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2690 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2691 } else { 2692 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2693 } 2694 } 2695 default: 2696 llvm_unreachable("invalid operand size"); 2697 } 2698 } 2699 2700 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2701 const MCOperand &MO = Inst.getOperand(OpIdx); 2702 if (MO.isImm()) { 2703 return !isInlineConstant(Inst, OpIdx); 2704 } 2705 return !MO.isReg() || 2706 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2707 } 2708 2709 bool AMDGPUAsmParser::validateConstantBusLimitations(const 
MCInst &Inst) { 2710 const unsigned Opcode = Inst.getOpcode(); 2711 const MCInstrDesc &Desc = MII.get(Opcode); 2712 unsigned ConstantBusUseCount = 0; 2713 unsigned NumLiterals = 0; 2714 unsigned LiteralSize; 2715 2716 if (Desc.TSFlags & 2717 (SIInstrFlags::VOPC | 2718 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2719 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2720 SIInstrFlags::SDWA)) { 2721 // Check special imm operands (used by madmk, etc) 2722 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2723 ++ConstantBusUseCount; 2724 } 2725 2726 SmallDenseSet<unsigned> SGPRsUsed; 2727 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2728 if (SGPRUsed != AMDGPU::NoRegister) { 2729 SGPRsUsed.insert(SGPRUsed); 2730 ++ConstantBusUseCount; 2731 } 2732 2733 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2734 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2735 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2736 2737 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2738 2739 for (int OpIdx : OpIndices) { 2740 if (OpIdx == -1) break; 2741 2742 const MCOperand &MO = Inst.getOperand(OpIdx); 2743 if (usesConstantBus(Inst, OpIdx)) { 2744 if (MO.isReg()) { 2745 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2746 // Pairs of registers with a partial intersections like these 2747 // s0, s[0:1] 2748 // flat_scratch_lo, flat_scratch 2749 // flat_scratch_lo, flat_scratch_hi 2750 // are theoretically valid but they are disabled anyway. 2751 // Note that this code mimics SIInstrInfo::verifyInstruction 2752 if (!SGPRsUsed.count(Reg)) { 2753 SGPRsUsed.insert(Reg); 2754 ++ConstantBusUseCount; 2755 } 2756 SGPRUsed = Reg; 2757 } else { // Expression or a literal 2758 2759 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2760 continue; // special operand like VINTERP attr_chan 2761 2762 // An instruction may use only one literal. 
2763 // This has been validated on the previous step. 2764 // See validateVOP3Literal. 2765 // This literal may be used as more than one operand. 2766 // If all these operands are of the same size, 2767 // this literal counts as one scalar value. 2768 // Otherwise it counts as 2 scalar values. 2769 // See "GFX10 Shader Programming", section 3.6.2.3. 2770 2771 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2772 if (Size < 4) Size = 4; 2773 2774 if (NumLiterals == 0) { 2775 NumLiterals = 1; 2776 LiteralSize = Size; 2777 } else if (LiteralSize != Size) { 2778 NumLiterals = 2; 2779 } 2780 } 2781 } 2782 } 2783 } 2784 ConstantBusUseCount += NumLiterals; 2785 2786 if (isGFX10()) 2787 return ConstantBusUseCount <= 2; 2788 2789 return ConstantBusUseCount <= 1; 2790 } 2791 2792 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2793 const unsigned Opcode = Inst.getOpcode(); 2794 const MCInstrDesc &Desc = MII.get(Opcode); 2795 2796 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2797 if (DstIdx == -1 || 2798 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2799 return true; 2800 } 2801 2802 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2803 2804 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2805 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2806 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2807 2808 assert(DstIdx != -1); 2809 const MCOperand &Dst = Inst.getOperand(DstIdx); 2810 assert(Dst.isReg()); 2811 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2812 2813 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2814 2815 for (int SrcIdx : SrcIndices) { 2816 if (SrcIdx == -1) break; 2817 const MCOperand &Src = Inst.getOperand(SrcIdx); 2818 if (Src.isReg()) { 2819 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2820 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2821 return false; 2822 } 
2823 } 2824 } 2825 2826 return true; 2827 } 2828 2829 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2830 2831 const unsigned Opc = Inst.getOpcode(); 2832 const MCInstrDesc &Desc = MII.get(Opc); 2833 2834 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2835 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2836 assert(ClampIdx != -1); 2837 return Inst.getOperand(ClampIdx).getImm() == 0; 2838 } 2839 2840 return true; 2841 } 2842 2843 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2844 2845 const unsigned Opc = Inst.getOpcode(); 2846 const MCInstrDesc &Desc = MII.get(Opc); 2847 2848 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2849 return true; 2850 2851 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2852 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2853 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2854 2855 assert(VDataIdx != -1); 2856 assert(DMaskIdx != -1); 2857 assert(TFEIdx != -1); 2858 2859 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2860 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2861 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2862 if (DMask == 0) 2863 DMask = 1; 2864 2865 unsigned DataSize = 2866 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2867 if (hasPackedD16()) { 2868 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2869 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2870 DataSize = (DataSize + 1) / 2; 2871 } 2872 2873 return (VDataSize / 4) == DataSize + TFESize; 2874 } 2875 2876 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2877 const unsigned Opc = Inst.getOpcode(); 2878 const MCInstrDesc &Desc = MII.get(Opc); 2879 2880 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2881 return true; 2882 2883 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2884 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2885 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2886 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2887 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2888 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2889 2890 assert(VAddr0Idx != -1); 2891 assert(SrsrcIdx != -1); 2892 assert(DimIdx != -1); 2893 assert(SrsrcIdx > VAddr0Idx); 2894 2895 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2896 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2897 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2898 unsigned VAddrSize = 2899 IsNSA ? SrsrcIdx - VAddr0Idx 2900 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2901 2902 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2903 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2904 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2905 (BaseOpcode->LodOrClampOrMip ? 
1 : 0); 2906 if (!IsNSA) { 2907 if (AddrSize > 8) 2908 AddrSize = 16; 2909 else if (AddrSize > 4) 2910 AddrSize = 8; 2911 } 2912 2913 return VAddrSize == AddrSize; 2914 } 2915 2916 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2917 2918 const unsigned Opc = Inst.getOpcode(); 2919 const MCInstrDesc &Desc = MII.get(Opc); 2920 2921 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2922 return true; 2923 if (!Desc.mayLoad() || !Desc.mayStore()) 2924 return true; // Not atomic 2925 2926 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2927 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2928 2929 // This is an incomplete check because image_atomic_cmpswap 2930 // may only use 0x3 and 0xf while other atomic operations 2931 // may use 0x1 and 0x3. However these limitations are 2932 // verified when we check that dmask matches dst size. 2933 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2934 } 2935 2936 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2937 2938 const unsigned Opc = Inst.getOpcode(); 2939 const MCInstrDesc &Desc = MII.get(Opc); 2940 2941 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2942 return true; 2943 2944 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2945 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2946 2947 // GATHER4 instructions use dmask in a different fashion compared to 2948 // other MIMG instructions. The only useful DMASK values are 2949 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2950 // (red,red,red,red) etc.) The ISA document doesn't mention 2951 // this. 
2952 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2953 } 2954 2955 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2956 2957 const unsigned Opc = Inst.getOpcode(); 2958 const MCInstrDesc &Desc = MII.get(Opc); 2959 2960 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2961 return true; 2962 2963 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2964 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2965 if (isCI() || isSI()) 2966 return false; 2967 } 2968 2969 return true; 2970 } 2971 2972 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 2973 const unsigned Opc = Inst.getOpcode(); 2974 const MCInstrDesc &Desc = MII.get(Opc); 2975 2976 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2977 return true; 2978 2979 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2980 if (DimIdx < 0) 2981 return true; 2982 2983 long Imm = Inst.getOperand(DimIdx).getImm(); 2984 if (Imm < 0 || Imm >= 8) 2985 return false; 2986 2987 return true; 2988 } 2989 2990 static bool IsRevOpcode(const unsigned Opcode) 2991 { 2992 switch (Opcode) { 2993 case AMDGPU::V_SUBREV_F32_e32: 2994 case AMDGPU::V_SUBREV_F32_e64: 2995 case AMDGPU::V_SUBREV_F32_e32_gfx10: 2996 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 2997 case AMDGPU::V_SUBREV_F32_e32_vi: 2998 case AMDGPU::V_SUBREV_F32_e64_gfx10: 2999 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3000 case AMDGPU::V_SUBREV_F32_e64_vi: 3001 3002 case AMDGPU::V_SUBREV_I32_e32: 3003 case AMDGPU::V_SUBREV_I32_e64: 3004 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3005 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3006 3007 case AMDGPU::V_SUBBREV_U32_e32: 3008 case AMDGPU::V_SUBBREV_U32_e64: 3009 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3010 case AMDGPU::V_SUBBREV_U32_e32_vi: 3011 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3012 case AMDGPU::V_SUBBREV_U32_e64_vi: 3013 3014 case AMDGPU::V_SUBREV_U32_e32: 3015 case AMDGPU::V_SUBREV_U32_e64: 3016 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3017 case 
AMDGPU::V_SUBREV_U32_e32_vi: 3018 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3019 case AMDGPU::V_SUBREV_U32_e64_vi: 3020 3021 case AMDGPU::V_SUBREV_F16_e32: 3022 case AMDGPU::V_SUBREV_F16_e64: 3023 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3024 case AMDGPU::V_SUBREV_F16_e32_vi: 3025 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3026 case AMDGPU::V_SUBREV_F16_e64_vi: 3027 3028 case AMDGPU::V_SUBREV_U16_e32: 3029 case AMDGPU::V_SUBREV_U16_e64: 3030 case AMDGPU::V_SUBREV_U16_e32_vi: 3031 case AMDGPU::V_SUBREV_U16_e64_vi: 3032 3033 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3034 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3035 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3036 3037 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3038 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3039 3040 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3041 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3042 3043 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3044 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3045 3046 case AMDGPU::V_LSHRREV_B32_e32: 3047 case AMDGPU::V_LSHRREV_B32_e64: 3048 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3049 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3050 case AMDGPU::V_LSHRREV_B32_e32_vi: 3051 case AMDGPU::V_LSHRREV_B32_e64_vi: 3052 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3053 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3054 3055 case AMDGPU::V_ASHRREV_I32_e32: 3056 case AMDGPU::V_ASHRREV_I32_e64: 3057 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3058 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3059 case AMDGPU::V_ASHRREV_I32_e32_vi: 3060 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3061 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3062 case AMDGPU::V_ASHRREV_I32_e64_vi: 3063 3064 case AMDGPU::V_LSHLREV_B32_e32: 3065 case AMDGPU::V_LSHLREV_B32_e64: 3066 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3067 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3068 case AMDGPU::V_LSHLREV_B32_e32_vi: 3069 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3070 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3071 case AMDGPU::V_LSHLREV_B32_e64_vi: 3072 3073 case AMDGPU::V_LSHLREV_B16_e32: 3074 
case AMDGPU::V_LSHLREV_B16_e64: 3075 case AMDGPU::V_LSHLREV_B16_e32_vi: 3076 case AMDGPU::V_LSHLREV_B16_e64_vi: 3077 case AMDGPU::V_LSHLREV_B16_gfx10: 3078 3079 case AMDGPU::V_LSHRREV_B16_e32: 3080 case AMDGPU::V_LSHRREV_B16_e64: 3081 case AMDGPU::V_LSHRREV_B16_e32_vi: 3082 case AMDGPU::V_LSHRREV_B16_e64_vi: 3083 case AMDGPU::V_LSHRREV_B16_gfx10: 3084 3085 case AMDGPU::V_ASHRREV_I16_e32: 3086 case AMDGPU::V_ASHRREV_I16_e64: 3087 case AMDGPU::V_ASHRREV_I16_e32_vi: 3088 case AMDGPU::V_ASHRREV_I16_e64_vi: 3089 case AMDGPU::V_ASHRREV_I16_gfx10: 3090 3091 case AMDGPU::V_LSHLREV_B64: 3092 case AMDGPU::V_LSHLREV_B64_gfx10: 3093 case AMDGPU::V_LSHLREV_B64_vi: 3094 3095 case AMDGPU::V_LSHRREV_B64: 3096 case AMDGPU::V_LSHRREV_B64_gfx10: 3097 case AMDGPU::V_LSHRREV_B64_vi: 3098 3099 case AMDGPU::V_ASHRREV_I64: 3100 case AMDGPU::V_ASHRREV_I64_gfx10: 3101 case AMDGPU::V_ASHRREV_I64_vi: 3102 3103 case AMDGPU::V_PK_LSHLREV_B16: 3104 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3105 case AMDGPU::V_PK_LSHLREV_B16_vi: 3106 3107 case AMDGPU::V_PK_LSHRREV_B16: 3108 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3109 case AMDGPU::V_PK_LSHRREV_B16_vi: 3110 case AMDGPU::V_PK_ASHRREV_I16: 3111 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3112 case AMDGPU::V_PK_ASHRREV_I16_vi: 3113 return true; 3114 default: 3115 return false; 3116 } 3117 } 3118 3119 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3120 3121 using namespace SIInstrFlags; 3122 const unsigned Opcode = Inst.getOpcode(); 3123 const MCInstrDesc &Desc = MII.get(Opcode); 3124 3125 // lds_direct register is defined so that it can be used 3126 // with 9-bit operands only. Ignore encodings which do not accept these. 
3127 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3128 return true; 3129 3130 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3131 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3132 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3133 3134 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3135 3136 // lds_direct cannot be specified as either src1 or src2. 3137 for (int SrcIdx : SrcIndices) { 3138 if (SrcIdx == -1) break; 3139 const MCOperand &Src = Inst.getOperand(SrcIdx); 3140 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3141 return false; 3142 } 3143 } 3144 3145 if (Src0Idx == -1) 3146 return true; 3147 3148 const MCOperand &Src = Inst.getOperand(Src0Idx); 3149 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3150 return true; 3151 3152 // lds_direct is specified as src0. Check additional limitations. 3153 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3154 } 3155 3156 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3157 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3158 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3159 if (Op.isFlatOffset()) 3160 return Op.getStartLoc(); 3161 } 3162 return getLoc(); 3163 } 3164 3165 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3166 const OperandVector &Operands) { 3167 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3168 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3169 return true; 3170 3171 auto Opcode = Inst.getOpcode(); 3172 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3173 assert(OpNum != -1); 3174 3175 const auto &Op = Inst.getOperand(OpNum); 3176 if (!hasFlatOffsets() && Op.getImm() != 0) { 3177 Error(getFlatOffsetLoc(Operands), 3178 "flat offset modifier is not supported on this GPU"); 3179 return false; 3180 } 3181 3182 // Address offset is 12-bit signed for GFX10, 13-bit 
for GFX9. 3183 // For FLAT segment the offset must be positive; 3184 // MSB is ignored and forced to zero. 3185 unsigned OffsetSize = isGFX9() ? 13 : 12; 3186 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3187 if (!isIntN(OffsetSize, Op.getImm())) { 3188 Error(getFlatOffsetLoc(Operands), 3189 isGFX9() ? "expected a 13-bit signed offset" : 3190 "expected a 12-bit signed offset"); 3191 return false; 3192 } 3193 } else { 3194 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3195 Error(getFlatOffsetLoc(Operands), 3196 isGFX9() ? "expected a 12-bit unsigned offset" : 3197 "expected an 11-bit unsigned offset"); 3198 return false; 3199 } 3200 } 3201 3202 return true; 3203 } 3204 3205 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3206 unsigned Opcode = Inst.getOpcode(); 3207 const MCInstrDesc &Desc = MII.get(Opcode); 3208 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3209 return true; 3210 3211 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3212 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3213 3214 const int OpIndices[] = { Src0Idx, Src1Idx }; 3215 3216 unsigned NumLiterals = 0; 3217 uint32_t LiteralValue; 3218 3219 for (int OpIdx : OpIndices) { 3220 if (OpIdx == -1) break; 3221 3222 const MCOperand &MO = Inst.getOperand(OpIdx); 3223 if (MO.isImm() && 3224 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3225 AMDGPU::isSISrcOperand(Desc, OpIdx) && 3226 !isInlineConstant(Inst, OpIdx)) { 3227 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3228 if (NumLiterals == 0 || LiteralValue != Value) { 3229 LiteralValue = Value; 3230 ++NumLiterals; 3231 } 3232 } 3233 } 3234 3235 return NumLiterals <= 1; 3236 } 3237 3238 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3239 const unsigned Opc = Inst.getOpcode(); 3240 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3241 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3242 int OpSelIdx = 
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3243 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3244 3245 if (OpSel & ~3) 3246 return false; 3247 } 3248 return true; 3249 } 3250 3251 // Check if VCC register matches wavefront size 3252 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3253 auto FB = getFeatureBits(); 3254 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3255 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3256 } 3257 3258 // VOP3 literal is only allowed in GFX10+ and only one can be used 3259 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3260 unsigned Opcode = Inst.getOpcode(); 3261 const MCInstrDesc &Desc = MII.get(Opcode); 3262 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3263 return true; 3264 3265 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3266 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3267 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3268 3269 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3270 3271 unsigned NumLiterals = 0; 3272 uint32_t LiteralValue; 3273 3274 for (int OpIdx : OpIndices) { 3275 if (OpIdx == -1) break; 3276 3277 const MCOperand &MO = Inst.getOperand(OpIdx); 3278 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3279 continue; 3280 3281 if (!isInlineConstant(Inst, OpIdx)) { 3282 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3283 if (NumLiterals == 0 || LiteralValue != Value) { 3284 LiteralValue = Value; 3285 ++NumLiterals; 3286 } 3287 } 3288 } 3289 3290 return !NumLiterals || 3291 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3292 } 3293 3294 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3295 const SMLoc &IDLoc, 3296 const OperandVector &Operands) { 3297 if (!validateLdsDirect(Inst)) { 3298 Error(IDLoc, 3299 "invalid use of lds_direct"); 3300 return false; 
3301 } 3302 if (!validateSOPLiteral(Inst)) { 3303 Error(IDLoc, 3304 "only one literal operand is allowed"); 3305 return false; 3306 } 3307 if (!validateVOP3Literal(Inst)) { 3308 Error(IDLoc, 3309 "invalid literal operand"); 3310 return false; 3311 } 3312 if (!validateConstantBusLimitations(Inst)) { 3313 Error(IDLoc, 3314 "invalid operand (violates constant bus restrictions)"); 3315 return false; 3316 } 3317 if (!validateEarlyClobberLimitations(Inst)) { 3318 Error(IDLoc, 3319 "destination must be different than all sources"); 3320 return false; 3321 } 3322 if (!validateIntClampSupported(Inst)) { 3323 Error(IDLoc, 3324 "integer clamping is not supported on this GPU"); 3325 return false; 3326 } 3327 if (!validateOpSel(Inst)) { 3328 Error(IDLoc, 3329 "invalid op_sel operand"); 3330 return false; 3331 } 3332 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3333 if (!validateMIMGD16(Inst)) { 3334 Error(IDLoc, 3335 "d16 modifier is not supported on this GPU"); 3336 return false; 3337 } 3338 if (!validateMIMGDim(Inst)) { 3339 Error(IDLoc, "dim modifier is required on this GPU"); 3340 return false; 3341 } 3342 if (!validateMIMGDataSize(Inst)) { 3343 Error(IDLoc, 3344 "image data size does not match dmask and tfe"); 3345 return false; 3346 } 3347 if (!validateMIMGAddrSize(Inst)) { 3348 Error(IDLoc, 3349 "image address size does not match dim and a16"); 3350 return false; 3351 } 3352 if (!validateMIMGAtomicDMask(Inst)) { 3353 Error(IDLoc, 3354 "invalid atomic image dmask"); 3355 return false; 3356 } 3357 if (!validateMIMGGatherDMask(Inst)) { 3358 Error(IDLoc, 3359 "invalid image_gather dmask: only one bit must be set"); 3360 return false; 3361 } 3362 if (!validateFlatOffset(Inst, Operands)) { 3363 return false; 3364 } 3365 3366 return true; 3367 } 3368 3369 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3370 const FeatureBitset &FBS, 3371 unsigned VariantID = 0); 3372 3373 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, 
unsigned &Opcode, 3374 OperandVector &Operands, 3375 MCStreamer &Out, 3376 uint64_t &ErrorInfo, 3377 bool MatchingInlineAsm) { 3378 MCInst Inst; 3379 unsigned Result = Match_Success; 3380 for (auto Variant : getMatchedVariants()) { 3381 uint64_t EI; 3382 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3383 Variant); 3384 // We order match statuses from least to most specific. We use most specific 3385 // status as resulting 3386 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3387 if ((R == Match_Success) || 3388 (R == Match_PreferE32) || 3389 (R == Match_MissingFeature && Result != Match_PreferE32) || 3390 (R == Match_InvalidOperand && Result != Match_MissingFeature 3391 && Result != Match_PreferE32) || 3392 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3393 && Result != Match_MissingFeature 3394 && Result != Match_PreferE32)) { 3395 Result = R; 3396 ErrorInfo = EI; 3397 } 3398 if (R == Match_Success) 3399 break; 3400 } 3401 3402 switch (Result) { 3403 default: break; 3404 case Match_Success: 3405 if (!validateInstruction(Inst, IDLoc, Operands)) { 3406 return true; 3407 } 3408 Inst.setLoc(IDLoc); 3409 Out.EmitInstruction(Inst, getSTI()); 3410 return false; 3411 3412 case Match_MissingFeature: 3413 return Error(IDLoc, "instruction not supported on this GPU"); 3414 3415 case Match_MnemonicFail: { 3416 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3417 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3418 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3419 return Error(IDLoc, "invalid instruction" + Suggestion, 3420 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3421 } 3422 3423 case Match_InvalidOperand: { 3424 SMLoc ErrorLoc = IDLoc; 3425 if (ErrorInfo != ~0ULL) { 3426 if (ErrorInfo >= Operands.size()) { 3427 return Error(IDLoc, "too few operands for instruction"); 3428 } 3429 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3430 if (ErrorLoc == 
SMLoc()) 3431 ErrorLoc = IDLoc; 3432 } 3433 return Error(ErrorLoc, "invalid operand for instruction"); 3434 } 3435 3436 case Match_PreferE32: 3437 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3438 "should be encoded as e32"); 3439 } 3440 llvm_unreachable("Implement any new match types added!"); 3441 } 3442 3443 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3444 int64_t Tmp = -1; 3445 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3446 return true; 3447 } 3448 if (getParser().parseAbsoluteExpression(Tmp)) { 3449 return true; 3450 } 3451 Ret = static_cast<uint32_t>(Tmp); 3452 return false; 3453 } 3454 3455 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3456 uint32_t &Minor) { 3457 if (ParseAsAbsoluteExpression(Major)) 3458 return TokError("invalid major version"); 3459 3460 if (getLexer().isNot(AsmToken::Comma)) 3461 return TokError("minor version number required, comma expected"); 3462 Lex(); 3463 3464 if (ParseAsAbsoluteExpression(Minor)) 3465 return TokError("invalid minor version"); 3466 3467 return false; 3468 } 3469 3470 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3471 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3472 return TokError("directive only supported for amdgcn architecture"); 3473 3474 std::string Target; 3475 3476 SMLoc TargetStart = getTok().getLoc(); 3477 if (getParser().parseEscapedString(Target)) 3478 return true; 3479 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3480 3481 std::string ExpectedTarget; 3482 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3483 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3484 3485 if (Target != ExpectedTargetOS.str()) 3486 return getParser().Error(TargetRange.Start, "target must match options", 3487 TargetRange); 3488 3489 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3490 return false; 3491 } 3492 3493 bool AMDGPUAsmParser::OutOfRangeError(SMRange 
Range) { 3494 return getParser().Error(Range.Start, "value out of range", Range); 3495 } 3496 3497 bool AMDGPUAsmParser::calculateGPRBlocks( 3498 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3499 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3500 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3501 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3502 // TODO(scott.linder): These calculations are duplicated from 3503 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3504 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3505 3506 unsigned NumVGPRs = NextFreeVGPR; 3507 unsigned NumSGPRs = NextFreeSGPR; 3508 3509 if (Version.Major >= 10) 3510 NumSGPRs = 0; 3511 else { 3512 unsigned MaxAddressableNumSGPRs = 3513 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3514 3515 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3516 NumSGPRs > MaxAddressableNumSGPRs) 3517 return OutOfRangeError(SGPRRange); 3518 3519 NumSGPRs += 3520 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3521 3522 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3523 NumSGPRs > MaxAddressableNumSGPRs) 3524 return OutOfRangeError(SGPRRange); 3525 3526 if (Features.test(FeatureSGPRInitBug)) 3527 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3528 } 3529 3530 VGPRBlocks = 3531 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3532 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3533 3534 return false; 3535 } 3536 3537 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3538 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3539 return TokError("directive only supported for amdgcn architecture"); 3540 3541 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3542 return TokError("directive only supported for amdhsa OS"); 3543 3544 StringRef KernelName; 3545 if (getParser().parseIdentifier(KernelName)) 3546 return true; 3547 3548 
kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3549 3550 StringSet<> Seen; 3551 3552 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3553 3554 SMRange VGPRRange; 3555 uint64_t NextFreeVGPR = 0; 3556 SMRange SGPRRange; 3557 uint64_t NextFreeSGPR = 0; 3558 unsigned UserSGPRCount = 0; 3559 bool ReserveVCC = true; 3560 bool ReserveFlatScr = true; 3561 bool ReserveXNACK = hasXNACK(); 3562 Optional<bool> EnableWavefrontSize32; 3563 3564 while (true) { 3565 while (getLexer().is(AsmToken::EndOfStatement)) 3566 Lex(); 3567 3568 if (getLexer().isNot(AsmToken::Identifier)) 3569 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3570 3571 StringRef ID = getTok().getIdentifier(); 3572 SMRange IDRange = getTok().getLocRange(); 3573 Lex(); 3574 3575 if (ID == ".end_amdhsa_kernel") 3576 break; 3577 3578 if (Seen.find(ID) != Seen.end()) 3579 return TokError(".amdhsa_ directives cannot be repeated"); 3580 Seen.insert(ID); 3581 3582 SMLoc ValStart = getTok().getLoc(); 3583 int64_t IVal; 3584 if (getParser().parseAbsoluteExpression(IVal)) 3585 return true; 3586 SMLoc ValEnd = getTok().getLoc(); 3587 SMRange ValRange = SMRange(ValStart, ValEnd); 3588 3589 if (IVal < 0) 3590 return OutOfRangeError(ValRange); 3591 3592 uint64_t Val = IVal; 3593 3594 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3595 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3596 return OutOfRangeError(RANGE); \ 3597 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3598 3599 if (ID == ".amdhsa_group_segment_fixed_size") { 3600 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3601 return OutOfRangeError(ValRange); 3602 KD.group_segment_fixed_size = Val; 3603 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3604 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3605 return OutOfRangeError(ValRange); 3606 KD.private_segment_fixed_size = Val; 3607 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3608 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 3609 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3610 Val, ValRange); 3611 UserSGPRCount += 4; 3612 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3613 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3614 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3615 ValRange); 3616 UserSGPRCount += 2; 3617 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3618 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3619 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3620 ValRange); 3621 UserSGPRCount += 2; 3622 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3623 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3624 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3625 Val, ValRange); 3626 UserSGPRCount += 2; 3627 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3628 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3629 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3630 ValRange); 3631 UserSGPRCount += 2; 3632 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3633 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3635 ValRange); 3636 UserSGPRCount += 2; 3637 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3638 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3639 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3640 Val, ValRange); 3641 UserSGPRCount += 1; 3642 } else if (ID == ".amdhsa_wavefront_size32") { 3643 if (IVersion.Major < 10) 3644 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3645 IDRange); 3646 EnableWavefrontSize32 = Val; 3647 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3648 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3649 Val, ValRange); 3650 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3651 PARSE_BITS_ENTRY( 3652 KD.compute_pgm_rsrc2, 3653 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3654 ValRange); 3655 } else if (ID == 
".amdhsa_system_sgpr_workgroup_id_x") { 3656 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3657 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3658 ValRange); 3659 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3660 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3661 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3662 ValRange); 3663 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3664 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3665 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3666 ValRange); 3667 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3668 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3669 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3670 ValRange); 3671 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3672 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3673 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3674 ValRange); 3675 } else if (ID == ".amdhsa_next_free_vgpr") { 3676 VGPRRange = ValRange; 3677 NextFreeVGPR = Val; 3678 } else if (ID == ".amdhsa_next_free_sgpr") { 3679 SGPRRange = ValRange; 3680 NextFreeSGPR = Val; 3681 } else if (ID == ".amdhsa_reserve_vcc") { 3682 if (!isUInt<1>(Val)) 3683 return OutOfRangeError(ValRange); 3684 ReserveVCC = Val; 3685 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3686 if (IVersion.Major < 7) 3687 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3688 IDRange); 3689 if (!isUInt<1>(Val)) 3690 return OutOfRangeError(ValRange); 3691 ReserveFlatScr = Val; 3692 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3693 if (IVersion.Major < 8) 3694 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3695 IDRange); 3696 if (!isUInt<1>(Val)) 3697 return OutOfRangeError(ValRange); 3698 ReserveXNACK = Val; 3699 } else if (ID == ".amdhsa_float_round_mode_32") { 3700 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3701 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3702 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3703 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3704 
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3705 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3706 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3707 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3708 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3709 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3710 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3711 ValRange); 3712 } else if (ID == ".amdhsa_dx10_clamp") { 3713 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3714 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3715 } else if (ID == ".amdhsa_ieee_mode") { 3716 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3717 Val, ValRange); 3718 } else if (ID == ".amdhsa_fp16_overflow") { 3719 if (IVersion.Major < 9) 3720 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3721 IDRange); 3722 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3723 ValRange); 3724 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3725 if (IVersion.Major < 10) 3726 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3727 IDRange); 3728 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3729 ValRange); 3730 } else if (ID == ".amdhsa_memory_ordered") { 3731 if (IVersion.Major < 10) 3732 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3733 IDRange); 3734 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3735 ValRange); 3736 } else if (ID == ".amdhsa_forward_progress") { 3737 if (IVersion.Major < 10) 3738 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3739 IDRange); 3740 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3741 ValRange); 3742 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3743 PARSE_BITS_ENTRY( 3744 KD.compute_pgm_rsrc2, 3745 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3746 ValRange); 3747 } else if (ID == 
".amdhsa_exception_fp_denorm_src") { 3748 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3749 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3750 Val, ValRange); 3751 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3752 PARSE_BITS_ENTRY( 3753 KD.compute_pgm_rsrc2, 3754 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3755 ValRange); 3756 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3757 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3759 Val, ValRange); 3760 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3761 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3762 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3763 Val, ValRange); 3764 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3765 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3766 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3767 Val, ValRange); 3768 } else if (ID == ".amdhsa_exception_int_div_zero") { 3769 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3770 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3771 Val, ValRange); 3772 } else { 3773 return getParser().Error(IDRange.Start, 3774 "unknown .amdhsa_kernel directive", IDRange); 3775 } 3776 3777 #undef PARSE_BITS_ENTRY 3778 } 3779 3780 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3781 return TokError(".amdhsa_next_free_vgpr directive is required"); 3782 3783 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3784 return TokError(".amdhsa_next_free_sgpr directive is required"); 3785 3786 unsigned VGPRBlocks; 3787 unsigned SGPRBlocks; 3788 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3789 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3790 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3791 SGPRBlocks)) 3792 return true; 3793 3794 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3795 VGPRBlocks)) 3796 return OutOfRangeError(VGPRRange); 3797 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 
3798 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3799 3800 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3801 SGPRBlocks)) 3802 return OutOfRangeError(SGPRRange); 3803 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3804 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3805 SGPRBlocks); 3806 3807 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3808 return TokError("too many user SGPRs enabled"); 3809 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3810 UserSGPRCount); 3811 3812 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3813 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3814 ReserveFlatScr, ReserveXNACK); 3815 return false; 3816 } 3817 3818 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3819 uint32_t Major; 3820 uint32_t Minor; 3821 3822 if (ParseDirectiveMajorMinor(Major, Minor)) 3823 return true; 3824 3825 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3826 return false; 3827 } 3828 3829 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3830 uint32_t Major; 3831 uint32_t Minor; 3832 uint32_t Stepping; 3833 StringRef VendorName; 3834 StringRef ArchName; 3835 3836 // If this directive has no arguments, then use the ISA version for the 3837 // targeted GPU. 
3838 if (getLexer().is(AsmToken::EndOfStatement)) { 3839 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3840 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3841 ISA.Stepping, 3842 "AMD", "AMDGPU"); 3843 return false; 3844 } 3845 3846 if (ParseDirectiveMajorMinor(Major, Minor)) 3847 return true; 3848 3849 if (getLexer().isNot(AsmToken::Comma)) 3850 return TokError("stepping version number required, comma expected"); 3851 Lex(); 3852 3853 if (ParseAsAbsoluteExpression(Stepping)) 3854 return TokError("invalid stepping version"); 3855 3856 if (getLexer().isNot(AsmToken::Comma)) 3857 return TokError("vendor name required, comma expected"); 3858 Lex(); 3859 3860 if (getLexer().isNot(AsmToken::String)) 3861 return TokError("invalid vendor name"); 3862 3863 VendorName = getLexer().getTok().getStringContents(); 3864 Lex(); 3865 3866 if (getLexer().isNot(AsmToken::Comma)) 3867 return TokError("arch name required, comma expected"); 3868 Lex(); 3869 3870 if (getLexer().isNot(AsmToken::String)) 3871 return TokError("invalid arch name"); 3872 3873 ArchName = getLexer().getTok().getStringContents(); 3874 Lex(); 3875 3876 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3877 VendorName, ArchName); 3878 return false; 3879 } 3880 3881 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3882 amd_kernel_code_t &Header) { 3883 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3884 // assembly for backwards compatibility. 
3885 if (ID == "max_scratch_backing_memory_byte_size") { 3886 Parser.eatToEndOfStatement(); 3887 return false; 3888 } 3889 3890 SmallString<40> ErrStr; 3891 raw_svector_ostream Err(ErrStr); 3892 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3893 return TokError(Err.str()); 3894 } 3895 Lex(); 3896 3897 if (ID == "enable_wavefront_size32") { 3898 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 3899 if (!isGFX10()) 3900 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 3901 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3902 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 3903 } else { 3904 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3905 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 3906 } 3907 } 3908 3909 if (ID == "wavefront_size") { 3910 if (Header.wavefront_size == 5) { 3911 if (!isGFX10()) 3912 return TokError("wavefront_size=5 is only allowed on GFX10+"); 3913 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3914 return TokError("wavefront_size=5 requires +WavefrontSize32"); 3915 } else if (Header.wavefront_size == 6) { 3916 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3917 return TokError("wavefront_size=6 requires +WavefrontSize64"); 3918 } 3919 } 3920 3921 if (ID == "enable_wgp_mode") { 3922 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3923 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3924 } 3925 3926 if (ID == "enable_mem_ordered") { 3927 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3928 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3929 } 3930 3931 if (ID == "enable_fwd_progress") { 3932 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3933 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3934 } 3935 3936 return false; 3937 } 3938 3939 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3940 amd_kernel_code_t Header; 3941 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3942 3943 while (true) { 3944 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3945 // will set the current token to EndOfStatement. 3946 while(getLexer().is(AsmToken::EndOfStatement)) 3947 Lex(); 3948 3949 if (getLexer().isNot(AsmToken::Identifier)) 3950 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3951 3952 StringRef ID = getLexer().getTok().getIdentifier(); 3953 Lex(); 3954 3955 if (ID == ".end_amd_kernel_code_t") 3956 break; 3957 3958 if (ParseAMDKernelCodeTValue(ID, Header)) 3959 return true; 3960 } 3961 3962 getTargetStreamer().EmitAMDKernelCodeT(Header); 3963 3964 return false; 3965 } 3966 3967 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3968 if (getLexer().isNot(AsmToken::Identifier)) 3969 return TokError("expected symbol name"); 3970 3971 StringRef KernelName = Parser.getTok().getString(); 3972 3973 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3974 ELF::STT_AMDGPU_HSA_KERNEL); 3975 Lex(); 3976 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3977 KernelScope.initialize(getContext()); 3978 return false; 3979 } 3980 3981 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3982 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3983 return Error(getParser().getTok().getLoc(), 3984 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3985 "architectures"); 3986 } 3987 3988 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3989 3990 std::string ISAVersionStringFromSTI; 3991 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3992 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3993 3994 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3995 return Error(getParser().getTok().getLoc(), 3996 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3997 "arguments 
specified through the command line"); 3998 } 3999 4000 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4001 Lex(); 4002 4003 return false; 4004 } 4005 4006 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4007 const char *AssemblerDirectiveBegin; 4008 const char *AssemblerDirectiveEnd; 4009 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4010 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4011 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4012 HSAMD::V3::AssemblerDirectiveEnd) 4013 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4014 HSAMD::AssemblerDirectiveEnd); 4015 4016 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4017 return Error(getParser().getTok().getLoc(), 4018 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4019 "not available on non-amdhsa OSes")).str()); 4020 } 4021 4022 std::string HSAMetadataString; 4023 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4024 HSAMetadataString)) 4025 return true; 4026 4027 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4028 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4029 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4030 } else { 4031 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4032 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4033 } 4034 4035 return false; 4036 } 4037 4038 /// Common code to parse out a block of text (typically YAML) between start and 4039 /// end directives. 
4040 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4041 const char *AssemblerDirectiveEnd, 4042 std::string &CollectString) { 4043 4044 raw_string_ostream CollectStream(CollectString); 4045 4046 getLexer().setSkipSpace(false); 4047 4048 bool FoundEnd = false; 4049 while (!getLexer().is(AsmToken::Eof)) { 4050 while (getLexer().is(AsmToken::Space)) { 4051 CollectStream << getLexer().getTok().getString(); 4052 Lex(); 4053 } 4054 4055 if (getLexer().is(AsmToken::Identifier)) { 4056 StringRef ID = getLexer().getTok().getIdentifier(); 4057 if (ID == AssemblerDirectiveEnd) { 4058 Lex(); 4059 FoundEnd = true; 4060 break; 4061 } 4062 } 4063 4064 CollectStream << Parser.parseStringToEndOfStatement() 4065 << getContext().getAsmInfo()->getSeparatorString(); 4066 4067 Parser.eatToEndOfStatement(); 4068 } 4069 4070 getLexer().setSkipSpace(true); 4071 4072 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4073 return TokError(Twine("expected directive ") + 4074 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4075 } 4076 4077 CollectStream.flush(); 4078 return false; 4079 } 4080 4081 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4082 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4083 std::string String; 4084 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4085 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4086 return true; 4087 4088 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4089 if (!PALMetadata->setFromString(String)) 4090 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4091 return false; 4092 } 4093 4094 /// Parse the assembler directive for old linear-format PAL metadata. 
4095 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4096 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4097 return Error(getParser().getTok().getLoc(), 4098 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4099 "not available on non-amdpal OSes")).str()); 4100 } 4101 4102 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4103 PALMetadata->setLegacy(); 4104 for (;;) { 4105 uint32_t Key, Value; 4106 if (ParseAsAbsoluteExpression(Key)) { 4107 return TokError(Twine("invalid value in ") + 4108 Twine(PALMD::AssemblerDirective)); 4109 } 4110 if (getLexer().isNot(AsmToken::Comma)) { 4111 return TokError(Twine("expected an even number of values in ") + 4112 Twine(PALMD::AssemblerDirective)); 4113 } 4114 Lex(); 4115 if (ParseAsAbsoluteExpression(Value)) { 4116 return TokError(Twine("invalid value in ") + 4117 Twine(PALMD::AssemblerDirective)); 4118 } 4119 PALMetadata->setRegister(Key, Value); 4120 if (getLexer().isNot(AsmToken::Comma)) 4121 break; 4122 Lex(); 4123 } 4124 return false; 4125 } 4126 4127 /// ParseDirectiveAMDGPULDS 4128 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4129 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4130 if (getParser().checkForValidSection()) 4131 return true; 4132 4133 StringRef Name; 4134 SMLoc NameLoc = getLexer().getLoc(); 4135 if (getParser().parseIdentifier(Name)) 4136 return TokError("expected identifier in directive"); 4137 4138 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4139 if (parseToken(AsmToken::Comma, "expected ','")) 4140 return true; 4141 4142 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4143 4144 int64_t Size; 4145 SMLoc SizeLoc = getLexer().getLoc(); 4146 if (getParser().parseAbsoluteExpression(Size)) 4147 return true; 4148 if (Size < 0) 4149 return Error(SizeLoc, "size must be non-negative"); 4150 if (Size > LocalMemorySize) 4151 return Error(SizeLoc, "size is too large"); 4152 4153 int64_t Align = 4; 4154 if 
(getLexer().is(AsmToken::Comma)) { 4155 Lex(); 4156 SMLoc AlignLoc = getLexer().getLoc(); 4157 if (getParser().parseAbsoluteExpression(Align)) 4158 return true; 4159 if (Align < 0 || !isPowerOf2_64(Align)) 4160 return Error(AlignLoc, "alignment must be a power of two"); 4161 4162 // Alignment larger than the size of LDS is possible in theory, as long 4163 // as the linker manages to place to symbol at address 0, but we do want 4164 // to make sure the alignment fits nicely into a 32-bit integer. 4165 if (Align >= 1u << 31) 4166 return Error(AlignLoc, "alignment is too large"); 4167 } 4168 4169 if (parseToken(AsmToken::EndOfStatement, 4170 "unexpected token in '.amdgpu_lds' directive")) 4171 return true; 4172 4173 Symbol->redefineIfPossible(); 4174 if (!Symbol->isUndefined()) 4175 return Error(NameLoc, "invalid symbol redefinition"); 4176 4177 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4178 return false; 4179 } 4180 4181 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4182 StringRef IDVal = DirectiveID.getString(); 4183 4184 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4185 if (IDVal == ".amdgcn_target") 4186 return ParseDirectiveAMDGCNTarget(); 4187 4188 if (IDVal == ".amdhsa_kernel") 4189 return ParseDirectiveAMDHSAKernel(); 4190 4191 // TODO: Restructure/combine with PAL metadata directive. 
4192 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4193 return ParseDirectiveHSAMetadata(); 4194 } else { 4195 if (IDVal == ".hsa_code_object_version") 4196 return ParseDirectiveHSACodeObjectVersion(); 4197 4198 if (IDVal == ".hsa_code_object_isa") 4199 return ParseDirectiveHSACodeObjectISA(); 4200 4201 if (IDVal == ".amd_kernel_code_t") 4202 return ParseDirectiveAMDKernelCodeT(); 4203 4204 if (IDVal == ".amdgpu_hsa_kernel") 4205 return ParseDirectiveAMDGPUHsaKernel(); 4206 4207 if (IDVal == ".amd_amdgpu_isa") 4208 return ParseDirectiveISAVersion(); 4209 4210 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4211 return ParseDirectiveHSAMetadata(); 4212 } 4213 4214 if (IDVal == ".amdgpu_lds") 4215 return ParseDirectiveAMDGPULDS(); 4216 4217 if (IDVal == PALMD::AssemblerDirectiveBegin) 4218 return ParseDirectivePALMetadataBegin(); 4219 4220 if (IDVal == PALMD::AssemblerDirective) 4221 return ParseDirectivePALMetadata(); 4222 4223 return true; 4224 } 4225 4226 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4227 unsigned RegNo) const { 4228 4229 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4230 R.isValid(); ++R) { 4231 if (*R == RegNo) 4232 return isGFX9() || isGFX10(); 4233 } 4234 4235 // GFX10 has 2 more SGPRs 104 and 105. 
4236 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4237 R.isValid(); ++R) { 4238 if (*R == RegNo) 4239 return hasSGPR104_SGPR105(); 4240 } 4241 4242 switch (RegNo) { 4243 case AMDGPU::SRC_SHARED_BASE: 4244 case AMDGPU::SRC_SHARED_LIMIT: 4245 case AMDGPU::SRC_PRIVATE_BASE: 4246 case AMDGPU::SRC_PRIVATE_LIMIT: 4247 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4248 return !isCI() && !isSI() && !isVI(); 4249 case AMDGPU::TBA: 4250 case AMDGPU::TBA_LO: 4251 case AMDGPU::TBA_HI: 4252 case AMDGPU::TMA: 4253 case AMDGPU::TMA_LO: 4254 case AMDGPU::TMA_HI: 4255 return !isGFX9() && !isGFX10(); 4256 case AMDGPU::XNACK_MASK: 4257 case AMDGPU::XNACK_MASK_LO: 4258 case AMDGPU::XNACK_MASK_HI: 4259 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4260 case AMDGPU::SGPR_NULL: 4261 return isGFX10(); 4262 default: 4263 break; 4264 } 4265 4266 if (isCI()) 4267 return true; 4268 4269 if (isSI() || isGFX10()) { 4270 // No flat_scr on SI. 4271 // On GFX10 flat scratch is not a valid register operand and can only be 4272 // accessed with s_setreg/s_getreg. 4273 switch (RegNo) { 4274 case AMDGPU::FLAT_SCR: 4275 case AMDGPU::FLAT_SCR_LO: 4276 case AMDGPU::FLAT_SCR_HI: 4277 return false; 4278 default: 4279 return true; 4280 } 4281 } 4282 4283 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4284 // SI/CI have. 4285 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4286 R.isValid(); ++R) { 4287 if (*R == RegNo) 4288 return hasSGPR102_SGPR103(); 4289 } 4290 4291 return true; 4292 } 4293 4294 OperandMatchResultTy 4295 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4296 OperandMode Mode) { 4297 // Try to parse with a custom parser 4298 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4299 4300 // If we successfully parsed the operand or if there as an error parsing, 4301 // we are done. 
4302 // 4303 // If we are parsing after we reach EndOfStatement then this means we 4304 // are appending default values to the Operands list. This is only done 4305 // by custom parser, so we shouldn't continue on to the generic parsing. 4306 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4307 getLexer().is(AsmToken::EndOfStatement)) 4308 return ResTy; 4309 4310 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4311 unsigned Prefix = Operands.size(); 4312 SMLoc LBraceLoc = getTok().getLoc(); 4313 Parser.Lex(); // eat the '[' 4314 4315 for (;;) { 4316 ResTy = parseReg(Operands); 4317 if (ResTy != MatchOperand_Success) 4318 return ResTy; 4319 4320 if (getLexer().is(AsmToken::RBrac)) 4321 break; 4322 4323 if (getLexer().isNot(AsmToken::Comma)) 4324 return MatchOperand_ParseFail; 4325 Parser.Lex(); 4326 } 4327 4328 if (Operands.size() - Prefix > 1) { 4329 Operands.insert(Operands.begin() + Prefix, 4330 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4331 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4332 getTok().getLoc())); 4333 } 4334 4335 Parser.Lex(); // eat the ']' 4336 return MatchOperand_Success; 4337 } 4338 4339 return parseRegOrImm(Operands); 4340 } 4341 4342 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4343 // Clear any forced encodings from the previous instruction. 
4344 setForcedEncodingSize(0); 4345 setForcedDPP(false); 4346 setForcedSDWA(false); 4347 4348 if (Name.endswith("_e64")) { 4349 setForcedEncodingSize(64); 4350 return Name.substr(0, Name.size() - 4); 4351 } else if (Name.endswith("_e32")) { 4352 setForcedEncodingSize(32); 4353 return Name.substr(0, Name.size() - 4); 4354 } else if (Name.endswith("_dpp")) { 4355 setForcedDPP(true); 4356 return Name.substr(0, Name.size() - 4); 4357 } else if (Name.endswith("_sdwa")) { 4358 setForcedSDWA(true); 4359 return Name.substr(0, Name.size() - 5); 4360 } 4361 return Name; 4362 } 4363 4364 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4365 StringRef Name, 4366 SMLoc NameLoc, OperandVector &Operands) { 4367 // Add the instruction mnemonic 4368 Name = parseMnemonicSuffix(Name); 4369 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4370 4371 bool IsMIMG = Name.startswith("image_"); 4372 4373 while (!getLexer().is(AsmToken::EndOfStatement)) { 4374 OperandMode Mode = OperandMode_Default; 4375 if (IsMIMG && isGFX10() && Operands.size() == 2) 4376 Mode = OperandMode_NSA; 4377 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4378 4379 // Eat the comma or space if there is one. 4380 if (getLexer().is(AsmToken::Comma)) 4381 Parser.Lex(); 4382 4383 switch (Res) { 4384 case MatchOperand_Success: break; 4385 case MatchOperand_ParseFail: 4386 // FIXME: use real operand location rather than the current location. 4387 Error(getLexer().getLoc(), "failed parsing operand."); 4388 while (!getLexer().is(AsmToken::EndOfStatement)) { 4389 Parser.Lex(); 4390 } 4391 return true; 4392 case MatchOperand_NoMatch: 4393 // FIXME: use real operand location rather than the current location. 
4394 Error(getLexer().getLoc(), "not a valid operand."); 4395 while (!getLexer().is(AsmToken::EndOfStatement)) { 4396 Parser.Lex(); 4397 } 4398 return true; 4399 } 4400 } 4401 4402 return false; 4403 } 4404 4405 //===----------------------------------------------------------------------===// 4406 // Utility functions 4407 //===----------------------------------------------------------------------===// 4408 4409 OperandMatchResultTy 4410 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4411 4412 if (!trySkipId(Prefix, AsmToken::Colon)) 4413 return MatchOperand_NoMatch; 4414 4415 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4416 } 4417 4418 OperandMatchResultTy 4419 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4420 AMDGPUOperand::ImmTy ImmTy, 4421 bool (*ConvertResult)(int64_t&)) { 4422 SMLoc S = getLoc(); 4423 int64_t Value = 0; 4424 4425 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4426 if (Res != MatchOperand_Success) 4427 return Res; 4428 4429 if (ConvertResult && !ConvertResult(Value)) { 4430 Error(S, "invalid " + StringRef(Prefix) + " value."); 4431 } 4432 4433 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4434 return MatchOperand_Success; 4435 } 4436 4437 OperandMatchResultTy 4438 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4439 OperandVector &Operands, 4440 AMDGPUOperand::ImmTy ImmTy, 4441 bool (*ConvertResult)(int64_t&)) { 4442 SMLoc S = getLoc(); 4443 if (!trySkipId(Prefix, AsmToken::Colon)) 4444 return MatchOperand_NoMatch; 4445 4446 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4447 return MatchOperand_ParseFail; 4448 4449 unsigned Val = 0; 4450 const unsigned MaxSize = 4; 4451 4452 // FIXME: How to verify the number of elements matches the number of src 4453 // operands? 
4454 for (int I = 0; ; ++I) { 4455 int64_t Op; 4456 SMLoc Loc = getLoc(); 4457 if (!parseExpr(Op)) 4458 return MatchOperand_ParseFail; 4459 4460 if (Op != 0 && Op != 1) { 4461 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4462 return MatchOperand_ParseFail; 4463 } 4464 4465 Val |= (Op << I); 4466 4467 if (trySkipToken(AsmToken::RBrac)) 4468 break; 4469 4470 if (I + 1 == MaxSize) { 4471 Error(getLoc(), "expected a closing square bracket"); 4472 return MatchOperand_ParseFail; 4473 } 4474 4475 if (!skipToken(AsmToken::Comma, "expected a comma")) 4476 return MatchOperand_ParseFail; 4477 } 4478 4479 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4480 return MatchOperand_Success; 4481 } 4482 4483 OperandMatchResultTy 4484 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4485 AMDGPUOperand::ImmTy ImmTy) { 4486 int64_t Bit = 0; 4487 SMLoc S = Parser.getTok().getLoc(); 4488 4489 // We are at the end of the statement, and this is a default argument, so 4490 // use a default value. 
4491 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4492 switch(getLexer().getKind()) { 4493 case AsmToken::Identifier: { 4494 StringRef Tok = Parser.getTok().getString(); 4495 if (Tok == Name) { 4496 if (Tok == "r128" && isGFX9()) 4497 Error(S, "r128 modifier is not supported on this GPU"); 4498 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4499 Error(S, "a16 modifier is not supported on this GPU"); 4500 Bit = 1; 4501 Parser.Lex(); 4502 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4503 Bit = 0; 4504 Parser.Lex(); 4505 } else { 4506 return MatchOperand_NoMatch; 4507 } 4508 break; 4509 } 4510 default: 4511 return MatchOperand_NoMatch; 4512 } 4513 } 4514 4515 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4516 return MatchOperand_ParseFail; 4517 4518 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4519 return MatchOperand_Success; 4520 } 4521 4522 static void addOptionalImmOperand( 4523 MCInst& Inst, const OperandVector& Operands, 4524 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4525 AMDGPUOperand::ImmTy ImmT, 4526 int64_t Default = 0) { 4527 auto i = OptionalIdx.find(ImmT); 4528 if (i != OptionalIdx.end()) { 4529 unsigned Idx = i->second; 4530 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4531 } else { 4532 Inst.addOperand(MCOperand::createImm(Default)); 4533 } 4534 } 4535 4536 OperandMatchResultTy 4537 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4538 if (getLexer().isNot(AsmToken::Identifier)) { 4539 return MatchOperand_NoMatch; 4540 } 4541 StringRef Tok = Parser.getTok().getString(); 4542 if (Tok != Prefix) { 4543 return MatchOperand_NoMatch; 4544 } 4545 4546 Parser.Lex(); 4547 if (getLexer().isNot(AsmToken::Colon)) { 4548 return MatchOperand_ParseFail; 4549 } 4550 4551 Parser.Lex(); 4552 if (getLexer().isNot(AsmToken::Identifier)) { 4553 return MatchOperand_ParseFail; 4554 } 4555 4556 Value = Parser.getTok().getString(); 4557 return MatchOperand_Success; 4558 } 4559 4560 // 
dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4561 // values to live in a joint format operand in the MCInst encoding. 4562 OperandMatchResultTy 4563 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4564 SMLoc S = Parser.getTok().getLoc(); 4565 int64_t Dfmt = 0, Nfmt = 0; 4566 // dfmt and nfmt can appear in either order, and each is optional. 4567 bool GotDfmt = false, GotNfmt = false; 4568 while (!GotDfmt || !GotNfmt) { 4569 if (!GotDfmt) { 4570 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4571 if (Res != MatchOperand_NoMatch) { 4572 if (Res != MatchOperand_Success) 4573 return Res; 4574 if (Dfmt >= 16) { 4575 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4576 return MatchOperand_ParseFail; 4577 } 4578 GotDfmt = true; 4579 Parser.Lex(); 4580 continue; 4581 } 4582 } 4583 if (!GotNfmt) { 4584 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4585 if (Res != MatchOperand_NoMatch) { 4586 if (Res != MatchOperand_Success) 4587 return Res; 4588 if (Nfmt >= 8) { 4589 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4590 return MatchOperand_ParseFail; 4591 } 4592 GotNfmt = true; 4593 Parser.Lex(); 4594 continue; 4595 } 4596 } 4597 break; 4598 } 4599 if (!GotDfmt && !GotNfmt) 4600 return MatchOperand_NoMatch; 4601 auto Format = Dfmt | Nfmt << 4; 4602 Operands.push_back( 4603 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4604 return MatchOperand_Success; 4605 } 4606 4607 //===----------------------------------------------------------------------===// 4608 // ds 4609 //===----------------------------------------------------------------------===// 4610 4611 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4612 const OperandVector &Operands) { 4613 OptionalImmIndexMap OptionalIdx; 4614 4615 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4616 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4617 4618 // Add the register arguments 4619 if (Op.isReg()) { 4620 Op.addRegOperands(Inst, 1); 4621 
continue; 4622 } 4623 4624 // Handle optional arguments 4625 OptionalIdx[Op.getImmTy()] = i; 4626 } 4627 4628 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4630 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4631 4632 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4633 } 4634 4635 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4636 bool IsGdsHardcoded) { 4637 OptionalImmIndexMap OptionalIdx; 4638 4639 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4640 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4641 4642 // Add the register arguments 4643 if (Op.isReg()) { 4644 Op.addRegOperands(Inst, 1); 4645 continue; 4646 } 4647 4648 if (Op.isToken() && Op.getToken() == "gds") { 4649 IsGdsHardcoded = true; 4650 continue; 4651 } 4652 4653 // Handle optional arguments 4654 OptionalIdx[Op.getImmTy()] = i; 4655 } 4656 4657 AMDGPUOperand::ImmTy OffsetType = 4658 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4659 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4660 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4661 AMDGPUOperand::ImmTyOffset; 4662 4663 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4664 4665 if (!IsGdsHardcoded) { 4666 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4667 } 4668 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4669 } 4670 4671 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4672 OptionalImmIndexMap OptionalIdx; 4673 4674 unsigned OperandIdx[4]; 4675 unsigned EnMask = 0; 4676 int SrcIdx = 0; 4677 4678 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4679 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4680 4681 // Add the register arguments 4682 if (Op.isReg()) { 4683 assert(SrcIdx < 4); 4684 OperandIdx[SrcIdx] = Inst.size(); 4685 Op.addRegOperands(Inst, 1); 4686 ++SrcIdx; 4687 continue; 4688 } 4689 4690 if (Op.isOff()) { 4691 assert(SrcIdx < 4); 4692 OperandIdx[SrcIdx] = Inst.size(); 4693 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4694 ++SrcIdx; 4695 continue; 4696 } 4697 4698 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4699 Op.addImmOperands(Inst, 1); 4700 continue; 4701 } 4702 4703 if (Op.isToken() && Op.getToken() == "done") 4704 continue; 4705 4706 // Handle optional arguments 4707 OptionalIdx[Op.getImmTy()] = i; 4708 } 4709 4710 assert(SrcIdx == 4); 4711 4712 bool Compr = false; 4713 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4714 Compr = true; 4715 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4716 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4717 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4718 } 4719 4720 for (auto i = 0; i < SrcIdx; ++i) { 4721 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4722 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4723 } 4724 } 4725 4726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4728 4729 Inst.addOperand(MCOperand::createImm(EnMask)); 4730 } 4731 4732 //===----------------------------------------------------------------------===// 4733 // s_waitcnt 4734 //===----------------------------------------------------------------------===// 4735 4736 static bool 4737 encodeCnt( 4738 const AMDGPU::IsaVersion ISA, 4739 int64_t &IntVal, 4740 int64_t CntVal, 4741 bool Saturate, 4742 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4743 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4744 { 4745 bool Failed = false; 4746 4747 IntVal = encode(ISA, IntVal, CntVal); 4748 if (CntVal != decode(ISA, IntVal)) { 4749 if (Saturate) { 4750 IntVal = encode(ISA, IntVal, -1); 4751 } else { 4752 Failed = true; 4753 } 4754 } 4755 return Failed; 4756 } 4757 4758 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4759 4760 SMLoc CntLoc = getLoc(); 4761 StringRef CntName = getTokenStr(); 4762 4763 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4764 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4765 return false; 4766 4767 int64_t CntVal; 4768 SMLoc ValLoc = getLoc(); 4769 if (!parseExpr(CntVal)) 4770 return false; 4771 4772 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4773 4774 bool Failed = true; 4775 bool Sat = CntName.endswith("_sat"); 4776 4777 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4778 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4779 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4780 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4781 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4782 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4783 } 
else { 4784 Error(CntLoc, "invalid counter name " + CntName); 4785 return false; 4786 } 4787 4788 if (Failed) { 4789 Error(ValLoc, "too large value for " + CntName); 4790 return false; 4791 } 4792 4793 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4794 return false; 4795 4796 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4797 if (isToken(AsmToken::EndOfStatement)) { 4798 Error(getLoc(), "expected a counter name"); 4799 return false; 4800 } 4801 } 4802 4803 return true; 4804 } 4805 4806 OperandMatchResultTy 4807 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4808 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4809 int64_t Waitcnt = getWaitcntBitMask(ISA); 4810 SMLoc S = getLoc(); 4811 4812 // If parse failed, do not return error code 4813 // to avoid excessive error messages. 4814 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4815 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4816 } else { 4817 parseExpr(Waitcnt); 4818 } 4819 4820 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4821 return MatchOperand_Success; 4822 } 4823 4824 bool 4825 AMDGPUOperand::isSWaitCnt() const { 4826 return isImm(); 4827 } 4828 4829 //===----------------------------------------------------------------------===// 4830 // hwreg 4831 //===----------------------------------------------------------------------===// 4832 4833 bool 4834 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4835 int64_t &Offset, 4836 int64_t &Width) { 4837 using namespace llvm::AMDGPU::Hwreg; 4838 4839 // The register may be specified by name or using a numeric code 4840 if (isToken(AsmToken::Identifier) && 4841 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4842 HwReg.IsSymbolic = true; 4843 lex(); // skip message name 4844 } else if (!parseExpr(HwReg.Id)) { 4845 return false; 4846 } 4847 4848 if (trySkipToken(AsmToken::RParen)) 4849 return true; 4850 4851 // parse optional 
params 4852 return 4853 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4854 parseExpr(Offset) && 4855 skipToken(AsmToken::Comma, "expected a comma") && 4856 parseExpr(Width) && 4857 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4858 } 4859 4860 bool 4861 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4862 const int64_t Offset, 4863 const int64_t Width, 4864 const SMLoc Loc) { 4865 4866 using namespace llvm::AMDGPU::Hwreg; 4867 4868 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4869 Error(Loc, "specified hardware register is not supported on this GPU"); 4870 return false; 4871 } else if (!isValidHwreg(HwReg.Id)) { 4872 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4873 return false; 4874 } else if (!isValidHwregOffset(Offset)) { 4875 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4876 return false; 4877 } else if (!isValidHwregWidth(Width)) { 4878 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4879 return false; 4880 } 4881 return true; 4882 } 4883 4884 OperandMatchResultTy 4885 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4886 using namespace llvm::AMDGPU::Hwreg; 4887 4888 int64_t ImmVal = 0; 4889 SMLoc Loc = getLoc(); 4890 4891 // If parse failed, do not return error code 4892 // to avoid excessive error messages. 
4893 if (trySkipId("hwreg", AsmToken::LParen)) { 4894 OperandInfoTy HwReg(ID_UNKNOWN_); 4895 int64_t Offset = OFFSET_DEFAULT_; 4896 int64_t Width = WIDTH_DEFAULT_; 4897 if (parseHwregBody(HwReg, Offset, Width) && 4898 validateHwreg(HwReg, Offset, Width, Loc)) { 4899 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 4900 } 4901 } else if (parseExpr(ImmVal)) { 4902 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 4903 Error(Loc, "invalid immediate: only 16-bit values are legal"); 4904 } 4905 4906 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 4907 return MatchOperand_Success; 4908 } 4909 4910 bool AMDGPUOperand::isHwreg() const { 4911 return isImmTy(ImmTyHwreg); 4912 } 4913 4914 //===----------------------------------------------------------------------===// 4915 // sendmsg 4916 //===----------------------------------------------------------------------===// 4917 4918 bool 4919 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 4920 OperandInfoTy &Op, 4921 OperandInfoTy &Stream) { 4922 using namespace llvm::AMDGPU::SendMsg; 4923 4924 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 4925 Msg.IsSymbolic = true; 4926 lex(); // skip message name 4927 } else if (!parseExpr(Msg.Id)) { 4928 return false; 4929 } 4930 4931 if (trySkipToken(AsmToken::Comma)) { 4932 Op.IsDefined = true; 4933 if (isToken(AsmToken::Identifier) && 4934 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 4935 lex(); // skip operation name 4936 } else if (!parseExpr(Op.Id)) { 4937 return false; 4938 } 4939 4940 if (trySkipToken(AsmToken::Comma)) { 4941 Stream.IsDefined = true; 4942 if (!parseExpr(Stream.Id)) 4943 return false; 4944 } 4945 } 4946 4947 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4948 } 4949 4950 bool 4951 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 4952 const OperandInfoTy &Op, 4953 const OperandInfoTy &Stream, 4954 const SMLoc S) { 4955 using namespace llvm::AMDGPU::SendMsg; 
4956 4957 // Validation strictness depends on whether message is specified 4958 // in a symbolc or in a numeric form. In the latter case 4959 // only encoding possibility is checked. 4960 bool Strict = Msg.IsSymbolic; 4961 4962 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 4963 Error(S, "invalid message id"); 4964 return false; 4965 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 4966 Error(S, Op.IsDefined ? 4967 "message does not support operations" : 4968 "missing message operation"); 4969 return false; 4970 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 4971 Error(S, "invalid operation id"); 4972 return false; 4973 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 4974 Error(S, "message operation does not support streams"); 4975 return false; 4976 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 4977 Error(S, "invalid message stream id"); 4978 return false; 4979 } 4980 return true; 4981 } 4982 4983 OperandMatchResultTy 4984 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4985 using namespace llvm::AMDGPU::SendMsg; 4986 4987 int64_t ImmVal = 0; 4988 SMLoc Loc = getLoc(); 4989 4990 // If parse failed, do not return error code 4991 // to avoid excessive error messages. 
4992 if (trySkipId("sendmsg", AsmToken::LParen)) { 4993 OperandInfoTy Msg(ID_UNKNOWN_); 4994 OperandInfoTy Op(OP_NONE_); 4995 OperandInfoTy Stream(STREAM_ID_NONE_); 4996 if (parseSendMsgBody(Msg, Op, Stream) && 4997 validateSendMsg(Msg, Op, Stream, Loc)) { 4998 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 4999 } 5000 } else if (parseExpr(ImmVal)) { 5001 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5002 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5003 } 5004 5005 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5006 return MatchOperand_Success; 5007 } 5008 5009 bool AMDGPUOperand::isSendMsg() const { 5010 return isImmTy(ImmTySendMsg); 5011 } 5012 5013 //===----------------------------------------------------------------------===// 5014 // v_interp 5015 //===----------------------------------------------------------------------===// 5016 5017 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5018 if (getLexer().getKind() != AsmToken::Identifier) 5019 return MatchOperand_NoMatch; 5020 5021 StringRef Str = Parser.getTok().getString(); 5022 int Slot = StringSwitch<int>(Str) 5023 .Case("p10", 0) 5024 .Case("p20", 1) 5025 .Case("p0", 2) 5026 .Default(-1); 5027 5028 SMLoc S = Parser.getTok().getLoc(); 5029 if (Slot == -1) 5030 return MatchOperand_ParseFail; 5031 5032 Parser.Lex(); 5033 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5034 AMDGPUOperand::ImmTyInterpSlot)); 5035 return MatchOperand_Success; 5036 } 5037 5038 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5039 if (getLexer().getKind() != AsmToken::Identifier) 5040 return MatchOperand_NoMatch; 5041 5042 StringRef Str = Parser.getTok().getString(); 5043 if (!Str.startswith("attr")) 5044 return MatchOperand_NoMatch; 5045 5046 StringRef Chan = Str.take_back(2); 5047 int AttrChan = StringSwitch<int>(Chan) 5048 .Case(".x", 0) 5049 .Case(".y", 1) 5050 .Case(".z", 2) 5051 .Case(".w", 3) 
5052 .Default(-1); 5053 if (AttrChan == -1) 5054 return MatchOperand_ParseFail; 5055 5056 Str = Str.drop_back(2).drop_front(4); 5057 5058 uint8_t Attr; 5059 if (Str.getAsInteger(10, Attr)) 5060 return MatchOperand_ParseFail; 5061 5062 SMLoc S = Parser.getTok().getLoc(); 5063 Parser.Lex(); 5064 if (Attr > 63) { 5065 Error(S, "out of bounds attr"); 5066 return MatchOperand_Success; 5067 } 5068 5069 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5070 5071 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5072 AMDGPUOperand::ImmTyInterpAttr)); 5073 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5074 AMDGPUOperand::ImmTyAttrChan)); 5075 return MatchOperand_Success; 5076 } 5077 5078 //===----------------------------------------------------------------------===// 5079 // exp 5080 //===----------------------------------------------------------------------===// 5081 5082 void AMDGPUAsmParser::errorExpTgt() { 5083 Error(Parser.getTok().getLoc(), "invalid exp target"); 5084 } 5085 5086 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5087 uint8_t &Val) { 5088 if (Str == "null") { 5089 Val = 9; 5090 return MatchOperand_Success; 5091 } 5092 5093 if (Str.startswith("mrt")) { 5094 Str = Str.drop_front(3); 5095 if (Str == "z") { // == mrtz 5096 Val = 8; 5097 return MatchOperand_Success; 5098 } 5099 5100 if (Str.getAsInteger(10, Val)) 5101 return MatchOperand_ParseFail; 5102 5103 if (Val > 7) 5104 errorExpTgt(); 5105 5106 return MatchOperand_Success; 5107 } 5108 5109 if (Str.startswith("pos")) { 5110 Str = Str.drop_front(3); 5111 if (Str.getAsInteger(10, Val)) 5112 return MatchOperand_ParseFail; 5113 5114 if (Val > 4 || (Val == 4 && !isGFX10())) 5115 errorExpTgt(); 5116 5117 Val += 12; 5118 return MatchOperand_Success; 5119 } 5120 5121 if (isGFX10() && Str == "prim") { 5122 Val = 20; 5123 return MatchOperand_Success; 5124 } 5125 5126 if (Str.startswith("param")) { 5127 Str = Str.drop_front(5); 5128 if (Str.getAsInteger(10, Val)) 
5129 return MatchOperand_ParseFail; 5130 5131 if (Val >= 32) 5132 errorExpTgt(); 5133 5134 Val += 32; 5135 return MatchOperand_Success; 5136 } 5137 5138 if (Str.startswith("invalid_target_")) { 5139 Str = Str.drop_front(15); 5140 if (Str.getAsInteger(10, Val)) 5141 return MatchOperand_ParseFail; 5142 5143 errorExpTgt(); 5144 return MatchOperand_Success; 5145 } 5146 5147 return MatchOperand_NoMatch; 5148 } 5149 5150 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5151 uint8_t Val; 5152 StringRef Str = Parser.getTok().getString(); 5153 5154 auto Res = parseExpTgtImpl(Str, Val); 5155 if (Res != MatchOperand_Success) 5156 return Res; 5157 5158 SMLoc S = Parser.getTok().getLoc(); 5159 Parser.Lex(); 5160 5161 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5162 AMDGPUOperand::ImmTyExpTgt)); 5163 return MatchOperand_Success; 5164 } 5165 5166 //===----------------------------------------------------------------------===// 5167 // parser helpers 5168 //===----------------------------------------------------------------------===// 5169 5170 bool 5171 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5172 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5173 } 5174 5175 bool 5176 AMDGPUAsmParser::isId(const StringRef Id) const { 5177 return isId(getToken(), Id); 5178 } 5179 5180 bool 5181 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5182 return getTokenKind() == Kind; 5183 } 5184 5185 bool 5186 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5187 if (isId(Id)) { 5188 lex(); 5189 return true; 5190 } 5191 return false; 5192 } 5193 5194 bool 5195 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5196 if (isId(Id) && peekToken().is(Kind)) { 5197 lex(); 5198 lex(); 5199 return true; 5200 } 5201 return false; 5202 } 5203 5204 bool 5205 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5206 if (isToken(Kind)) { 5207 lex(); 5208 return true; 
5209 } 5210 return false; 5211 } 5212 5213 bool 5214 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5215 const StringRef ErrMsg) { 5216 if (!trySkipToken(Kind)) { 5217 Error(getLoc(), ErrMsg); 5218 return false; 5219 } 5220 return true; 5221 } 5222 5223 bool 5224 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5225 return !getParser().parseAbsoluteExpression(Imm); 5226 } 5227 5228 bool 5229 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5230 if (isToken(AsmToken::String)) { 5231 Val = getToken().getStringContents(); 5232 lex(); 5233 return true; 5234 } else { 5235 Error(getLoc(), ErrMsg); 5236 return false; 5237 } 5238 } 5239 5240 AsmToken 5241 AMDGPUAsmParser::getToken() const { 5242 return Parser.getTok(); 5243 } 5244 5245 AsmToken 5246 AMDGPUAsmParser::peekToken() { 5247 return getLexer().peekTok(); 5248 } 5249 5250 void 5251 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5252 auto TokCount = getLexer().peekTokens(Tokens); 5253 5254 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5255 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5256 } 5257 5258 AsmToken::TokenKind 5259 AMDGPUAsmParser::getTokenKind() const { 5260 return getLexer().getKind(); 5261 } 5262 5263 SMLoc 5264 AMDGPUAsmParser::getLoc() const { 5265 return getToken().getLoc(); 5266 } 5267 5268 StringRef 5269 AMDGPUAsmParser::getTokenStr() const { 5270 return getToken().getString(); 5271 } 5272 5273 void 5274 AMDGPUAsmParser::lex() { 5275 Parser.Lex(); 5276 } 5277 5278 //===----------------------------------------------------------------------===// 5279 // swizzle 5280 //===----------------------------------------------------------------------===// 5281 5282 LLVM_READNONE 5283 static unsigned 5284 encodeBitmaskPerm(const unsigned AndMask, 5285 const unsigned OrMask, 5286 const unsigned XorMask) { 5287 using namespace llvm::AMDGPU::Swizzle; 5288 5289 return BITMASK_PERM_ENC | 5290 (AndMask << BITMASK_AND_SHIFT) | 5291 (OrMask << BITMASK_OR_SHIFT) | 
5292 (XorMask << BITMASK_XOR_SHIFT); 5293 } 5294 5295 bool 5296 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5297 const unsigned MinVal, 5298 const unsigned MaxVal, 5299 const StringRef ErrMsg) { 5300 for (unsigned i = 0; i < OpNum; ++i) { 5301 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5302 return false; 5303 } 5304 SMLoc ExprLoc = Parser.getTok().getLoc(); 5305 if (!parseExpr(Op[i])) { 5306 return false; 5307 } 5308 if (Op[i] < MinVal || Op[i] > MaxVal) { 5309 Error(ExprLoc, ErrMsg); 5310 return false; 5311 } 5312 } 5313 5314 return true; 5315 } 5316 5317 bool 5318 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5319 using namespace llvm::AMDGPU::Swizzle; 5320 5321 int64_t Lane[LANE_NUM]; 5322 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5323 "expected a 2-bit lane id")) { 5324 Imm = QUAD_PERM_ENC; 5325 for (unsigned I = 0; I < LANE_NUM; ++I) { 5326 Imm |= Lane[I] << (LANE_SHIFT * I); 5327 } 5328 return true; 5329 } 5330 return false; 5331 } 5332 5333 bool 5334 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5335 using namespace llvm::AMDGPU::Swizzle; 5336 5337 SMLoc S = Parser.getTok().getLoc(); 5338 int64_t GroupSize; 5339 int64_t LaneIdx; 5340 5341 if (!parseSwizzleOperands(1, &GroupSize, 5342 2, 32, 5343 "group size must be in the interval [2,32]")) { 5344 return false; 5345 } 5346 if (!isPowerOf2_64(GroupSize)) { 5347 Error(S, "group size must be a power of two"); 5348 return false; 5349 } 5350 if (parseSwizzleOperands(1, &LaneIdx, 5351 0, GroupSize - 1, 5352 "lane id must be in the interval [0,group size - 1]")) { 5353 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5354 return true; 5355 } 5356 return false; 5357 } 5358 5359 bool 5360 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5361 using namespace llvm::AMDGPU::Swizzle; 5362 5363 SMLoc S = Parser.getTok().getLoc(); 5364 int64_t GroupSize; 5365 5366 if (!parseSwizzleOperands(1, &GroupSize, 5367 2, 32, "group size must 
be in the interval [2,32]")) { 5368 return false; 5369 } 5370 if (!isPowerOf2_64(GroupSize)) { 5371 Error(S, "group size must be a power of two"); 5372 return false; 5373 } 5374 5375 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 5376 return true; 5377 } 5378 5379 bool 5380 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5381 using namespace llvm::AMDGPU::Swizzle; 5382 5383 SMLoc S = Parser.getTok().getLoc(); 5384 int64_t GroupSize; 5385 5386 if (!parseSwizzleOperands(1, &GroupSize, 5387 1, 16, "group size must be in the interval [1,16]")) { 5388 return false; 5389 } 5390 if (!isPowerOf2_64(GroupSize)) { 5391 Error(S, "group size must be a power of two"); 5392 return false; 5393 } 5394 5395 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5396 return true; 5397 } 5398 5399 bool 5400 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5401 using namespace llvm::AMDGPU::Swizzle; 5402 5403 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5404 return false; 5405 } 5406 5407 StringRef Ctl; 5408 SMLoc StrLoc = Parser.getTok().getLoc(); 5409 if (!parseString(Ctl)) { 5410 return false; 5411 } 5412 if (Ctl.size() != BITMASK_WIDTH) { 5413 Error(StrLoc, "expected a 5-character mask"); 5414 return false; 5415 } 5416 5417 unsigned AndMask = 0; 5418 unsigned OrMask = 0; 5419 unsigned XorMask = 0; 5420 5421 for (size_t i = 0; i < Ctl.size(); ++i) { 5422 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5423 switch(Ctl[i]) { 5424 default: 5425 Error(StrLoc, "invalid mask"); 5426 return false; 5427 case '0': 5428 break; 5429 case '1': 5430 OrMask |= Mask; 5431 break; 5432 case 'p': 5433 AndMask |= Mask; 5434 break; 5435 case 'i': 5436 AndMask |= Mask; 5437 XorMask |= Mask; 5438 break; 5439 } 5440 } 5441 5442 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5443 return true; 5444 } 5445 5446 bool 5447 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5448 5449 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5450 5451 if (!parseExpr(Imm)) { 5452 return false; 
5453 } 5454 if (!isUInt<16>(Imm)) { 5455 Error(OffsetLoc, "expected a 16-bit offset"); 5456 return false; 5457 } 5458 return true; 5459 } 5460 5461 bool 5462 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5463 using namespace llvm::AMDGPU::Swizzle; 5464 5465 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5466 5467 SMLoc ModeLoc = Parser.getTok().getLoc(); 5468 bool Ok = false; 5469 5470 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5471 Ok = parseSwizzleQuadPerm(Imm); 5472 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5473 Ok = parseSwizzleBitmaskPerm(Imm); 5474 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5475 Ok = parseSwizzleBroadcast(Imm); 5476 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5477 Ok = parseSwizzleSwap(Imm); 5478 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5479 Ok = parseSwizzleReverse(Imm); 5480 } else { 5481 Error(ModeLoc, "expected a swizzle mode"); 5482 } 5483 5484 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5485 } 5486 5487 return false; 5488 } 5489 5490 OperandMatchResultTy 5491 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5492 SMLoc S = Parser.getTok().getLoc(); 5493 int64_t Imm = 0; 5494 5495 if (trySkipId("offset")) { 5496 5497 bool Ok = false; 5498 if (skipToken(AsmToken::Colon, "expected a colon")) { 5499 if (trySkipId("swizzle")) { 5500 Ok = parseSwizzleMacro(Imm); 5501 } else { 5502 Ok = parseSwizzleOffset(Imm); 5503 } 5504 } 5505 5506 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5507 5508 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5509 } else { 5510 // Swizzle "offset" operand is optional. 5511 // If it is omitted, try parsing other optional operands. 
5512 return parseOptionalOpr(Operands); 5513 } 5514 } 5515 5516 bool 5517 AMDGPUOperand::isSwizzle() const { 5518 return isImmTy(ImmTySwizzle); 5519 } 5520 5521 //===----------------------------------------------------------------------===// 5522 // VGPR Index Mode 5523 //===----------------------------------------------------------------------===// 5524 5525 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5526 5527 using namespace llvm::AMDGPU::VGPRIndexMode; 5528 5529 if (trySkipToken(AsmToken::RParen)) { 5530 return OFF; 5531 } 5532 5533 int64_t Imm = 0; 5534 5535 while (true) { 5536 unsigned Mode = 0; 5537 SMLoc S = Parser.getTok().getLoc(); 5538 5539 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5540 if (trySkipId(IdSymbolic[ModeId])) { 5541 Mode = 1 << ModeId; 5542 break; 5543 } 5544 } 5545 5546 if (Mode == 0) { 5547 Error(S, (Imm == 0)? 5548 "expected a VGPR index mode or a closing parenthesis" : 5549 "expected a VGPR index mode"); 5550 break; 5551 } 5552 5553 if (Imm & Mode) { 5554 Error(S, "duplicate VGPR index mode"); 5555 break; 5556 } 5557 Imm |= Mode; 5558 5559 if (trySkipToken(AsmToken::RParen)) 5560 break; 5561 if (!skipToken(AsmToken::Comma, 5562 "expected a comma or a closing parenthesis")) 5563 break; 5564 } 5565 5566 return Imm; 5567 } 5568 5569 OperandMatchResultTy 5570 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5571 5572 int64_t Imm = 0; 5573 SMLoc S = Parser.getTok().getLoc(); 5574 5575 if (getLexer().getKind() == AsmToken::Identifier && 5576 Parser.getTok().getString() == "gpr_idx" && 5577 getLexer().peekTok().is(AsmToken::LParen)) { 5578 5579 Parser.Lex(); 5580 Parser.Lex(); 5581 5582 // If parse failed, trigger an error but do not return error code 5583 // to avoid excessive error messages. 
5584 Imm = parseGPRIdxMacro(); 5585 5586 } else { 5587 if (getParser().parseAbsoluteExpression(Imm)) 5588 return MatchOperand_NoMatch; 5589 if (Imm < 0 || !isUInt<4>(Imm)) { 5590 Error(S, "invalid immediate: only 4-bit values are legal"); 5591 } 5592 } 5593 5594 Operands.push_back( 5595 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5596 return MatchOperand_Success; 5597 } 5598 5599 bool AMDGPUOperand::isGPRIdxMode() const { 5600 return isImmTy(ImmTyGprIdxMode); 5601 } 5602 5603 //===----------------------------------------------------------------------===// 5604 // sopp branch targets 5605 //===----------------------------------------------------------------------===// 5606 5607 OperandMatchResultTy 5608 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5609 SMLoc S = Parser.getTok().getLoc(); 5610 5611 switch (getLexer().getKind()) { 5612 default: return MatchOperand_ParseFail; 5613 case AsmToken::Integer: { 5614 int64_t Imm; 5615 if (getParser().parseAbsoluteExpression(Imm)) 5616 return MatchOperand_ParseFail; 5617 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5618 return MatchOperand_Success; 5619 } 5620 5621 case AsmToken::Identifier: 5622 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5623 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5624 Parser.getTok().getString()), getContext()), S)); 5625 Parser.Lex(); 5626 return MatchOperand_Success; 5627 } 5628 } 5629 5630 //===----------------------------------------------------------------------===// 5631 // Boolean holding registers 5632 //===----------------------------------------------------------------------===// 5633 5634 OperandMatchResultTy 5635 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5636 return parseReg(Operands); 5637 } 5638 5639 //===----------------------------------------------------------------------===// 5640 // mubuf 5641 //===----------------------------------------------------------------------===// 5642 5643 
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5644 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5645 } 5646 5647 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5648 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5649 } 5650 5651 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5652 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5653 } 5654 5655 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5656 const OperandVector &Operands, 5657 bool IsAtomic, 5658 bool IsAtomicReturn, 5659 bool IsLds) { 5660 bool IsLdsOpcode = IsLds; 5661 bool HasLdsModifier = false; 5662 OptionalImmIndexMap OptionalIdx; 5663 assert(IsAtomicReturn ? IsAtomic : true); 5664 unsigned FirstOperandIdx = 1; 5665 5666 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5667 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5668 5669 // Add the register arguments 5670 if (Op.isReg()) { 5671 Op.addRegOperands(Inst, 1); 5672 // Insert a tied src for atomic return dst. 5673 // This cannot be postponed as subsequent calls to 5674 // addImmOperands rely on correct number of MC operands. 5675 if (IsAtomicReturn && i == FirstOperandIdx) 5676 Op.addRegOperands(Inst, 1); 5677 continue; 5678 } 5679 5680 // Handle the case where soffset is an immediate 5681 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5682 Op.addImmOperands(Inst, 1); 5683 continue; 5684 } 5685 5686 HasLdsModifier |= Op.isLDS(); 5687 5688 // Handle tokens like 'offen' which are sometimes hard-coded into the 5689 // asm string. There are no MCInst operands for these. 5690 if (Op.isToken()) { 5691 continue; 5692 } 5693 assert(Op.isImm()); 5694 5695 // Handle optional arguments 5696 OptionalIdx[Op.getImmTy()] = i; 5697 } 5698 5699 // This is a workaround for an llvm quirk which may result in an 5700 // incorrect instruction selection. 
Lds and non-lds versions of 5701 // MUBUF instructions are identical except that lds versions 5702 // have mandatory 'lds' modifier. However this modifier follows 5703 // optional modifiers and llvm asm matcher regards this 'lds' 5704 // modifier as an optional one. As a result, an lds version 5705 // of opcode may be selected even if it has no 'lds' modifier. 5706 if (IsLdsOpcode && !HasLdsModifier) { 5707 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5708 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5709 Inst.setOpcode(NoLdsOpcode); 5710 IsLdsOpcode = false; 5711 } 5712 } 5713 5714 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5715 if (!IsAtomic) { // glc is hard-coded. 5716 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5717 } 5718 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5719 5720 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5721 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5722 } 5723 5724 if (isGFX10()) 5725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5726 } 5727 5728 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5729 OptionalImmIndexMap OptionalIdx; 5730 5731 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5732 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5733 5734 // Add the register arguments 5735 if (Op.isReg()) { 5736 Op.addRegOperands(Inst, 1); 5737 continue; 5738 } 5739 5740 // Handle the case where soffset is an immediate 5741 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5742 Op.addImmOperands(Inst, 1); 5743 continue; 5744 } 5745 5746 // Handle tokens like 'offen' which are sometimes hard-coded into the 5747 // asm string. There are no MCInst operands for these. 
5748 if (Op.isToken()) { 5749 continue; 5750 } 5751 assert(Op.isImm()); 5752 5753 // Handle optional arguments 5754 OptionalIdx[Op.getImmTy()] = i; 5755 } 5756 5757 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5758 AMDGPUOperand::ImmTyOffset); 5759 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5760 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5761 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5762 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5763 5764 if (isGFX10()) 5765 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5766 } 5767 5768 //===----------------------------------------------------------------------===// 5769 // mimg 5770 //===----------------------------------------------------------------------===// 5771 5772 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5773 bool IsAtomic) { 5774 unsigned I = 1; 5775 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5776 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5777 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5778 } 5779 5780 if (IsAtomic) { 5781 // Add src, same as dst 5782 assert(Desc.getNumDefs() == 1); 5783 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5784 } 5785 5786 OptionalImmIndexMap OptionalIdx; 5787 5788 for (unsigned E = Operands.size(); I != E; ++I) { 5789 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5790 5791 // Add the register arguments 5792 if (Op.isReg()) { 5793 Op.addRegOperands(Inst, 1); 5794 } else if (Op.isImmModifier()) { 5795 OptionalIdx[Op.getImmTy()] = I; 5796 } else if (!Op.isToken()) { 5797 llvm_unreachable("unexpected operand type"); 5798 } 5799 } 5800 5801 bool IsGFX10 = isGFX10(); 5802 5803 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5804 if (IsGFX10) 5805 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyDim, -1); 5806 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5807 if (IsGFX10) 5808 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5809 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5810 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5811 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5812 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5813 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5814 if (!IsGFX10) 5815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5817 } 5818 5819 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5820 cvtMIMG(Inst, Operands, true); 5821 } 5822 5823 //===----------------------------------------------------------------------===// 5824 // smrd 5825 //===----------------------------------------------------------------------===// 5826 5827 bool AMDGPUOperand::isSMRDOffset8() const { 5828 return isImm() && isUInt<8>(getImm()); 5829 } 5830 5831 bool AMDGPUOperand::isSMRDOffset20() const { 5832 return isImm() && isUInt<20>(getImm()); 5833 } 5834 5835 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5836 // 32-bit literals are only supported on CI and we only want to use them 5837 // when the offset is > 8-bits. 
5838 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5839 } 5840 5841 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5842 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5843 } 5844 5845 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5846 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5847 } 5848 5849 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5850 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5851 } 5852 5853 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5854 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5855 } 5856 5857 //===----------------------------------------------------------------------===// 5858 // vop3 5859 //===----------------------------------------------------------------------===// 5860 5861 static bool ConvertOmodMul(int64_t &Mul) { 5862 if (Mul != 1 && Mul != 2 && Mul != 4) 5863 return false; 5864 5865 Mul >>= 1; 5866 return true; 5867 } 5868 5869 static bool ConvertOmodDiv(int64_t &Div) { 5870 if (Div == 1) { 5871 Div = 0; 5872 return true; 5873 } 5874 5875 if (Div == 2) { 5876 Div = 3; 5877 return true; 5878 } 5879 5880 return false; 5881 } 5882 5883 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5884 if (BoundCtrl == 0) { 5885 BoundCtrl = 1; 5886 return true; 5887 } 5888 5889 if (BoundCtrl == -1) { 5890 BoundCtrl = 0; 5891 return true; 5892 } 5893 5894 return false; 5895 } 5896 5897 // Note: the order in this table matches the order of operands in AsmString. 
5898 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5899 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5900 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5901 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5902 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5903 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5904 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5905 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5906 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5907 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5908 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5909 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5910 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5911 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5912 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5913 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5914 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5915 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5916 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5917 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5918 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5919 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5920 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5921 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5922 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5923 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5924 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5925 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5926 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5927 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5928 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 5929 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5930 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5931 {"src1_sel", 
AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5932 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5933 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5934 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5935 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5936 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5937 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5938 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 5939 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 5940 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 5941 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 5942 }; 5943 5944 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5945 unsigned size = Operands.size(); 5946 assert(size > 0); 5947 5948 OperandMatchResultTy res = parseOptionalOpr(Operands); 5949 5950 // This is a hack to enable hardcoded mandatory operands which follow 5951 // optional operands. 5952 // 5953 // Current design assumes that all operands after the first optional operand 5954 // are also optional. However implementation of some instructions violates 5955 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5956 // 5957 // To alleviate this problem, we have to (implicitly) parse extra operands 5958 // to make sure autogenerated parser of custom operands never hit hardcoded 5959 // mandatory operands. 5960 5961 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5962 5963 // We have parsed the first optional operand. 5964 // Parse as many operands as necessary to skip all mandatory operands. 
5965 5966 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5967 if (res != MatchOperand_Success || 5968 getLexer().is(AsmToken::EndOfStatement)) break; 5969 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5970 res = parseOptionalOpr(Operands); 5971 } 5972 } 5973 5974 return res; 5975 } 5976 5977 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5978 OperandMatchResultTy res; 5979 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5980 // try to parse any optional operand here 5981 if (Op.IsBit) { 5982 res = parseNamedBit(Op.Name, Operands, Op.Type); 5983 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5984 res = parseOModOperand(Operands); 5985 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5986 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5987 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5988 res = parseSDWASel(Operands, Op.Name, Op.Type); 5989 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5990 res = parseSDWADstUnused(Operands); 5991 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5992 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5993 Op.Type == AMDGPUOperand::ImmTyNegLo || 5994 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5995 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5996 Op.ConvertResult); 5997 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 5998 res = parseDim(Operands); 5999 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6000 res = parseDfmtNfmt(Operands); 6001 } else { 6002 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6003 } 6004 if (res != MatchOperand_NoMatch) { 6005 return res; 6006 } 6007 } 6008 return MatchOperand_NoMatch; 6009 } 6010 6011 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6012 StringRef Name = Parser.getTok().getString(); 6013 if (Name == "mul") { 6014 return parseIntWithPrefix("mul", Operands, 6015 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6016 } 6017 6018 if (Name 
== "div") { 6019 return parseIntWithPrefix("div", Operands, 6020 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6021 } 6022 6023 return MatchOperand_NoMatch; 6024 } 6025 6026 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6027 cvtVOP3P(Inst, Operands); 6028 6029 int Opc = Inst.getOpcode(); 6030 6031 int SrcNum; 6032 const int Ops[] = { AMDGPU::OpName::src0, 6033 AMDGPU::OpName::src1, 6034 AMDGPU::OpName::src2 }; 6035 for (SrcNum = 0; 6036 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6037 ++SrcNum); 6038 assert(SrcNum > 0); 6039 6040 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6041 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6042 6043 if ((OpSel & (1 << SrcNum)) != 0) { 6044 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6045 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6046 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6047 } 6048 } 6049 6050 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6051 // 1. This operand is input modifiers 6052 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6053 // 2. This is not last operand 6054 && Desc.NumOperands > (OpNum + 1) 6055 // 3. Next operand is register class 6056 && Desc.OpInfo[OpNum + 1].RegClass != -1 6057 // 4. 
Next register is not tied to any other operand 6058 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6059 } 6060 6061 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6062 { 6063 OptionalImmIndexMap OptionalIdx; 6064 unsigned Opc = Inst.getOpcode(); 6065 6066 unsigned I = 1; 6067 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6068 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6069 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6070 } 6071 6072 for (unsigned E = Operands.size(); I != E; ++I) { 6073 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6074 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6075 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6076 } else if (Op.isInterpSlot() || 6077 Op.isInterpAttr() || 6078 Op.isAttrChan()) { 6079 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6080 } else if (Op.isImmModifier()) { 6081 OptionalIdx[Op.getImmTy()] = I; 6082 } else { 6083 llvm_unreachable("unhandled operand type"); 6084 } 6085 } 6086 6087 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6089 } 6090 6091 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6092 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6093 } 6094 6095 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6097 } 6098 } 6099 6100 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6101 OptionalImmIndexMap &OptionalIdx) { 6102 unsigned Opc = Inst.getOpcode(); 6103 6104 unsigned I = 1; 6105 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6106 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6107 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6108 } 6109 6110 if (AMDGPU::getNamedOperandIdx(Opc, 
AMDGPU::OpName::src0_modifiers) != -1) { 6111 // This instruction has src modifiers 6112 for (unsigned E = Operands.size(); I != E; ++I) { 6113 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6114 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6115 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6116 } else if (Op.isImmModifier()) { 6117 OptionalIdx[Op.getImmTy()] = I; 6118 } else if (Op.isRegOrImm()) { 6119 Op.addRegOrImmOperands(Inst, 1); 6120 } else { 6121 llvm_unreachable("unhandled operand type"); 6122 } 6123 } 6124 } else { 6125 // No src modifiers 6126 for (unsigned E = Operands.size(); I != E; ++I) { 6127 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6128 if (Op.isMod()) { 6129 OptionalIdx[Op.getImmTy()] = I; 6130 } else { 6131 Op.addRegOrImmOperands(Inst, 1); 6132 } 6133 } 6134 } 6135 6136 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6137 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6138 } 6139 6140 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6141 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6142 } 6143 6144 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6145 // it has src2 register operand that is tied to dst operand 6146 // we don't allow modifiers for this operand in assembler so src2_modifiers 6147 // should be 0. 
6148 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6149 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6150 Opc == AMDGPU::V_MAC_F32_e64_vi || 6151 Opc == AMDGPU::V_MAC_F16_e64_vi || 6152 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6153 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6154 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6155 auto it = Inst.begin(); 6156 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6157 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6158 ++it; 6159 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6160 } 6161 } 6162 6163 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6164 OptionalImmIndexMap OptionalIdx; 6165 cvtVOP3(Inst, Operands, OptionalIdx); 6166 } 6167 6168 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6169 const OperandVector &Operands) { 6170 OptionalImmIndexMap OptIdx; 6171 const int Opc = Inst.getOpcode(); 6172 const MCInstrDesc &Desc = MII.get(Opc); 6173 6174 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6175 6176 cvtVOP3(Inst, Operands, OptIdx); 6177 6178 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6179 assert(!IsPacked); 6180 Inst.addOperand(Inst.getOperand(0)); 6181 } 6182 6183 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6184 // instruction, and then figure out where to actually put the modifiers 6185 6186 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6187 6188 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6189 if (OpSelHiIdx != -1) { 6190 int DefaultVal = IsPacked ? 
-1 : 0; 6191 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6192 DefaultVal); 6193 } 6194 6195 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6196 if (NegLoIdx != -1) { 6197 assert(IsPacked); 6198 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6199 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6200 } 6201 6202 const int Ops[] = { AMDGPU::OpName::src0, 6203 AMDGPU::OpName::src1, 6204 AMDGPU::OpName::src2 }; 6205 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6206 AMDGPU::OpName::src1_modifiers, 6207 AMDGPU::OpName::src2_modifiers }; 6208 6209 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6210 6211 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6212 unsigned OpSelHi = 0; 6213 unsigned NegLo = 0; 6214 unsigned NegHi = 0; 6215 6216 if (OpSelHiIdx != -1) { 6217 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6218 } 6219 6220 if (NegLoIdx != -1) { 6221 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6222 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6223 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6224 } 6225 6226 for (int J = 0; J < 3; ++J) { 6227 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6228 if (OpIdx == -1) 6229 break; 6230 6231 uint32_t ModVal = 0; 6232 6233 if ((OpSel & (1 << J)) != 0) 6234 ModVal |= SISrcMods::OP_SEL_0; 6235 6236 if ((OpSelHi & (1 << J)) != 0) 6237 ModVal |= SISrcMods::OP_SEL_1; 6238 6239 if ((NegLo & (1 << J)) != 0) 6240 ModVal |= SISrcMods::NEG; 6241 6242 if ((NegHi & (1 << J)) != 0) 6243 ModVal |= SISrcMods::NEG_HI; 6244 6245 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6246 6247 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6248 } 6249 } 6250 6251 //===----------------------------------------------------------------------===// 6252 // dpp 6253 //===----------------------------------------------------------------------===// 
6254 6255 bool AMDGPUOperand::isDPP8() const { 6256 return isImmTy(ImmTyDPP8); 6257 } 6258 6259 bool AMDGPUOperand::isDPPCtrl() const { 6260 using namespace AMDGPU::DPP; 6261 6262 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6263 if (result) { 6264 int64_t Imm = getImm(); 6265 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6266 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6267 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6268 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6269 (Imm == DppCtrl::WAVE_SHL1) || 6270 (Imm == DppCtrl::WAVE_ROL1) || 6271 (Imm == DppCtrl::WAVE_SHR1) || 6272 (Imm == DppCtrl::WAVE_ROR1) || 6273 (Imm == DppCtrl::ROW_MIRROR) || 6274 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6275 (Imm == DppCtrl::BCAST15) || 6276 (Imm == DppCtrl::BCAST31) || 6277 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6278 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6279 } 6280 return false; 6281 } 6282 6283 //===----------------------------------------------------------------------===// 6284 // mAI 6285 //===----------------------------------------------------------------------===// 6286 6287 bool AMDGPUOperand::isBLGP() const { 6288 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6289 } 6290 6291 bool AMDGPUOperand::isCBSZ() const { 6292 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6293 } 6294 6295 bool AMDGPUOperand::isABID() const { 6296 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6297 } 6298 6299 bool AMDGPUOperand::isS16Imm() const { 6300 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6301 } 6302 6303 bool AMDGPUOperand::isU16Imm() const { 6304 return isImm() && isUInt<16>(getImm()); 6305 } 6306 6307 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6308 if (!isGFX10()) 6309 return 
MatchOperand_NoMatch; 6310 6311 SMLoc S = Parser.getTok().getLoc(); 6312 6313 if (getLexer().isNot(AsmToken::Identifier)) 6314 return MatchOperand_NoMatch; 6315 if (getLexer().getTok().getString() != "dim") 6316 return MatchOperand_NoMatch; 6317 6318 Parser.Lex(); 6319 if (getLexer().isNot(AsmToken::Colon)) 6320 return MatchOperand_ParseFail; 6321 6322 Parser.Lex(); 6323 6324 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6325 // integer. 6326 std::string Token; 6327 if (getLexer().is(AsmToken::Integer)) { 6328 SMLoc Loc = getLexer().getTok().getEndLoc(); 6329 Token = getLexer().getTok().getString(); 6330 Parser.Lex(); 6331 if (getLexer().getTok().getLoc() != Loc) 6332 return MatchOperand_ParseFail; 6333 } 6334 if (getLexer().isNot(AsmToken::Identifier)) 6335 return MatchOperand_ParseFail; 6336 Token += getLexer().getTok().getString(); 6337 6338 StringRef DimId = Token; 6339 if (DimId.startswith("SQ_RSRC_IMG_")) 6340 DimId = DimId.substr(12); 6341 6342 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6343 if (!DimInfo) 6344 return MatchOperand_ParseFail; 6345 6346 Parser.Lex(); 6347 6348 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6349 AMDGPUOperand::ImmTyDim)); 6350 return MatchOperand_Success; 6351 } 6352 6353 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6354 SMLoc S = Parser.getTok().getLoc(); 6355 StringRef Prefix; 6356 6357 if (getLexer().getKind() == AsmToken::Identifier) { 6358 Prefix = Parser.getTok().getString(); 6359 } else { 6360 return MatchOperand_NoMatch; 6361 } 6362 6363 if (Prefix != "dpp8") 6364 return parseDPPCtrl(Operands); 6365 if (!isGFX10()) 6366 return MatchOperand_NoMatch; 6367 6368 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6369 6370 int64_t Sels[8]; 6371 6372 Parser.Lex(); 6373 if (getLexer().isNot(AsmToken::Colon)) 6374 return MatchOperand_ParseFail; 6375 6376 Parser.Lex(); 6377 if (getLexer().isNot(AsmToken::LBrac)) 6378 return 
MatchOperand_ParseFail; 6379 6380 Parser.Lex(); 6381 if (getParser().parseAbsoluteExpression(Sels[0])) 6382 return MatchOperand_ParseFail; 6383 if (0 > Sels[0] || 7 < Sels[0]) 6384 return MatchOperand_ParseFail; 6385 6386 for (size_t i = 1; i < 8; ++i) { 6387 if (getLexer().isNot(AsmToken::Comma)) 6388 return MatchOperand_ParseFail; 6389 6390 Parser.Lex(); 6391 if (getParser().parseAbsoluteExpression(Sels[i])) 6392 return MatchOperand_ParseFail; 6393 if (0 > Sels[i] || 7 < Sels[i]) 6394 return MatchOperand_ParseFail; 6395 } 6396 6397 if (getLexer().isNot(AsmToken::RBrac)) 6398 return MatchOperand_ParseFail; 6399 Parser.Lex(); 6400 6401 unsigned DPP8 = 0; 6402 for (size_t i = 0; i < 8; ++i) 6403 DPP8 |= (Sels[i] << (i * 3)); 6404 6405 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6406 return MatchOperand_Success; 6407 } 6408 6409 OperandMatchResultTy 6410 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6411 using namespace AMDGPU::DPP; 6412 6413 SMLoc S = Parser.getTok().getLoc(); 6414 StringRef Prefix; 6415 int64_t Int; 6416 6417 if (getLexer().getKind() == AsmToken::Identifier) { 6418 Prefix = Parser.getTok().getString(); 6419 } else { 6420 return MatchOperand_NoMatch; 6421 } 6422 6423 if (Prefix == "row_mirror") { 6424 Int = DppCtrl::ROW_MIRROR; 6425 Parser.Lex(); 6426 } else if (Prefix == "row_half_mirror") { 6427 Int = DppCtrl::ROW_HALF_MIRROR; 6428 Parser.Lex(); 6429 } else { 6430 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6431 if (Prefix != "quad_perm" 6432 && Prefix != "row_shl" 6433 && Prefix != "row_shr" 6434 && Prefix != "row_ror" 6435 && Prefix != "wave_shl" 6436 && Prefix != "wave_rol" 6437 && Prefix != "wave_shr" 6438 && Prefix != "wave_ror" 6439 && Prefix != "row_bcast" 6440 && Prefix != "row_share" 6441 && Prefix != "row_xmask") { 6442 return MatchOperand_NoMatch; 6443 } 6444 6445 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6446 return MatchOperand_NoMatch; 
6447 6448 if (!isVI() && !isGFX9() && 6449 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6450 Prefix == "wave_rol" || Prefix == "wave_ror" || 6451 Prefix == "row_bcast")) 6452 return MatchOperand_NoMatch; 6453 6454 Parser.Lex(); 6455 if (getLexer().isNot(AsmToken::Colon)) 6456 return MatchOperand_ParseFail; 6457 6458 if (Prefix == "quad_perm") { 6459 // quad_perm:[%d,%d,%d,%d] 6460 Parser.Lex(); 6461 if (getLexer().isNot(AsmToken::LBrac)) 6462 return MatchOperand_ParseFail; 6463 Parser.Lex(); 6464 6465 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6466 return MatchOperand_ParseFail; 6467 6468 for (int i = 0; i < 3; ++i) { 6469 if (getLexer().isNot(AsmToken::Comma)) 6470 return MatchOperand_ParseFail; 6471 Parser.Lex(); 6472 6473 int64_t Temp; 6474 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6475 return MatchOperand_ParseFail; 6476 const int shift = i*2 + 2; 6477 Int += (Temp << shift); 6478 } 6479 6480 if (getLexer().isNot(AsmToken::RBrac)) 6481 return MatchOperand_ParseFail; 6482 Parser.Lex(); 6483 } else { 6484 // sel:%d 6485 Parser.Lex(); 6486 if (getParser().parseAbsoluteExpression(Int)) 6487 return MatchOperand_ParseFail; 6488 6489 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6490 Int |= DppCtrl::ROW_SHL0; 6491 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6492 Int |= DppCtrl::ROW_SHR0; 6493 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6494 Int |= DppCtrl::ROW_ROR0; 6495 } else if (Prefix == "wave_shl" && 1 == Int) { 6496 Int = DppCtrl::WAVE_SHL1; 6497 } else if (Prefix == "wave_rol" && 1 == Int) { 6498 Int = DppCtrl::WAVE_ROL1; 6499 } else if (Prefix == "wave_shr" && 1 == Int) { 6500 Int = DppCtrl::WAVE_SHR1; 6501 } else if (Prefix == "wave_ror" && 1 == Int) { 6502 Int = DppCtrl::WAVE_ROR1; 6503 } else if (Prefix == "row_bcast") { 6504 if (Int == 15) { 6505 Int = DppCtrl::BCAST15; 6506 } else if (Int == 31) { 6507 Int = DppCtrl::BCAST31; 6508 } else { 6509 return 
MatchOperand_ParseFail; 6510 } 6511 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6512 Int |= DppCtrl::ROW_SHARE_FIRST; 6513 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6514 Int |= DppCtrl::ROW_XMASK_FIRST; 6515 } else { 6516 return MatchOperand_ParseFail; 6517 } 6518 } 6519 } 6520 6521 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6522 return MatchOperand_Success; 6523 } 6524 6525 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6526 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6527 } 6528 6529 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6530 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6531 } 6532 6533 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6534 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6535 } 6536 6537 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6538 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6539 } 6540 6541 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6542 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6543 } 6544 6545 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6546 OptionalImmIndexMap OptionalIdx; 6547 6548 unsigned I = 1; 6549 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6550 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6551 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6552 } 6553 6554 int Fi = 0; 6555 for (unsigned E = Operands.size(); I != E; ++I) { 6556 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6557 MCOI::TIED_TO); 6558 if (TiedTo != -1) { 6559 assert((unsigned)TiedTo < Inst.getNumOperands()); 6560 // handle tied old or src2 for MAC instructions 6561 Inst.addOperand(Inst.getOperand(TiedTo)); 6562 } 6563 
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6564 // Add the register arguments 6565 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6566 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6567 // Skip it. 6568 continue; 6569 } 6570 6571 if (IsDPP8) { 6572 if (Op.isDPP8()) { 6573 Op.addImmOperands(Inst, 1); 6574 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6575 Op.addRegWithFPInputModsOperands(Inst, 2); 6576 } else if (Op.isFI()) { 6577 Fi = Op.getImm(); 6578 } else if (Op.isReg()) { 6579 Op.addRegOperands(Inst, 1); 6580 } else { 6581 llvm_unreachable("Invalid operand type"); 6582 } 6583 } else { 6584 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6585 Op.addRegWithFPInputModsOperands(Inst, 2); 6586 } else if (Op.isDPPCtrl()) { 6587 Op.addImmOperands(Inst, 1); 6588 } else if (Op.isImm()) { 6589 // Handle optional arguments 6590 OptionalIdx[Op.getImmTy()] = I; 6591 } else { 6592 llvm_unreachable("Invalid operand type"); 6593 } 6594 } 6595 } 6596 6597 if (IsDPP8) { 6598 using namespace llvm::AMDGPU::DPP; 6599 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 6600 } else { 6601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6604 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6606 } 6607 } 6608 } 6609 6610 //===----------------------------------------------------------------------===// 6611 // sdwa 6612 //===----------------------------------------------------------------------===// 6613 6614 OperandMatchResultTy 6615 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6616 AMDGPUOperand::ImmTy Type) { 6617 using namespace llvm::AMDGPU::SDWA; 6618 6619 SMLoc S = Parser.getTok().getLoc(); 6620 StringRef Value; 6621 OperandMatchResultTy res; 6622 6623 res = parseStringWithPrefix(Prefix, Value); 6624 if (res != MatchOperand_Success) { 6625 return res; 6626 } 6627 6628 int64_t Int; 6629 Int = StringSwitch<int64_t>(Value) 6630 .Case("BYTE_0", SdwaSel::BYTE_0) 6631 .Case("BYTE_1", SdwaSel::BYTE_1) 6632 .Case("BYTE_2", SdwaSel::BYTE_2) 6633 .Case("BYTE_3", SdwaSel::BYTE_3) 6634 .Case("WORD_0", SdwaSel::WORD_0) 6635 .Case("WORD_1", SdwaSel::WORD_1) 6636 .Case("DWORD", SdwaSel::DWORD) 6637 .Default(0xffffffff); 6638 Parser.Lex(); // eat last token 6639 6640 if (Int == 0xffffffff) { 6641 return MatchOperand_ParseFail; 6642 } 6643 6644 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6645 return MatchOperand_Success; 6646 } 6647 6648 OperandMatchResultTy 6649 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6650 using namespace llvm::AMDGPU::SDWA; 6651 6652 SMLoc S = Parser.getTok().getLoc(); 6653 StringRef Value; 6654 OperandMatchResultTy res; 6655 6656 res = parseStringWithPrefix("dst_unused", Value); 6657 if (res != 
MatchOperand_Success) { 6658 return res; 6659 } 6660 6661 int64_t Int; 6662 Int = StringSwitch<int64_t>(Value) 6663 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6664 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6665 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6666 .Default(0xffffffff); 6667 Parser.Lex(); // eat last token 6668 6669 if (Int == 0xffffffff) { 6670 return MatchOperand_ParseFail; 6671 } 6672 6673 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6674 return MatchOperand_Success; 6675 } 6676 6677 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6678 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6679 } 6680 6681 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6682 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6683 } 6684 6685 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6686 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6687 } 6688 6689 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6690 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6691 } 6692 6693 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6694 uint64_t BasicInstType, bool skipVcc) { 6695 using namespace llvm::AMDGPU::SDWA; 6696 6697 OptionalImmIndexMap OptionalIdx; 6698 bool skippedVcc = false; 6699 6700 unsigned I = 1; 6701 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6702 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6703 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6704 } 6705 6706 for (unsigned E = Operands.size(); I != E; ++I) { 6707 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6708 if (skipVcc && !skippedVcc && Op.isReg() && 6709 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6710 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6711 // Skip it if it's 2nd (e.g. 
v_add_i32_sdwa v1, vcc, v2, v3) 6712 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6713 // Skip VCC only if we didn't skip it on previous iteration. 6714 if (BasicInstType == SIInstrFlags::VOP2 && 6715 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6716 skippedVcc = true; 6717 continue; 6718 } else if (BasicInstType == SIInstrFlags::VOPC && 6719 Inst.getNumOperands() == 0) { 6720 skippedVcc = true; 6721 continue; 6722 } 6723 } 6724 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6725 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6726 } else if (Op.isImm()) { 6727 // Handle optional arguments 6728 OptionalIdx[Op.getImmTy()] = I; 6729 } else { 6730 llvm_unreachable("Invalid operand type"); 6731 } 6732 skippedVcc = false; 6733 } 6734 6735 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6736 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6737 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6738 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6739 switch (BasicInstType) { 6740 case SIInstrFlags::VOP1: 6741 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6742 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6743 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6744 } 6745 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6746 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6747 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6748 break; 6749 6750 case SIInstrFlags::VOP2: 6751 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6752 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6753 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6754 } 6755 
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6758 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6759 break; 6760 6761 case SIInstrFlags::VOPC: 6762 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6763 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6765 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6766 break; 6767 6768 default: 6769 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 6770 } 6771 } 6772 6773 // special case v_mac_{f16, f32}: 6774 // it has src2 register operand that is tied to dst operand 6775 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6776 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6777 auto it = Inst.begin(); 6778 std::advance( 6779 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6780 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6781 } 6782 } 6783 6784 //===----------------------------------------------------------------------===// 6785 // mAI 6786 //===----------------------------------------------------------------------===// 6787 6788 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6789 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6790 } 6791 6792 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6793 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6794 } 6795 6796 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6797 return 
AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6798 } 6799 6800 /// Force static initialization. 6801 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6802 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6803 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6804 } 6805 6806 #define GET_REGISTER_MATCHER 6807 #define GET_MATCHER_IMPLEMENTATION 6808 #define GET_MNEMONIC_SPELL_CHECKER 6809 #include "AMDGPUGenAsmMatcher.inc" 6810 6811 // This fuction should be defined after auto-generated include so that we have 6812 // MatchClassKind enum defined 6813 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6814 unsigned Kind) { 6815 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6816 // But MatchInstructionImpl() expects to meet token and fails to validate 6817 // operand. This method checks if we are given immediate operand but expect to 6818 // get corresponding token. 6819 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6820 switch (Kind) { 6821 case MCK_addr64: 6822 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6823 case MCK_gds: 6824 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6825 case MCK_lds: 6826 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6827 case MCK_glc: 6828 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6829 case MCK_idxen: 6830 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6831 case MCK_offen: 6832 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6833 case MCK_SSrcB32: 6834 // When operands have expression values, they will return true for isToken, 6835 // because it is not possible to distinguish between a token and an 6836 // expression at parse time. 
MatchInstructionImpl() will always try to 6837 // match an operand as a token, when isToken returns true, and when the 6838 // name of the expression is not a valid token, the match will fail, 6839 // so we need to handle it here. 6840 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6841 case MCK_SSrcF32: 6842 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6843 case MCK_SoppBrTarget: 6844 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6845 case MCK_VReg32OrOff: 6846 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6847 case MCK_InterpSlot: 6848 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 6849 case MCK_Attr: 6850 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6851 case MCK_AttrChan: 6852 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6853 default: 6854 return Match_InvalidOperand; 6855 } 6856 } 6857 6858 //===----------------------------------------------------------------------===// 6859 // endpgm 6860 //===----------------------------------------------------------------------===// 6861 6862 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6863 SMLoc S = Parser.getTok().getLoc(); 6864 int64_t Imm = 0; 6865 6866 if (!parseExpr(Imm)) { 6867 // The operand is optional, if not present default to 0 6868 Imm = 0; 6869 } 6870 6871 if (!isUInt<16>(Imm)) { 6872 Error(S, "expected a 16-bit value"); 6873 return MatchOperand_ParseFail; 6874 } 6875 6876 Operands.push_back( 6877 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6878 return MatchOperand_Success; 6879 } 6880 6881 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6882