1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u; 108 return Operand; 109 } 110 111 int64_t getIntModifiersOperand() const { 112 int64_t Operand = 0; 113 Operand |= Sext ? SISrcMods::SEXT : 0u; 114 return Operand; 115 } 116 117 int64_t getModifiersOperand() const { 118 assert(!(hasFPModifiers() && hasIntModifiers()) 119 && "fp and int modifiers should not be used simultaneously"); 120 if (hasFPModifiers()) { 121 return getFPModifiersOperand(); 122 } else if (hasIntModifiers()) { 123 return getIntModifiersOperand(); 124 } else { 125 return 0; 126 } 127 } 128 129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 130 }; 131 132 enum ImmTy { 133 ImmTyNone, 134 ImmTyGDS, 135 ImmTyLDS, 136 ImmTyOffen, 137 ImmTyIdxen, 138 ImmTyAddr64, 139 ImmTyOffset, 140 ImmTyInstOffset, 141 ImmTyOffset0, 142 ImmTyOffset1, 143 ImmTyDLC, 144 ImmTyGLC, 145 ImmTySLC, 146 ImmTySWZ, 147 ImmTyTFE, 148 ImmTyD16, 149 ImmTyClampSI, 150 ImmTyOModSI, 151 ImmTyDPP8, 152 ImmTyDppCtrl, 153 ImmTyDppRowMask, 154 ImmTyDppBankMask, 155 ImmTyDppBoundCtrl, 156 ImmTyDppFi, 157 ImmTySdwaDstSel, 158 ImmTySdwaSrc0Sel, 159 ImmTySdwaSrc1Sel, 160 ImmTySdwaDstUnused, 161 ImmTyDMask, 162 ImmTyDim, 163 ImmTyUNorm, 164 ImmTyDA, 165 ImmTyR128A16, 166 ImmTyA16, 167 ImmTyLWE, 168 ImmTyExpTgt, 169 ImmTyExpCompr, 170 ImmTyExpVM, 171 ImmTyFORMAT, 172 ImmTyHwreg, 173 ImmTyOff, 174 ImmTySendMsg, 175 ImmTyInterpSlot, 176 ImmTyInterpAttr, 177 ImmTyAttrChan, 178 ImmTyOpSel, 179 ImmTyOpSelHi, 180 ImmTyNegLo, 181 ImmTyNegHi, 182 ImmTySwizzle, 183 ImmTyGprIdxMode, 184 ImmTyHigh, 185 ImmTyBLGP, 186 ImmTyCBSZ, 187 ImmTyABID, 188 ImmTyEndpgm, 189 }; 190 191 private: 192 struct TokOp { 193 const char *Data; 194 unsigned Length; 195 }; 196 197 struct ImmOp { 198 int64_t Val; 199 ImmTy Type; 200 bool IsFPImm; 201 Modifiers Mods; 202 }; 203 204 struct RegOp { 205 unsigned RegNo; 206 Modifiers Mods; 207 }; 208 209 union { 210 TokOp Tok; 211 ImmOp Imm; 212 RegOp Reg; 213 const MCExpr *Expr; 214 }; 215 216 public: 217 bool isToken() const override { 218 if (Kind == Token) 219 return true; 220 221 // When parsing operands, we can't always tell if something was meant to be 222 // a token, like 'gds', or an expression that references a global variable. 223 // In this case, we assume the string is an expression, and if we need to 224 // interpret is a token, then we treat the symbol name as the token. 
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool 
isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 
426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcB16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool 
isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMEMOffset() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 ImmTy getImmTy() const { 693 assert(isImm()); 694 return Imm.Type; 695 } 696 697 unsigned getReg() const override { 698 assert(isRegKind()); 699 return Reg.RegNo; 700 } 701 702 SMLoc getStartLoc() const override { 703 return StartLoc; 704 } 705 706 SMLoc getEndLoc() const override { 707 return EndLoc; 708 } 709 710 SMRange getLocRange() const { 711 return SMRange(StartLoc, EndLoc); 712 } 713 714 Modifiers getModifiers() const { 715 assert(isRegKind() || isImmTy(ImmTyNone)); 716 return isRegKind() ? 
Reg.Mods : Imm.Mods; 717 } 718 719 void setModifiers(Modifiers Mods) { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 if (isRegKind()) 722 Reg.Mods = Mods; 723 else 724 Imm.Mods = Mods; 725 } 726 727 bool hasModifiers() const { 728 return getModifiers().hasModifiers(); 729 } 730 731 bool hasFPModifiers() const { 732 return getModifiers().hasFPModifiers(); 733 } 734 735 bool hasIntModifiers() const { 736 return getModifiers().hasIntModifiers(); 737 } 738 739 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 740 741 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 742 743 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 744 745 template <unsigned Bitwidth> 746 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 747 748 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 749 addKImmFPOperands<16>(Inst, N); 750 } 751 752 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 753 addKImmFPOperands<32>(Inst, N); 754 } 755 756 void addRegOperands(MCInst &Inst, unsigned N) const; 757 758 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 759 addRegOperands(Inst, N); 760 } 761 762 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 763 if (isRegKind()) 764 addRegOperands(Inst, N); 765 else if (isExpr()) 766 Inst.addOperand(MCOperand::createExpr(Expr)); 767 else 768 addImmOperands(Inst, N); 769 } 770 771 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 772 Modifiers Mods = getModifiers(); 773 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 774 if (isRegKind()) { 775 addRegOperands(Inst, N); 776 } else { 777 addImmOperands(Inst, N, false); 778 } 779 } 780 781 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 782 assert(!hasIntModifiers()); 783 addRegOrImmWithInputModsOperands(Inst, N); 784 } 785 786 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasFPModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 792 Modifiers Mods = getModifiers(); 793 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 794 assert(isRegKind()); 795 addRegOperands(Inst, N); 796 } 797 798 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 799 assert(!hasIntModifiers()); 800 addRegWithInputModsOperands(Inst, N); 801 } 802 803 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasFPModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 809 if (isImm()) 810 addImmOperands(Inst, N); 811 else { 812 assert(isExpr()); 813 Inst.addOperand(MCOperand::createExpr(Expr)); 814 } 815 } 816 817 static void printImmTy(raw_ostream& OS, ImmTy Type) { 818 switch (Type) { 819 case ImmTyNone: OS << "None"; break; 820 case ImmTyGDS: OS << "GDS"; break; 821 case ImmTyLDS: OS << "LDS"; break; 822 case ImmTyOffen: OS << "Offen"; break; 823 case ImmTyIdxen: OS << "Idxen"; break; 824 case ImmTyAddr64: OS << "Addr64"; break; 825 case ImmTyOffset: OS << "Offset"; break; 826 case ImmTyInstOffset: OS << "InstOffset"; break; 827 case ImmTyOffset0: OS << "Offset0"; break; 828 case ImmTyOffset1: OS << "Offset1"; break; 829 case ImmTyDLC: OS << "DLC"; break; 830 case ImmTyGLC: OS << "GLC"; break; 831 case ImmTySLC: OS << "SLC"; break; 832 case ImmTySWZ: OS << "SWZ"; break; 833 case ImmTyTFE: OS << "TFE"; break; 834 case 
ImmTyD16: OS << "D16"; break; 835 case ImmTyFORMAT: OS << "FORMAT"; break; 836 case ImmTyClampSI: OS << "ClampSI"; break; 837 case ImmTyOModSI: OS << "OModSI"; break; 838 case ImmTyDPP8: OS << "DPP8"; break; 839 case ImmTyDppCtrl: OS << "DppCtrl"; break; 840 case ImmTyDppRowMask: OS << "DppRowMask"; break; 841 case ImmTyDppBankMask: OS << "DppBankMask"; break; 842 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 843 case ImmTyDppFi: OS << "FI"; break; 844 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 845 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 846 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 847 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 848 case ImmTyDMask: OS << "DMask"; break; 849 case ImmTyDim: OS << "Dim"; break; 850 case ImmTyUNorm: OS << "UNorm"; break; 851 case ImmTyDA: OS << "DA"; break; 852 case ImmTyR128A16: OS << "R128A16"; break; 853 case ImmTyA16: OS << "A16"; break; 854 case ImmTyLWE: OS << "LWE"; break; 855 case ImmTyOff: OS << "Off"; break; 856 case ImmTyExpTgt: OS << "ExpTgt"; break; 857 case ImmTyExpCompr: OS << "ExpCompr"; break; 858 case ImmTyExpVM: OS << "ExpVM"; break; 859 case ImmTyHwreg: OS << "Hwreg"; break; 860 case ImmTySendMsg: OS << "SendMsg"; break; 861 case ImmTyInterpSlot: OS << "InterpSlot"; break; 862 case ImmTyInterpAttr: OS << "InterpAttr"; break; 863 case ImmTyAttrChan: OS << "AttrChan"; break; 864 case ImmTyOpSel: OS << "OpSel"; break; 865 case ImmTyOpSelHi: OS << "OpSelHi"; break; 866 case ImmTyNegLo: OS << "NegLo"; break; 867 case ImmTyNegHi: OS << "NegHi"; break; 868 case ImmTySwizzle: OS << "Swizzle"; break; 869 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 870 case ImmTyHigh: OS << "High"; break; 871 case ImmTyBLGP: OS << "BLGP"; break; 872 case ImmTyCBSZ: OS << "CBSZ"; break; 873 case ImmTyABID: OS << "ABID"; break; 874 case ImmTyEndpgm: OS << "Endpgm"; break; 875 } 876 } 877 878 void print(raw_ostream &OS) const override { 879 switch (Kind) { 880 case Register: 881 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 882 break; 883 case Immediate: 884 OS << '<' << getImm(); 885 if (getImmTy() != ImmTyNone) { 886 OS << " type: "; printImmTy(OS, getImmTy()); 887 } 888 OS << " mods: " << Imm.Mods << '>'; 889 break; 890 case Token: 891 OS << '\'' << getToken() << '\''; 892 break; 893 case Expression: 894 OS << "<expr " << *Expr << '>'; 895 break; 896 } 897 } 898 899 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 900 int64_t Val, SMLoc Loc, 901 ImmTy Type = ImmTyNone, 902 bool IsFPImm = false) { 903 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 904 Op->Imm.Val = Val; 905 Op->Imm.IsFPImm = IsFPImm; 906 Op->Imm.Type = Type; 907 Op->Imm.Mods = Modifiers(); 908 Op->StartLoc = Loc; 909 Op->EndLoc = Loc; 910 return Op; 911 } 912 913 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 914 StringRef Str, SMLoc Loc, 915 bool HasExplicitEncodingSize = true) { 916 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 917 Res->Tok.Data = Str.data(); 918 Res->Tok.Length = Str.size(); 919 Res->StartLoc = Loc; 920 Res->EndLoc = Loc; 921 return Res; 922 } 923 924 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 925 unsigned RegNo, SMLoc S, 926 SMLoc E) { 927 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 928 Op->Reg.RegNo = RegNo; 929 Op->Reg.Mods = Modifiers(); 930 Op->StartLoc = S; 931 Op->EndLoc = E; 932 return Op; 933 } 934 935 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 936 
const class MCExpr *Expr, SMLoc S) { 937 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 938 Op->Expr = Expr; 939 Op->StartLoc = S; 940 Op->EndLoc = S; 941 return Op; 942 } 943 }; 944 945 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 946 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 947 return OS; 948 } 949 950 //===----------------------------------------------------------------------===// 951 // AsmParser 952 //===----------------------------------------------------------------------===// 953 954 // Holds info related to the current kernel, e.g. count of SGPRs used. 955 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 956 // .amdgpu_hsa_kernel or at EOF. 957 class KernelScopeInfo { 958 int SgprIndexUnusedMin = -1; 959 int VgprIndexUnusedMin = -1; 960 MCContext *Ctx = nullptr; 961 962 void usesSgprAt(int i) { 963 if (i >= SgprIndexUnusedMin) { 964 SgprIndexUnusedMin = ++i; 965 if (Ctx) { 966 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 967 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 968 } 969 } 970 } 971 972 void usesVgprAt(int i) { 973 if (i >= VgprIndexUnusedMin) { 974 VgprIndexUnusedMin = ++i; 975 if (Ctx) { 976 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 977 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 978 } 979 } 980 } 981 982 public: 983 KernelScopeInfo() = default; 984 985 void initialize(MCContext &Context) { 986 Ctx = &Context; 987 usesSgprAt(SgprIndexUnusedMin = -1); 988 usesVgprAt(VgprIndexUnusedMin = -1); 989 } 990 991 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 992 switch (RegKind) { 993 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 994 case IS_AGPR: // fall through 995 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 996 default: break; 997 } 998 } 999 }; 1000 1001 class AMDGPUAsmParser : public MCTargetAsmParser { 1002 MCAsmParser &Parser; 1003 1004 // Number of extra operands parsed after the first optional operand. 1005 // This may be necessary to skip hardcoded mandatory operands. 1006 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1007 1008 unsigned ForcedEncodingSize = 0; 1009 bool ForcedDPP = false; 1010 bool ForcedSDWA = false; 1011 KernelScopeInfo KernelScope; 1012 1013 /// @name Auto-generated Match Functions 1014 /// { 1015 1016 #define GET_ASSEMBLER_HEADER 1017 #include "AMDGPUGenAsmMatcher.inc" 1018 1019 /// } 1020 1021 private: 1022 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1023 bool OutOfRangeError(SMRange Range); 1024 /// Calculate VGPR/SGPR blocks required for given target, reserved 1025 /// registers, and user-specified NextFreeXGPR values. 1026 /// 1027 /// \param Features [in] Target features, used for bug corrections. 1028 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1029 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1030 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1031 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1032 /// descriptor field, if valid. 1033 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1034 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1035 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1036 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1037 /// \param VGPRBlocks [out] Result VGPR block count. 1038 /// \param SGPRBlocks [out] Result SGPR block count. 1039 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1040 bool FlatScrUsed, bool XNACKUsed, 1041 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1042 SMRange VGPRRange, unsigned NextFreeSGPR, 1043 SMRange SGPRRange, unsigned &VGPRBlocks, 1044 unsigned &SGPRBlocks); 1045 bool ParseDirectiveAMDGCNTarget(); 1046 bool ParseDirectiveAMDHSAKernel(); 1047 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1048 bool ParseDirectiveHSACodeObjectVersion(); 1049 bool ParseDirectiveHSACodeObjectISA(); 1050 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1051 bool ParseDirectiveAMDKernelCodeT(); 1052 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1053 bool ParseDirectiveAMDGPUHsaKernel(); 1054 1055 bool ParseDirectiveISAVersion(); 1056 bool ParseDirectiveHSAMetadata(); 1057 bool ParseDirectivePALMetadataBegin(); 1058 bool ParseDirectivePALMetadata(); 1059 bool ParseDirectiveAMDGPULDS(); 1060 1061 /// Common code to parse out a block of text (typically YAML) between start and 1062 /// end directives. 1063 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1064 const char *AssemblerDirectiveEnd, 1065 std::string &CollectString); 1066 1067 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1068 RegisterKind RegKind, unsigned Reg1); 1069 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1070 unsigned &RegNum, unsigned &RegWidth, 1071 bool RestoreOnFailure = false); 1072 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1073 unsigned &RegNum, unsigned &RegWidth, 1074 SmallVectorImpl<AsmToken> &Tokens); 1075 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1076 unsigned &RegWidth, 1077 SmallVectorImpl<AsmToken> &Tokens); 1078 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1079 unsigned &RegWidth, 1080 SmallVectorImpl<AsmToken> &Tokens); 1081 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1082 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1083 bool ParseRegRange(unsigned& Num, unsigned& Width); 1084 unsigned getRegularReg(RegisterKind RegKind, 1085 unsigned RegNum, 1086 unsigned RegWidth); 1087 1088 bool isRegister(); 1089 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1090 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1091 void initializeGprCountSymbol(RegisterKind RegKind); 1092 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1093 unsigned RegWidth); 1094 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1095 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1096 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1097 bool IsGdsHardcoded); 1098 1099 public: 1100 enum AMDGPUMatchResultTy { 1101 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1102 }; 1103 enum OperandMode { 1104 OperandMode_Default, 1105 OperandMode_NSA, 1106 }; 1107 1108 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1109 1110 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1111 const MCInstrInfo &MII, 1112 const MCTargetOptions &Options) 1113 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1114 MCAsmParserExtension::Initialize(Parser); 1115 1116 if (getFeatureBits().none()) { 1117 // Set default features. 
1118 copySTI().ToggleFeature("southern-islands"); 1119 } 1120 1121 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1122 1123 { 1124 // TODO: make those pre-defined variables read-only. 1125 // Currently there is none suitable machinery in the core llvm-mc for this. 1126 // MCSymbol::isRedefinable is intended for another purpose, and 1127 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1128 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1129 MCContext &Ctx = getContext(); 1130 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1131 MCSymbol *Sym = 1132 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1133 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1134 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1135 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1136 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1137 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1138 } else { 1139 MCSymbol *Sym = 1140 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1141 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1142 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1143 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1144 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1145 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1146 } 1147 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1148 initializeGprCountSymbol(IS_VGPR); 1149 initializeGprCountSymbol(IS_SGPR); 1150 } else 1151 KernelScope.initialize(getContext()); 1152 } 1153 } 1154 1155 bool hasXNACK() const { 1156 return AMDGPU::hasXNACK(getSTI()); 1157 } 1158 1159 bool hasMIMG_R128() const { 1160 return AMDGPU::hasMIMG_R128(getSTI()); 1161 } 1162 1163 bool hasPackedD16() const { 1164 return AMDGPU::hasPackedD16(getSTI()); 1165 } 1166 1167 bool hasGFX10A16() const { 1168 return AMDGPU::hasGFX10A16(getSTI()); 1169 } 1170 1171 bool isSI() const { 1172 return AMDGPU::isSI(getSTI()); 1173 } 1174 1175 bool isCI() const { 1176 return AMDGPU::isCI(getSTI()); 1177 } 1178 1179 bool isVI() const { 1180 return AMDGPU::isVI(getSTI()); 1181 } 1182 1183 bool isGFX9() const { 1184 return AMDGPU::isGFX9(getSTI()); 1185 } 1186 1187 bool isGFX10() const { 1188 return AMDGPU::isGFX10(getSTI()); 1189 } 1190 1191 bool isGFX10_BEncoding() const { 1192 return AMDGPU::isGFX10_BEncoding(getSTI()); 1193 } 1194 1195 bool hasInv2PiInlineImm() const { 1196 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1197 } 1198 1199 bool hasFlatOffsets() const { 1200 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1201 } 1202 1203 bool hasSGPR102_SGPR103() const { 1204 return !isVI() && !isGFX9(); 1205 } 1206 1207 bool hasSGPR104_SGPR105() const { 1208 return isGFX10(); 1209 } 1210 1211 bool hasIntClamp() const { 1212 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1213 } 1214 1215 AMDGPUTargetStreamer &getTargetStreamer() { 1216 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1217 return static_cast<AMDGPUTargetStreamer &>(TS); 1218 } 1219 1220 const MCRegisterInfo *getMRI() const { 1221 // We need this const_cast because for some reason getContext() is not const 1222 // in MCAsmParser. 
1223 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1224 } 1225 1226 const MCInstrInfo *getMII() const { 1227 return &MII; 1228 } 1229 1230 const FeatureBitset &getFeatureBits() const { 1231 return getSTI().getFeatureBits(); 1232 } 1233 1234 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1235 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1236 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1237 1238 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1239 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1240 bool isForcedDPP() const { return ForcedDPP; } 1241 bool isForcedSDWA() const { return ForcedSDWA; } 1242 ArrayRef<unsigned> getMatchedVariants() const; 1243 1244 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1245 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1246 bool RestoreOnFailure); 1247 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1248 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1249 SMLoc &EndLoc) override; 1250 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1251 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1252 unsigned Kind) override; 1253 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1254 OperandVector &Operands, MCStreamer &Out, 1255 uint64_t &ErrorInfo, 1256 bool MatchingInlineAsm) override; 1257 bool ParseDirective(AsmToken DirectiveID) override; 1258 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1259 OperandMode Mode = OperandMode_Default); 1260 StringRef parseMnemonicSuffix(StringRef Name); 1261 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1262 SMLoc NameLoc, OperandVector &Operands) override; 1263 //bool ProcessInstruction(MCInst &Inst); 1264 1265 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1266 1267 OperandMatchResultTy 1268 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1269 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1270 bool (*ConvertResult)(int64_t &) = nullptr); 1271 1272 OperandMatchResultTy 1273 parseOperandArrayWithPrefix(const char *Prefix, 1274 OperandVector &Operands, 1275 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1276 bool (*ConvertResult)(int64_t&) = nullptr); 1277 1278 OperandMatchResultTy 1279 parseNamedBit(const char *Name, OperandVector &Operands, 1280 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1281 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1282 StringRef &Value); 1283 1284 bool isModifier(); 1285 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1286 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1287 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1288 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1289 bool parseSP3NegModifier(); 1290 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1291 OperandMatchResultTy parseReg(OperandVector &Operands); 1292 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1293 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1294 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1295 
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1296 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1297 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1298 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1299 OperandMatchResultTy parseUfmt(int64_t &Format); 1300 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1301 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1302 1303 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1304 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1305 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1306 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1307 1308 bool parseCnt(int64_t &IntVal); 1309 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1310 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1311 1312 private: 1313 struct OperandInfoTy { 1314 int64_t Id; 1315 bool IsSymbolic = false; 1316 bool IsDefined = false; 1317 1318 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1319 }; 1320 1321 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1322 bool validateSendMsg(const OperandInfoTy &Msg, 1323 const OperandInfoTy &Op, 1324 const OperandInfoTy &Stream, 1325 const SMLoc Loc); 1326 1327 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1328 bool validateHwreg(const OperandInfoTy &HwReg, 1329 const int64_t Offset, 1330 const int64_t Width, 1331 const SMLoc Loc); 1332 1333 void errorExpTgt(); 1334 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1335 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1336 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1337 1338 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1339 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1340 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1341 bool validateSOPLiteral(const MCInst &Inst) const; 1342 bool validateConstantBusLimitations(const MCInst &Inst); 1343 bool validateEarlyClobberLimitations(const MCInst &Inst); 1344 bool validateIntClampSupported(const MCInst &Inst); 1345 bool validateMIMGAtomicDMask(const MCInst &Inst); 1346 bool validateMIMGGatherDMask(const MCInst &Inst); 1347 bool validateMovrels(const MCInst &Inst); 1348 bool validateMIMGDataSize(const MCInst &Inst); 1349 bool validateMIMGAddrSize(const MCInst &Inst); 1350 bool validateMIMGD16(const MCInst &Inst); 1351 bool validateMIMGDim(const MCInst &Inst); 1352 bool validateLdsDirect(const MCInst &Inst); 1353 bool validateOpSel(const MCInst &Inst); 1354 bool validateVccOperand(unsigned Reg) const; 1355 bool validateVOP3Literal(const MCInst &Inst) const; 1356 bool validateMAIAccWrite(const MCInst &Inst); 1357 unsigned getConstantBusLimit(unsigned Opcode) const; 1358 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1359 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1360 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1361 1362 bool isId(const StringRef Id) const; 1363 bool isId(const AsmToken &Token, const StringRef Id) const; 1364 bool isToken(const AsmToken::TokenKind Kind) const; 1365 bool trySkipId(const StringRef Id); 1366 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1367 bool trySkipToken(const AsmToken::TokenKind Kind); 1368 
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1369 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1370 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1371 AsmToken::TokenKind getTokenKind() const; 1372 bool parseExpr(int64_t &Imm); 1373 bool parseExpr(OperandVector &Operands); 1374 StringRef getTokenStr() const; 1375 AsmToken peekToken(); 1376 AsmToken getToken() const; 1377 SMLoc getLoc() const; 1378 void lex(); 1379 1380 public: 1381 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1382 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1383 1384 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1385 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1386 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1387 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1388 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1389 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1390 1391 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1392 const unsigned MinVal, 1393 const unsigned MaxVal, 1394 const StringRef ErrMsg); 1395 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1396 bool parseSwizzleOffset(int64_t &Imm); 1397 bool parseSwizzleMacro(int64_t &Imm); 1398 bool parseSwizzleQuadPerm(int64_t &Imm); 1399 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1400 bool parseSwizzleBroadcast(int64_t &Imm); 1401 bool parseSwizzleSwap(int64_t &Imm); 1402 bool parseSwizzleReverse(int64_t &Imm); 1403 1404 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1405 int64_t parseGPRIdxMacro(); 1406 1407 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1408 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1409 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1410 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1411 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1412 1413 AMDGPUOperand::Ptr defaultDLC() const; 1414 AMDGPUOperand::Ptr defaultGLC() const; 1415 AMDGPUOperand::Ptr defaultSLC() const; 1416 1417 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1418 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1419 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1420 AMDGPUOperand::Ptr defaultFlatOffset() const; 1421 1422 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1423 1424 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1425 OptionalImmIndexMap &OptionalIdx); 1426 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1427 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1428 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1429 1430 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1431 1432 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1433 bool IsAtomic = false); 1434 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1435 1436 OperandMatchResultTy parseDim(OperandVector &Operands); 1437 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1438 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1439 AMDGPUOperand::Ptr defaultRowMask() const; 1440 AMDGPUOperand::Ptr defaultBankMask() const; 1441 AMDGPUOperand::Ptr defaultBoundCtrl() const; 
1442 AMDGPUOperand::Ptr defaultFI() const; 1443 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1444 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1445 1446 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1447 AMDGPUOperand::ImmTy Type); 1448 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1449 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1450 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1451 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1452 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1453 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1454 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1455 uint64_t BasicInstType, 1456 bool SkipDstVcc = false, 1457 bool SkipSrcVcc = false); 1458 1459 AMDGPUOperand::Ptr defaultBLGP() const; 1460 AMDGPUOperand::Ptr defaultCBSZ() const; 1461 AMDGPUOperand::Ptr defaultABID() const; 1462 1463 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1464 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1465 }; 1466 1467 struct OptionalOperand { 1468 const char *Name; 1469 AMDGPUOperand::ImmTy Type; 1470 bool IsBit; 1471 bool (*ConvertResult)(int64_t&); 1472 }; 1473 1474 } // end anonymous namespace 1475 1476 // May be called with integer type with equivalent bitwidth. 1477 static const fltSemantics *getFltSemantics(unsigned Size) { 1478 switch (Size) { 1479 case 4: 1480 return &APFloat::IEEEsingle(); 1481 case 8: 1482 return &APFloat::IEEEdouble(); 1483 case 2: 1484 return &APFloat::IEEEhalf(); 1485 default: 1486 llvm_unreachable("unsupported fp type"); 1487 } 1488 } 1489 1490 static const fltSemantics *getFltSemantics(MVT VT) { 1491 return getFltSemantics(VT.getSizeInBits() / 8); 1492 } 1493 1494 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1495 switch (OperandType) { 1496 case AMDGPU::OPERAND_REG_IMM_INT32: 1497 case AMDGPU::OPERAND_REG_IMM_FP32: 1498 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1499 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1500 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1501 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1502 return &APFloat::IEEEsingle(); 1503 case AMDGPU::OPERAND_REG_IMM_INT64: 1504 case AMDGPU::OPERAND_REG_IMM_FP64: 1505 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1506 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1507 return &APFloat::IEEEdouble(); 1508 case AMDGPU::OPERAND_REG_IMM_INT16: 1509 case AMDGPU::OPERAND_REG_IMM_FP16: 1510 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1511 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1512 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1513 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1514 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1515 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1516 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1517 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1518 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1519 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1520 return &APFloat::IEEEhalf(); 1521 default: 1522 llvm_unreachable("unsupported fp type"); 1523 } 1524 } 1525 1526 //===----------------------------------------------------------------------===// 1527 // Operand 1528 //===----------------------------------------------------------------------===// 1529 1530 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1531 bool Lost; 1532 1533 // Convert literal to single precision 1534 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 
1535 APFloat::rmNearestTiesToEven, 1536 &Lost); 1537 // We allow precision lost but not overflow or underflow 1538 if (Status != APFloat::opOK && 1539 Lost && 1540 ((Status & APFloat::opOverflow) != 0 || 1541 (Status & APFloat::opUnderflow) != 0)) { 1542 return false; 1543 } 1544 1545 return true; 1546 } 1547 1548 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1549 return isUIntN(Size, Val) || isIntN(Size, Val); 1550 } 1551 1552 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1553 if (VT.getScalarType() == MVT::i16) { 1554 // FP immediate values are broken. 1555 return isInlinableIntLiteral(Val); 1556 } 1557 1558 // f16/v2f16 operands work correctly for all values. 1559 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1560 } 1561 1562 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1563 1564 // This is a hack to enable named inline values like 1565 // shared_base with both 32-bit and 64-bit operands. 1566 // Note that these values are defined as 1567 // 32-bit operands only. 1568 if (isInlineValue()) { 1569 return true; 1570 } 1571 1572 if (!isImmTy(ImmTyNone)) { 1573 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1574 return false; 1575 } 1576 // TODO: We should avoid using host float here. It would be better to 1577 // check the float bit values which is what a few other places do. 1578 // We've had bot failures before due to weird NaN support on mips hosts. 1579 1580 APInt Literal(64, Imm.Val); 1581 1582 if (Imm.IsFPImm) { // We got fp literal token 1583 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1584 return AMDGPU::isInlinableLiteral64(Imm.Val, 1585 AsmParser->hasInv2PiInlineImm()); 1586 } 1587 1588 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1589 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1590 return false; 1591 1592 if (type.getScalarSizeInBits() == 16) { 1593 return isInlineableLiteralOp16( 1594 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1595 type, AsmParser->hasInv2PiInlineImm()); 1596 } 1597 1598 // Check if single precision literal is inlinable 1599 return AMDGPU::isInlinableLiteral32( 1600 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1601 AsmParser->hasInv2PiInlineImm()); 1602 } 1603 1604 // We got int literal token. 1605 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1606 return AMDGPU::isInlinableLiteral64(Imm.Val, 1607 AsmParser->hasInv2PiInlineImm()); 1608 } 1609 1610 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1611 return false; 1612 } 1613 1614 if (type.getScalarSizeInBits() == 16) { 1615 return isInlineableLiteralOp16( 1616 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1617 type, AsmParser->hasInv2PiInlineImm()); 1618 } 1619 1620 return AMDGPU::isInlinableLiteral32( 1621 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1622 AsmParser->hasInv2PiInlineImm()); 1623 } 1624 1625 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1626 // Check that this immediate can be added as literal 1627 if (!isImmTy(ImmTyNone)) { 1628 return false; 1629 } 1630 1631 if (!Imm.IsFPImm) { 1632 // We got int literal token. 1633 1634 if (type == MVT::f64 && hasFPModifiers()) { 1635 // Cannot apply fp modifiers to int literals preserving the same semantics 1636 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1637 // disable these cases. 
1638 return false; 1639 } 1640 1641 unsigned Size = type.getSizeInBits(); 1642 if (Size == 64) 1643 Size = 32; 1644 1645 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1646 // types. 1647 return isSafeTruncation(Imm.Val, Size); 1648 } 1649 1650 // We got fp literal token 1651 if (type == MVT::f64) { // Expected 64-bit fp operand 1652 // We would set low 64-bits of literal to zeroes but we accept this literals 1653 return true; 1654 } 1655 1656 if (type == MVT::i64) { // Expected 64-bit int operand 1657 // We don't allow fp literals in 64-bit integer instructions. It is 1658 // unclear how we should encode them. 1659 return false; 1660 } 1661 1662 // We allow fp literals with f16x2 operands assuming that the specified 1663 // literal goes into the lower half and the upper half is zero. We also 1664 // require that the literal may be losslesly converted to f16. 1665 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1666 (type == MVT::v2i16)? MVT::i16 : type; 1667 1668 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1669 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1670 } 1671 1672 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1673 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1674 } 1675 1676 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1677 if (AsmParser->isVI()) 1678 return isVReg32(); 1679 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1680 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1681 else 1682 return false; 1683 } 1684 1685 bool AMDGPUOperand::isSDWAFP16Operand() const { 1686 return isSDWAOperand(MVT::f16); 1687 } 1688 1689 bool AMDGPUOperand::isSDWAFP32Operand() const { 1690 return isSDWAOperand(MVT::f32); 1691 } 1692 1693 bool AMDGPUOperand::isSDWAInt16Operand() const { 1694 return isSDWAOperand(MVT::i16); 1695 } 1696 1697 bool AMDGPUOperand::isSDWAInt32Operand() const { 1698 return isSDWAOperand(MVT::i32); 1699 } 1700 1701 bool AMDGPUOperand::isBoolReg() const { 1702 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1703 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1704 } 1705 1706 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1707 { 1708 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1709 assert(Size == 2 || Size == 4 || Size == 8); 1710 1711 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1712 1713 if (Imm.Mods.Abs) { 1714 Val &= ~FpSignMask; 1715 } 1716 if (Imm.Mods.Neg) { 1717 Val ^= FpSignMask; 1718 } 1719 1720 return Val; 1721 } 1722 1723 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1724 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1725 Inst.getNumOperands())) { 1726 addLiteralImmOperand(Inst, Imm.Val, 1727 ApplyModifiers & 1728 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1729 } else { 1730 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1731 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1732 } 1733 } 1734 1735 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1736 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1737 auto OpNum = Inst.getNumOperands(); 1738 // Check that this operand accepts literals 1739 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1740 1741 if (ApplyModifiers) { 1742 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1743 const unsigned Size = 
Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1744 Val = applyInputFPModifiers(Val, Size); 1745 } 1746 1747 APInt Literal(64, Val); 1748 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1749 1750 if (Imm.IsFPImm) { // We got fp literal token 1751 switch (OpTy) { 1752 case AMDGPU::OPERAND_REG_IMM_INT64: 1753 case AMDGPU::OPERAND_REG_IMM_FP64: 1754 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1755 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1756 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1757 AsmParser->hasInv2PiInlineImm())) { 1758 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1759 return; 1760 } 1761 1762 // Non-inlineable 1763 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1764 // For fp operands we check if low 32 bits are zeros 1765 if (Literal.getLoBits(32) != 0) { 1766 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1767 "Can't encode literal as exact 64-bit floating-point operand. " 1768 "Low 32-bits will be set to zero"); 1769 } 1770 1771 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1772 return; 1773 } 1774 1775 // We don't allow fp literals in 64-bit integer instructions. It is 1776 // unclear how we should encode them. This case should be checked earlier 1777 // in predicate methods (isLiteralImm()) 1778 llvm_unreachable("fp literal in 64-bit integer instruction."); 1779 1780 case AMDGPU::OPERAND_REG_IMM_INT32: 1781 case AMDGPU::OPERAND_REG_IMM_FP32: 1782 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1783 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1784 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1785 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1786 case AMDGPU::OPERAND_REG_IMM_INT16: 1787 case AMDGPU::OPERAND_REG_IMM_FP16: 1788 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1789 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1790 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1791 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1792 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1793 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1794 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1796 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1797 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1798 bool lost; 1799 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1800 // Convert literal to single precision 1801 FPLiteral.convert(*getOpFltSemantics(OpTy), 1802 APFloat::rmNearestTiesToEven, &lost); 1803 // We allow precision lost but not overflow or underflow. This should be 1804 // checked earlier in isLiteralImm() 1805 1806 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1807 Inst.addOperand(MCOperand::createImm(ImmVal)); 1808 return; 1809 } 1810 default: 1811 llvm_unreachable("invalid operand size"); 1812 } 1813 1814 return; 1815 } 1816 1817 // We got int literal token. 1818 // Only sign extend inline immediates. 
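  // Rough summary of the switch below (wording added for clarity): inlinable
  // values are emitted unchanged, anything else is truncated to the operand
  // width (low 32 bits for 32/64-bit operands, low 16 bits for 16-bit ones).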
1819 switch (OpTy) { 1820 case AMDGPU::OPERAND_REG_IMM_INT32: 1821 case AMDGPU::OPERAND_REG_IMM_FP32: 1822 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1823 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1824 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1825 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1826 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1827 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1828 if (isSafeTruncation(Val, 32) && 1829 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1830 AsmParser->hasInv2PiInlineImm())) { 1831 Inst.addOperand(MCOperand::createImm(Val)); 1832 return; 1833 } 1834 1835 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1836 return; 1837 1838 case AMDGPU::OPERAND_REG_IMM_INT64: 1839 case AMDGPU::OPERAND_REG_IMM_FP64: 1840 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1841 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1842 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1843 Inst.addOperand(MCOperand::createImm(Val)); 1844 return; 1845 } 1846 1847 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1848 return; 1849 1850 case AMDGPU::OPERAND_REG_IMM_INT16: 1851 case AMDGPU::OPERAND_REG_IMM_FP16: 1852 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1853 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1854 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1855 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1856 if (isSafeTruncation(Val, 16) && 1857 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1858 AsmParser->hasInv2PiInlineImm())) { 1859 Inst.addOperand(MCOperand::createImm(Val)); 1860 return; 1861 } 1862 1863 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1864 return; 1865 1866 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1868 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1869 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1870 assert(isSafeTruncation(Val, 16)); 1871 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1872 AsmParser->hasInv2PiInlineImm())); 1873 1874 Inst.addOperand(MCOperand::createImm(Val)); 1875 return; 1876 } 1877 default: 1878 llvm_unreachable("invalid operand size"); 1879 } 1880 } 1881 1882 template <unsigned Bitwidth> 1883 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1884 APInt Literal(64, Imm.Val); 1885 1886 if (!Imm.IsFPImm) { 1887 // We got int literal token. 
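    // Only the low Bitwidth bits of the value are used, e.g. (illustration)
    // a 16-bit KImm operand keeps just Val & 0xffff.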
1888 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1889 return; 1890 } 1891 1892 bool Lost; 1893 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1894 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1895 APFloat::rmNearestTiesToEven, &Lost); 1896 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1897 } 1898 1899 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1900 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1901 } 1902 1903 static bool isInlineValue(unsigned Reg) { 1904 switch (Reg) { 1905 case AMDGPU::SRC_SHARED_BASE: 1906 case AMDGPU::SRC_SHARED_LIMIT: 1907 case AMDGPU::SRC_PRIVATE_BASE: 1908 case AMDGPU::SRC_PRIVATE_LIMIT: 1909 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1910 return true; 1911 case AMDGPU::SRC_VCCZ: 1912 case AMDGPU::SRC_EXECZ: 1913 case AMDGPU::SRC_SCC: 1914 return true; 1915 case AMDGPU::SGPR_NULL: 1916 return true; 1917 default: 1918 return false; 1919 } 1920 } 1921 1922 bool AMDGPUOperand::isInlineValue() const { 1923 return isRegKind() && ::isInlineValue(getReg()); 1924 } 1925 1926 //===----------------------------------------------------------------------===// 1927 // AsmParser 1928 //===----------------------------------------------------------------------===// 1929 1930 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1931 if (Is == IS_VGPR) { 1932 switch (RegWidth) { 1933 default: return -1; 1934 case 1: return AMDGPU::VGPR_32RegClassID; 1935 case 2: return AMDGPU::VReg_64RegClassID; 1936 case 3: return AMDGPU::VReg_96RegClassID; 1937 case 4: return AMDGPU::VReg_128RegClassID; 1938 case 5: return AMDGPU::VReg_160RegClassID; 1939 case 6: return AMDGPU::VReg_192RegClassID; 1940 case 8: return AMDGPU::VReg_256RegClassID; 1941 case 16: return AMDGPU::VReg_512RegClassID; 1942 case 32: return AMDGPU::VReg_1024RegClassID; 1943 } 1944 } else if (Is == IS_TTMP) { 1945 switch (RegWidth) { 1946 default: return -1; 1947 case 1: return AMDGPU::TTMP_32RegClassID; 1948 case 2: return AMDGPU::TTMP_64RegClassID; 1949 case 4: return AMDGPU::TTMP_128RegClassID; 1950 case 8: return AMDGPU::TTMP_256RegClassID; 1951 case 16: return AMDGPU::TTMP_512RegClassID; 1952 } 1953 } else if (Is == IS_SGPR) { 1954 switch (RegWidth) { 1955 default: return -1; 1956 case 1: return AMDGPU::SGPR_32RegClassID; 1957 case 2: return AMDGPU::SGPR_64RegClassID; 1958 case 3: return AMDGPU::SGPR_96RegClassID; 1959 case 4: return AMDGPU::SGPR_128RegClassID; 1960 case 5: return AMDGPU::SGPR_160RegClassID; 1961 case 6: return AMDGPU::SGPR_192RegClassID; 1962 case 8: return AMDGPU::SGPR_256RegClassID; 1963 case 16: return AMDGPU::SGPR_512RegClassID; 1964 } 1965 } else if (Is == IS_AGPR) { 1966 switch (RegWidth) { 1967 default: return -1; 1968 case 1: return AMDGPU::AGPR_32RegClassID; 1969 case 2: return AMDGPU::AReg_64RegClassID; 1970 case 3: return AMDGPU::AReg_96RegClassID; 1971 case 4: return AMDGPU::AReg_128RegClassID; 1972 case 5: return AMDGPU::AReg_160RegClassID; 1973 case 6: return AMDGPU::AReg_192RegClassID; 1974 case 8: return AMDGPU::AReg_256RegClassID; 1975 case 16: return AMDGPU::AReg_512RegClassID; 1976 case 32: return AMDGPU::AReg_1024RegClassID; 1977 } 1978 } 1979 return -1; 1980 } 1981 1982 static unsigned getSpecialRegForName(StringRef RegName) { 1983 return StringSwitch<unsigned>(RegName) 1984 .Case("exec", AMDGPU::EXEC) 1985 .Case("vcc", AMDGPU::VCC) 1986 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1987 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1988 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1989 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1990 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1991 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1992 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1993 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1994 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1995 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1996 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1997 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1998 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1999 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2000 .Case("m0", AMDGPU::M0) 2001 .Case("vccz", AMDGPU::SRC_VCCZ) 2002 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2003 .Case("execz", AMDGPU::SRC_EXECZ) 2004 .Case("src_execz", AMDGPU::SRC_EXECZ) 2005 .Case("scc", AMDGPU::SRC_SCC) 2006 .Case("src_scc", AMDGPU::SRC_SCC) 2007 .Case("tba", AMDGPU::TBA) 2008 .Case("tma", AMDGPU::TMA) 2009 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2010 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2011 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2012 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2013 .Case("vcc_lo", AMDGPU::VCC_LO) 2014 .Case("vcc_hi", AMDGPU::VCC_HI) 2015 .Case("exec_lo", AMDGPU::EXEC_LO) 2016 .Case("exec_hi", AMDGPU::EXEC_HI) 2017 .Case("tma_lo", AMDGPU::TMA_LO) 2018 .Case("tma_hi", AMDGPU::TMA_HI) 2019 .Case("tba_lo", AMDGPU::TBA_LO) 2020 .Case("tba_hi", AMDGPU::TBA_HI) 2021 .Case("pc", AMDGPU::PC_REG) 2022 .Case("null", AMDGPU::SGPR_NULL) 2023 .Default(AMDGPU::NoRegister); 2024 } 2025 2026 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2027 SMLoc &EndLoc, bool RestoreOnFailure) { 2028 auto R = parseRegister(); 2029 if (!R) return true; 2030 assert(R->isReg()); 2031 RegNo = R->getReg(); 2032 StartLoc = R->getStartLoc(); 2033 EndLoc = R->getEndLoc(); 2034 return false; 2035 } 2036 2037 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2038 SMLoc &EndLoc) { 2039 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2040 } 2041 2042 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2043 SMLoc &StartLoc, 2044 SMLoc &EndLoc) { 2045 bool Result = 2046 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2047 bool PendingErrors = getParser().hasPendingError(); 2048 getParser().clearPendingErrors(); 2049 if (PendingErrors) 2050 return MatchOperand_ParseFail; 2051 if (Result) 2052 return MatchOperand_NoMatch; 2053 return MatchOperand_Success; 2054 } 2055 2056 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2057 RegisterKind RegKind, unsigned Reg1) { 2058 switch (RegKind) { 2059 case IS_SPECIAL: 2060 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2061 Reg = AMDGPU::EXEC; 2062 RegWidth = 2; 2063 return true; 2064 } 2065 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2066 Reg = AMDGPU::FLAT_SCR; 2067 RegWidth = 2; 2068 return true; 2069 } 2070 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2071 Reg = AMDGPU::XNACK_MASK; 2072 RegWidth = 2; 2073 return true; 2074 } 2075 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2076 Reg = AMDGPU::VCC; 2077 RegWidth = 2; 2078 return true; 2079 } 2080 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2081 Reg = AMDGPU::TBA; 2082 RegWidth = 2; 2083 return true; 2084 } 2085 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2086 Reg = AMDGPU::TMA; 2087 RegWidth = 2; 
2088 return true; 2089 } 2090 return false; 2091 case IS_VGPR: 2092 case IS_SGPR: 2093 case IS_AGPR: 2094 case IS_TTMP: 2095 if (Reg1 != Reg + RegWidth) { 2096 return false; 2097 } 2098 RegWidth++; 2099 return true; 2100 default: 2101 llvm_unreachable("unexpected register kind"); 2102 } 2103 } 2104 2105 struct RegInfo { 2106 StringLiteral Name; 2107 RegisterKind Kind; 2108 }; 2109 2110 static constexpr RegInfo RegularRegisters[] = { 2111 {{"v"}, IS_VGPR}, 2112 {{"s"}, IS_SGPR}, 2113 {{"ttmp"}, IS_TTMP}, 2114 {{"acc"}, IS_AGPR}, 2115 {{"a"}, IS_AGPR}, 2116 }; 2117 2118 static bool isRegularReg(RegisterKind Kind) { 2119 return Kind == IS_VGPR || 2120 Kind == IS_SGPR || 2121 Kind == IS_TTMP || 2122 Kind == IS_AGPR; 2123 } 2124 2125 static const RegInfo* getRegularRegInfo(StringRef Str) { 2126 for (const RegInfo &Reg : RegularRegisters) 2127 if (Str.startswith(Reg.Name)) 2128 return &Reg; 2129 return nullptr; 2130 } 2131 2132 static bool getRegNum(StringRef Str, unsigned& Num) { 2133 return !Str.getAsInteger(10, Num); 2134 } 2135 2136 bool 2137 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2138 const AsmToken &NextToken) const { 2139 2140 // A list of consecutive registers: [s0,s1,s2,s3] 2141 if (Token.is(AsmToken::LBrac)) 2142 return true; 2143 2144 if (!Token.is(AsmToken::Identifier)) 2145 return false; 2146 2147 // A single register like s0 or a range of registers like s[0:1] 2148 2149 StringRef Str = Token.getString(); 2150 const RegInfo *Reg = getRegularRegInfo(Str); 2151 if (Reg) { 2152 StringRef RegName = Reg->Name; 2153 StringRef RegSuffix = Str.substr(RegName.size()); 2154 if (!RegSuffix.empty()) { 2155 unsigned Num; 2156 // A single register with an index: rXX 2157 if (getRegNum(RegSuffix, Num)) 2158 return true; 2159 } else { 2160 // A range of registers: r[XX:YY]. 2161 if (NextToken.is(AsmToken::LBrac)) 2162 return true; 2163 } 2164 } 2165 2166 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2167 } 2168 2169 bool 2170 AMDGPUAsmParser::isRegister() 2171 { 2172 return isRegister(getToken(), peekToken()); 2173 } 2174 2175 unsigned 2176 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2177 unsigned RegNum, 2178 unsigned RegWidth) { 2179 2180 assert(isRegularReg(RegKind)); 2181 2182 unsigned AlignSize = 1; 2183 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2184 // SGPR and TTMP registers must be aligned. 2185 // Max required alignment is 4 dwords. 
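    // For example (based on the alignment rule below): a two-dword tuple
    // such as s[2:3] must start at an even SGPR, and tuples of four or
    // more dwords must start at a multiple of four.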
2186 AlignSize = std::min(RegWidth, 4u); 2187 } 2188 2189 if (RegNum % AlignSize != 0) 2190 return AMDGPU::NoRegister; 2191 2192 unsigned RegIdx = RegNum / AlignSize; 2193 int RCID = getRegClass(RegKind, RegWidth); 2194 if (RCID == -1) 2195 return AMDGPU::NoRegister; 2196 2197 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2198 const MCRegisterClass RC = TRI->getRegClass(RCID); 2199 if (RegIdx >= RC.getNumRegs()) 2200 return AMDGPU::NoRegister; 2201 2202 return RC.getRegister(RegIdx); 2203 } 2204 2205 bool 2206 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2207 int64_t RegLo, RegHi; 2208 if (!trySkipToken(AsmToken::LBrac)) 2209 return false; 2210 2211 if (!parseExpr(RegLo)) 2212 return false; 2213 2214 if (trySkipToken(AsmToken::Colon)) { 2215 if (!parseExpr(RegHi)) 2216 return false; 2217 } else { 2218 RegHi = RegLo; 2219 } 2220 2221 if (!trySkipToken(AsmToken::RBrac)) 2222 return false; 2223 2224 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2225 return false; 2226 2227 Num = static_cast<unsigned>(RegLo); 2228 Width = (RegHi - RegLo) + 1; 2229 return true; 2230 } 2231 2232 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2233 unsigned &RegNum, unsigned &RegWidth, 2234 SmallVectorImpl<AsmToken> &Tokens) { 2235 assert(isToken(AsmToken::Identifier)); 2236 unsigned Reg = getSpecialRegForName(getTokenStr()); 2237 if (Reg) { 2238 RegNum = 0; 2239 RegWidth = 1; 2240 RegKind = IS_SPECIAL; 2241 Tokens.push_back(getToken()); 2242 lex(); // skip register name 2243 } 2244 return Reg; 2245 } 2246 2247 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2248 unsigned &RegNum, unsigned &RegWidth, 2249 SmallVectorImpl<AsmToken> &Tokens) { 2250 assert(isToken(AsmToken::Identifier)); 2251 StringRef RegName = getTokenStr(); 2252 2253 const RegInfo *RI = getRegularRegInfo(RegName); 2254 if (!RI) 2255 return AMDGPU::NoRegister; 2256 Tokens.push_back(getToken()); 2257 lex(); // skip register name 2258 2259 RegKind = RI->Kind; 2260 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2261 if (!RegSuffix.empty()) { 2262 // Single 32-bit register: vXX. 2263 if (!getRegNum(RegSuffix, RegNum)) 2264 return AMDGPU::NoRegister; 2265 RegWidth = 1; 2266 } else { 2267 // Range of registers: v[XX:YY]. ":YY" is optional. 
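    // For example, "v[4:7]" yields RegNum = 4 and RegWidth = 4, while
    // "v[4]" yields RegWidth = 1.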
2268 if (!ParseRegRange(RegNum, RegWidth)) 2269 return AMDGPU::NoRegister; 2270 } 2271 2272 return getRegularReg(RegKind, RegNum, RegWidth); 2273 } 2274 2275 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2276 unsigned &RegWidth, 2277 SmallVectorImpl<AsmToken> &Tokens) { 2278 unsigned Reg = AMDGPU::NoRegister; 2279 2280 if (!trySkipToken(AsmToken::LBrac)) 2281 return AMDGPU::NoRegister; 2282 2283 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2284 2285 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2286 return AMDGPU::NoRegister; 2287 if (RegWidth != 1) 2288 return AMDGPU::NoRegister; 2289 2290 for (; trySkipToken(AsmToken::Comma); ) { 2291 RegisterKind NextRegKind; 2292 unsigned NextReg, NextRegNum, NextRegWidth; 2293 2294 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2295 Tokens)) 2296 return AMDGPU::NoRegister; 2297 if (NextRegWidth != 1) 2298 return AMDGPU::NoRegister; 2299 if (NextRegKind != RegKind) 2300 return AMDGPU::NoRegister; 2301 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2302 return AMDGPU::NoRegister; 2303 } 2304 2305 if (!trySkipToken(AsmToken::RBrac)) 2306 return AMDGPU::NoRegister; 2307 2308 if (isRegularReg(RegKind)) 2309 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2310 2311 return Reg; 2312 } 2313 2314 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2315 unsigned &RegNum, unsigned &RegWidth, 2316 SmallVectorImpl<AsmToken> &Tokens) { 2317 Reg = AMDGPU::NoRegister; 2318 2319 if (isToken(AsmToken::Identifier)) { 2320 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2321 if (Reg == AMDGPU::NoRegister) 2322 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2323 } else { 2324 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2325 } 2326 2327 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2328 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2329 } 2330 2331 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2332 unsigned &RegNum, unsigned &RegWidth, 2333 bool RestoreOnFailure) { 2334 Reg = AMDGPU::NoRegister; 2335 2336 SmallVector<AsmToken, 1> Tokens; 2337 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2338 if (RestoreOnFailure) { 2339 while (!Tokens.empty()) { 2340 getLexer().UnLex(Tokens.pop_back_val()); 2341 } 2342 } 2343 return true; 2344 } 2345 return false; 2346 } 2347 2348 Optional<StringRef> 2349 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2350 switch (RegKind) { 2351 case IS_VGPR: 2352 return StringRef(".amdgcn.next_free_vgpr"); 2353 case IS_SGPR: 2354 return StringRef(".amdgcn.next_free_sgpr"); 2355 default: 2356 return None; 2357 } 2358 } 2359 2360 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2361 auto SymbolName = getGprCountSymbolName(RegKind); 2362 assert(SymbolName && "initializing invalid register kind"); 2363 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2364 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2365 } 2366 2367 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2368 unsigned DwordRegIndex, 2369 unsigned RegWidth) { 2370 // Symbols are only defined for GCN targets 2371 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2372 return true; 2373 2374 auto SymbolName = getGprCountSymbolName(RegKind); 2375 if (!SymbolName) 2376 return true; 2377 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2378 2379 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2380 int64_t OldCount; 2381 2382 if (!Sym->isVariable()) 2383 return !Error(getParser().getTok().getLoc(), 2384 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2385 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2386 return !Error( 2387 getParser().getTok().getLoc(), 2388 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2389 2390 if (OldCount <= NewMax) 2391 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2392 2393 return true; 2394 } 2395 2396 std::unique_ptr<AMDGPUOperand> 2397 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2398 const auto &Tok = Parser.getTok(); 2399 SMLoc StartLoc = Tok.getLoc(); 2400 SMLoc EndLoc = Tok.getEndLoc(); 2401 RegisterKind RegKind; 2402 unsigned Reg, RegNum, RegWidth; 2403 2404 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2405 //FIXME: improve error messages (bug 41303). 2406 Error(StartLoc, "not a valid operand."); 2407 return nullptr; 2408 } 2409 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2410 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2411 return nullptr; 2412 } else 2413 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2414 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2415 } 2416 2417 OperandMatchResultTy 2418 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2419 // TODO: add syntactic sugar for 1/(2*PI) 2420 2421 assert(!isRegister()); 2422 assert(!isModifier()); 2423 2424 const auto& Tok = getToken(); 2425 const auto& NextTok = peekToken(); 2426 bool IsReal = Tok.is(AsmToken::Real); 2427 SMLoc S = getLoc(); 2428 bool Negate = false; 2429 2430 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2431 lex(); 2432 IsReal = true; 2433 Negate = true; 2434 } 2435 2436 if (IsReal) { 2437 // Floating-point expressions are not supported. 2438 // Can only allow floating-point literals with an 2439 // optional sign. 2440 2441 StringRef Num = getTokenStr(); 2442 lex(); 2443 2444 APFloat RealVal(APFloat::IEEEdouble()); 2445 auto roundMode = APFloat::rmNearestTiesToEven; 2446 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2447 return MatchOperand_ParseFail; 2448 } 2449 if (Negate) 2450 RealVal.changeSign(); 2451 2452 Operands.push_back( 2453 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2454 AMDGPUOperand::ImmTyNone, true)); 2455 2456 return MatchOperand_Success; 2457 2458 } else { 2459 int64_t IntVal; 2460 const MCExpr *Expr; 2461 SMLoc S = getLoc(); 2462 2463 if (HasSP3AbsModifier) { 2464 // This is a workaround for handling expressions 2465 // as arguments of SP3 'abs' modifier, for example: 2466 // |1.0| 2467 // |-1| 2468 // |1+x| 2469 // This syntax is not compatible with syntax of standard 2470 // MC expressions (due to the trailing '|'). 
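      // Hence only a primary expression is parsed below, leaving the
      // closing '|' to be consumed by the caller.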
2471 SMLoc EndLoc; 2472 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2473 return MatchOperand_ParseFail; 2474 } else { 2475 if (Parser.parseExpression(Expr)) 2476 return MatchOperand_ParseFail; 2477 } 2478 2479 if (Expr->evaluateAsAbsolute(IntVal)) { 2480 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2481 } else { 2482 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2483 } 2484 2485 return MatchOperand_Success; 2486 } 2487 2488 return MatchOperand_NoMatch; 2489 } 2490 2491 OperandMatchResultTy 2492 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2493 if (!isRegister()) 2494 return MatchOperand_NoMatch; 2495 2496 if (auto R = parseRegister()) { 2497 assert(R->isReg()); 2498 Operands.push_back(std::move(R)); 2499 return MatchOperand_Success; 2500 } 2501 return MatchOperand_ParseFail; 2502 } 2503 2504 OperandMatchResultTy 2505 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2506 auto res = parseReg(Operands); 2507 if (res != MatchOperand_NoMatch) { 2508 return res; 2509 } else if (isModifier()) { 2510 return MatchOperand_NoMatch; 2511 } else { 2512 return parseImm(Operands, HasSP3AbsMod); 2513 } 2514 } 2515 2516 bool 2517 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2518 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2519 const auto &str = Token.getString(); 2520 return str == "abs" || str == "neg" || str == "sext"; 2521 } 2522 return false; 2523 } 2524 2525 bool 2526 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2527 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2528 } 2529 2530 bool 2531 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2532 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2533 } 2534 2535 bool 2536 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2537 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2538 } 2539 2540 // Check if this is an operand modifier or an opcode modifier 2541 // which may look like an expression but it is not. We should 2542 // avoid parsing these modifiers as expressions. Currently 2543 // recognized sequences are: 2544 // |...| 2545 // abs(...) 2546 // neg(...) 2547 // sext(...) 2548 // -reg 2549 // -|...| 2550 // -abs(...) 2551 // name:... 2552 // Note that simple opcode modifiers like 'gds' may be parsed as 2553 // expressions; this is a special case. See getExpressionAsToken. 2554 // 2555 bool 2556 AMDGPUAsmParser::isModifier() { 2557 2558 AsmToken Tok = getToken(); 2559 AsmToken NextToken[2]; 2560 peekTokens(NextToken); 2561 2562 return isOperandModifier(Tok, NextToken[0]) || 2563 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2564 isOpcodeModifierWithVal(Tok, NextToken[0]); 2565 } 2566 2567 // Check if the current token is an SP3 'neg' modifier. 2568 // Currently this modifier is allowed in the following context: 2569 // 2570 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2571 // 2. Before an 'abs' modifier: -abs(...) 2572 // 3. Before an SP3 'abs' modifier: -|...| 2573 // 2574 // In all other cases "-" is handled as a part 2575 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2649 } 2650 2651 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2652 return MatchOperand_ParseFail; 2653 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2654 return MatchOperand_ParseFail; 2655 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2656 return MatchOperand_ParseFail; 2657 2658 AMDGPUOperand::Modifiers Mods; 2659 Mods.Abs = Abs || SP3Abs; 2660 Mods.Neg = Neg || SP3Neg; 2661 2662 if (Mods.hasFPModifiers()) { 2663 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2664 if (Op.isExpr()) { 2665 Error(Op.getStartLoc(), "expected an absolute expression"); 2666 return MatchOperand_ParseFail; 2667 } 2668 Op.setModifiers(Mods); 2669 } 2670 return MatchOperand_Success; 2671 } 2672 2673 OperandMatchResultTy 2674 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2675 bool AllowImm) { 2676 bool Sext = trySkipId("sext"); 2677 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2678 return MatchOperand_ParseFail; 2679 2680 OperandMatchResultTy Res; 2681 if (AllowImm) { 2682 Res = parseRegOrImm(Operands); 2683 } else { 2684 Res = parseReg(Operands); 2685 } 2686 if (Res != MatchOperand_Success) { 2687 return Sext? MatchOperand_ParseFail : Res; 2688 } 2689 2690 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2691 return MatchOperand_ParseFail; 2692 2693 AMDGPUOperand::Modifiers Mods; 2694 Mods.Sext = Sext; 2695 2696 if (Mods.hasIntModifiers()) { 2697 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2698 if (Op.isExpr()) { 2699 Error(Op.getStartLoc(), "expected an absolute expression"); 2700 return MatchOperand_ParseFail; 2701 } 2702 Op.setModifiers(Mods); 2703 } 2704 2705 return MatchOperand_Success; 2706 } 2707 2708 OperandMatchResultTy 2709 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2710 return parseRegOrImmWithFPInputMods(Operands, false); 2711 } 2712 2713 OperandMatchResultTy 2714 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2715 return parseRegOrImmWithIntInputMods(Operands, false); 2716 } 2717 2718 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2719 auto Loc = getLoc(); 2720 if (trySkipId("off")) { 2721 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2722 AMDGPUOperand::ImmTyOff, false)); 2723 return MatchOperand_Success; 2724 } 2725 2726 if (!isRegister()) 2727 return MatchOperand_NoMatch; 2728 2729 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2730 if (Reg) { 2731 Operands.push_back(std::move(Reg)); 2732 return MatchOperand_Success; 2733 } 2734 2735 return MatchOperand_ParseFail; 2736 2737 } 2738 2739 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2740 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2741 2742 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2743 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2744 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2745 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2746 return Match_InvalidOperand; 2747 2748 if ((TSFlags & SIInstrFlags::VOP3) && 2749 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2750 getForcedEncodingSize() != 64) 2751 return Match_PreferE32; 2752 2753 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2754 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2755 // v_mac_f32/16 allow only dst_sel == DWORD; 2756 auto OpNum = 2757 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

// Which asm variants should be checked.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
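//
// Illustrative note (values assumed from the generic inline-constant
// rules, not from this file): an integer source of 64 is an inline
// constant and does not occupy the constant bus, while 65 must be
// encoded as a literal and does.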
2821 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2822 unsigned OpIdx) const { 2823 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2824 2825 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2826 return false; 2827 } 2828 2829 const MCOperand &MO = Inst.getOperand(OpIdx); 2830 2831 int64_t Val = MO.getImm(); 2832 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2833 2834 switch (OpSize) { // expected operand size 2835 case 8: 2836 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2837 case 4: 2838 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2839 case 2: { 2840 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2841 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2842 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2843 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2844 return AMDGPU::isInlinableIntLiteral(Val); 2845 2846 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2847 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2848 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2849 return AMDGPU::isInlinableIntLiteralV216(Val); 2850 2851 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2852 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2853 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2854 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2855 2856 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2857 } 2858 default: 2859 llvm_unreachable("invalid operand size"); 2860 } 2861 } 2862 2863 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2864 if (!isGFX10()) 2865 return 1; 2866 2867 switch (Opcode) { 2868 // 64-bit shift instructions can use only one scalar value input 2869 case AMDGPU::V_LSHLREV_B64: 2870 case AMDGPU::V_LSHLREV_B64_gfx10: 2871 case AMDGPU::V_LSHL_B64: 2872 case AMDGPU::V_LSHRREV_B64: 2873 case AMDGPU::V_LSHRREV_B64_gfx10: 2874 case AMDGPU::V_LSHR_B64: 2875 case AMDGPU::V_ASHRREV_I64: 2876 case AMDGPU::V_ASHRREV_I64_gfx10: 2877 case AMDGPU::V_ASHR_I64: 2878 return 1; 2879 default: 2880 return 2; 2881 } 2882 } 2883 2884 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2885 const MCOperand &MO = Inst.getOperand(OpIdx); 2886 if (MO.isImm()) { 2887 return !isInlineConstant(Inst, OpIdx); 2888 } else if (MO.isReg()) { 2889 auto Reg = MO.getReg(); 2890 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2891 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2892 } else { 2893 return true; 2894 } 2895 } 2896 2897 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2898 const unsigned Opcode = Inst.getOpcode(); 2899 const MCInstrDesc &Desc = MII.get(Opcode); 2900 unsigned ConstantBusUseCount = 0; 2901 unsigned NumLiterals = 0; 2902 unsigned LiteralSize; 2903 2904 if (Desc.TSFlags & 2905 (SIInstrFlags::VOPC | 2906 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2907 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2908 SIInstrFlags::SDWA)) { 2909 // Check special imm operands (used by madmk, etc) 2910 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2911 ++ConstantBusUseCount; 2912 } 2913 2914 SmallDenseSet<unsigned> SGPRsUsed; 2915 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2916 if (SGPRUsed != AMDGPU::NoRegister) { 2917 SGPRsUsed.insert(SGPRUsed); 2918 ++ConstantBusUseCount; 2919 } 2920 2921 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2922 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, 
AMDGPU::OpName::src1); 2923 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2924 2925 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2926 2927 for (int OpIdx : OpIndices) { 2928 if (OpIdx == -1) break; 2929 2930 const MCOperand &MO = Inst.getOperand(OpIdx); 2931 if (usesConstantBus(Inst, OpIdx)) { 2932 if (MO.isReg()) { 2933 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2934 // Pairs of registers with a partial intersections like these 2935 // s0, s[0:1] 2936 // flat_scratch_lo, flat_scratch 2937 // flat_scratch_lo, flat_scratch_hi 2938 // are theoretically valid but they are disabled anyway. 2939 // Note that this code mimics SIInstrInfo::verifyInstruction 2940 if (!SGPRsUsed.count(Reg)) { 2941 SGPRsUsed.insert(Reg); 2942 ++ConstantBusUseCount; 2943 } 2944 } else { // Expression or a literal 2945 2946 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2947 continue; // special operand like VINTERP attr_chan 2948 2949 // An instruction may use only one literal. 2950 // This has been validated on the previous step. 2951 // See validateVOP3Literal. 2952 // This literal may be used as more than one operand. 2953 // If all these operands are of the same size, 2954 // this literal counts as one scalar value. 2955 // Otherwise it counts as 2 scalar values. 2956 // See "GFX10 Shader Programming", section 3.6.2.3. 2957 2958 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2959 if (Size < 4) Size = 4; 2960 2961 if (NumLiterals == 0) { 2962 NumLiterals = 1; 2963 LiteralSize = Size; 2964 } else if (LiteralSize != Size) { 2965 NumLiterals = 2; 2966 } 2967 } 2968 } 2969 } 2970 } 2971 ConstantBusUseCount += NumLiterals; 2972 2973 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2974 } 2975 2976 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2977 const unsigned Opcode = Inst.getOpcode(); 2978 const MCInstrDesc &Desc = MII.get(Opcode); 2979 2980 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2981 if (DstIdx == -1 || 2982 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2983 return true; 2984 } 2985 2986 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2987 2988 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2989 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2990 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2991 2992 assert(DstIdx != -1); 2993 const MCOperand &Dst = Inst.getOperand(DstIdx); 2994 assert(Dst.isReg()); 2995 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2996 2997 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2998 2999 for (int SrcIdx : SrcIndices) { 3000 if (SrcIdx == -1) break; 3001 const MCOperand &Src = Inst.getOperand(SrcIdx); 3002 if (Src.isReg()) { 3003 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3004 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3005 return false; 3006 } 3007 } 3008 } 3009 3010 return true; 3011 } 3012 3013 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3014 3015 const unsigned Opc = Inst.getOpcode(); 3016 const MCInstrDesc &Desc = MII.get(Opc); 3017 3018 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3019 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3020 assert(ClampIdx != -1); 3021 return Inst.getOperand(ClampIdx).getImm() == 0; 3022 } 3023 3024 return true; 3025 } 3026 3027 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst 
&Inst) { 3028 3029 const unsigned Opc = Inst.getOpcode(); 3030 const MCInstrDesc &Desc = MII.get(Opc); 3031 3032 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3033 return true; 3034 3035 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3036 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3037 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3038 3039 assert(VDataIdx != -1); 3040 assert(DMaskIdx != -1); 3041 assert(TFEIdx != -1); 3042 3043 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3044 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3045 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3046 if (DMask == 0) 3047 DMask = 1; 3048 3049 unsigned DataSize = 3050 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3051 if (hasPackedD16()) { 3052 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3053 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3054 DataSize = (DataSize + 1) / 2; 3055 } 3056 3057 return (VDataSize / 4) == DataSize + TFESize; 3058 } 3059 3060 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3061 const unsigned Opc = Inst.getOpcode(); 3062 const MCInstrDesc &Desc = MII.get(Opc); 3063 3064 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3065 return true; 3066 3067 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3068 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3069 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3070 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3071 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3072 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3073 3074 assert(VAddr0Idx != -1); 3075 assert(SrsrcIdx != -1); 3076 assert(DimIdx != -1); 3077 assert(SrsrcIdx > VAddr0Idx); 3078 3079 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3080 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3081 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3082 unsigned VAddrSize = 3083 IsNSA ? SrsrcIdx - VAddr0Idx 3084 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3085 3086 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3087 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3088 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3089 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3090 if (!IsNSA) { 3091 if (AddrSize > 8) 3092 AddrSize = 16; 3093 else if (AddrSize > 4) 3094 AddrSize = 8; 3095 } 3096 3097 return VAddrSize == AddrSize; 3098 } 3099 3100 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3101 3102 const unsigned Opc = Inst.getOpcode(); 3103 const MCInstrDesc &Desc = MII.get(Opc); 3104 3105 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3106 return true; 3107 if (!Desc.mayLoad() || !Desc.mayStore()) 3108 return true; // Not atomic 3109 3110 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3111 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3112 3113 // This is an incomplete check because image_atomic_cmpswap 3114 // may only use 0x3 and 0xf while other atomic operations 3115 // may use 0x1 and 0x3. However these limitations are 3116 // verified when we check that dmask matches dst size. 
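  // The check below therefore accepts the union of both sets
  // (0x1, 0x3 and 0xf).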
3117 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3118 } 3119 3120 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3121 3122 const unsigned Opc = Inst.getOpcode(); 3123 const MCInstrDesc &Desc = MII.get(Opc); 3124 3125 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3126 return true; 3127 3128 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3129 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3130 3131 // GATHER4 instructions use dmask in a different fashion compared to 3132 // other MIMG instructions. The only useful DMASK values are 3133 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3134 // (red,red,red,red) etc.) The ISA document doesn't mention 3135 // this. 3136 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3137 } 3138 3139 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3140 { 3141 switch (Opcode) { 3142 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3143 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3144 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3145 return true; 3146 default: 3147 return false; 3148 } 3149 } 3150 3151 // movrels* opcodes should only allow VGPRS as src0. 3152 // This is specified in .td description for vop1/vop3, 3153 // but sdwa is handled differently. See isSDWAOperand. 3154 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3155 3156 const unsigned Opc = Inst.getOpcode(); 3157 const MCInstrDesc &Desc = MII.get(Opc); 3158 3159 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3160 return true; 3161 3162 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3163 assert(Src0Idx != -1); 3164 3165 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3166 if (!Src0.isReg()) 3167 return false; 3168 3169 auto Reg = Src0.getReg(); 3170 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3171 return !isSGPR(mc2PseudoReg(Reg), TRI); 3172 } 3173 3174 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3175 3176 const unsigned Opc = Inst.getOpcode(); 3177 3178 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3179 return true; 3180 3181 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3182 assert(Src0Idx != -1); 3183 3184 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3185 if (!Src0.isReg()) 3186 return true; 3187 3188 auto Reg = Src0.getReg(); 3189 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3190 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3191 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3192 return false; 3193 } 3194 3195 return true; 3196 } 3197 3198 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3199 3200 const unsigned Opc = Inst.getOpcode(); 3201 const MCInstrDesc &Desc = MII.get(Opc); 3202 3203 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3204 return true; 3205 3206 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3207 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3208 if (isCI() || isSI()) 3209 return false; 3210 } 3211 3212 return true; 3213 } 3214 3215 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3216 const unsigned Opc = Inst.getOpcode(); 3217 const MCInstrDesc &Desc = MII.get(Opc); 3218 3219 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3220 return true; 3221 3222 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3223 if (DimIdx < 0) 3224 return true; 3225 3226 long Imm = Inst.getOperand(DimIdx).getImm(); 3227 if (Imm < 0 || Imm >= 8) 3228 return false; 3229 3230 return 
true; 3231 } 3232 3233 static bool IsRevOpcode(const unsigned Opcode) 3234 { 3235 switch (Opcode) { 3236 case AMDGPU::V_SUBREV_F32_e32: 3237 case AMDGPU::V_SUBREV_F32_e64: 3238 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3239 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3240 case AMDGPU::V_SUBREV_F32_e32_vi: 3241 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3242 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3243 case AMDGPU::V_SUBREV_F32_e64_vi: 3244 3245 case AMDGPU::V_SUBREV_CO_U32_e32: 3246 case AMDGPU::V_SUBREV_CO_U32_e64: 3247 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3248 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3249 3250 case AMDGPU::V_SUBBREV_U32_e32: 3251 case AMDGPU::V_SUBBREV_U32_e64: 3252 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3253 case AMDGPU::V_SUBBREV_U32_e32_vi: 3254 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3255 case AMDGPU::V_SUBBREV_U32_e64_vi: 3256 3257 case AMDGPU::V_SUBREV_U32_e32: 3258 case AMDGPU::V_SUBREV_U32_e64: 3259 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3260 case AMDGPU::V_SUBREV_U32_e32_vi: 3261 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3262 case AMDGPU::V_SUBREV_U32_e64_vi: 3263 3264 case AMDGPU::V_SUBREV_F16_e32: 3265 case AMDGPU::V_SUBREV_F16_e64: 3266 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3267 case AMDGPU::V_SUBREV_F16_e32_vi: 3268 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3269 case AMDGPU::V_SUBREV_F16_e64_vi: 3270 3271 case AMDGPU::V_SUBREV_U16_e32: 3272 case AMDGPU::V_SUBREV_U16_e64: 3273 case AMDGPU::V_SUBREV_U16_e32_vi: 3274 case AMDGPU::V_SUBREV_U16_e64_vi: 3275 3276 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3277 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3278 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3279 3280 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3281 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3282 3283 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3284 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3285 3286 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3287 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3288 3289 case AMDGPU::V_LSHRREV_B32_e32: 3290 case AMDGPU::V_LSHRREV_B32_e64: 3291 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3292 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3293 case AMDGPU::V_LSHRREV_B32_e32_vi: 3294 case AMDGPU::V_LSHRREV_B32_e64_vi: 3295 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3296 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3297 3298 case AMDGPU::V_ASHRREV_I32_e32: 3299 case AMDGPU::V_ASHRREV_I32_e64: 3300 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3301 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3302 case AMDGPU::V_ASHRREV_I32_e32_vi: 3303 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3304 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3305 case AMDGPU::V_ASHRREV_I32_e64_vi: 3306 3307 case AMDGPU::V_LSHLREV_B32_e32: 3308 case AMDGPU::V_LSHLREV_B32_e64: 3309 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3310 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3311 case AMDGPU::V_LSHLREV_B32_e32_vi: 3312 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3313 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3314 case AMDGPU::V_LSHLREV_B32_e64_vi: 3315 3316 case AMDGPU::V_LSHLREV_B16_e32: 3317 case AMDGPU::V_LSHLREV_B16_e64: 3318 case AMDGPU::V_LSHLREV_B16_e32_vi: 3319 case AMDGPU::V_LSHLREV_B16_e64_vi: 3320 case AMDGPU::V_LSHLREV_B16_gfx10: 3321 3322 case AMDGPU::V_LSHRREV_B16_e32: 3323 case AMDGPU::V_LSHRREV_B16_e64: 3324 case AMDGPU::V_LSHRREV_B16_e32_vi: 3325 case AMDGPU::V_LSHRREV_B16_e64_vi: 3326 case AMDGPU::V_LSHRREV_B16_gfx10: 3327 3328 case AMDGPU::V_ASHRREV_I16_e32: 3329 case AMDGPU::V_ASHRREV_I16_e64: 3330 case AMDGPU::V_ASHRREV_I16_e32_vi: 3331 case AMDGPU::V_ASHRREV_I16_e64_vi: 3332 case AMDGPU::V_ASHRREV_I16_gfx10: 3333 3334 case 
AMDGPU::V_LSHLREV_B64: 3335 case AMDGPU::V_LSHLREV_B64_gfx10: 3336 case AMDGPU::V_LSHLREV_B64_vi: 3337 3338 case AMDGPU::V_LSHRREV_B64: 3339 case AMDGPU::V_LSHRREV_B64_gfx10: 3340 case AMDGPU::V_LSHRREV_B64_vi: 3341 3342 case AMDGPU::V_ASHRREV_I64: 3343 case AMDGPU::V_ASHRREV_I64_gfx10: 3344 case AMDGPU::V_ASHRREV_I64_vi: 3345 3346 case AMDGPU::V_PK_LSHLREV_B16: 3347 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3348 case AMDGPU::V_PK_LSHLREV_B16_vi: 3349 3350 case AMDGPU::V_PK_LSHRREV_B16: 3351 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3352 case AMDGPU::V_PK_LSHRREV_B16_vi: 3353 case AMDGPU::V_PK_ASHRREV_I16: 3354 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3355 case AMDGPU::V_PK_ASHRREV_I16_vi: 3356 return true; 3357 default: 3358 return false; 3359 } 3360 } 3361 3362 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3363 3364 using namespace SIInstrFlags; 3365 const unsigned Opcode = Inst.getOpcode(); 3366 const MCInstrDesc &Desc = MII.get(Opcode); 3367 3368 // lds_direct register is defined so that it can be used 3369 // with 9-bit operands only. Ignore encodings which do not accept these. 3370 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3371 return true; 3372 3373 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3374 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3375 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3376 3377 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3378 3379 // lds_direct cannot be specified as either src1 or src2. 3380 for (int SrcIdx : SrcIndices) { 3381 if (SrcIdx == -1) break; 3382 const MCOperand &Src = Inst.getOperand(SrcIdx); 3383 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3384 return false; 3385 } 3386 } 3387 3388 if (Src0Idx == -1) 3389 return true; 3390 3391 const MCOperand &Src = Inst.getOperand(Src0Idx); 3392 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3393 return true; 3394 3395 // lds_direct is specified as src0. Check additional limitations. 3396 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3397 } 3398 3399 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3400 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3401 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3402 if (Op.isFlatOffset()) 3403 return Op.getStartLoc(); 3404 } 3405 return getLoc(); 3406 } 3407 3408 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3409 const OperandVector &Operands) { 3410 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3411 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3412 return true; 3413 3414 auto Opcode = Inst.getOpcode(); 3415 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3416 assert(OpNum != -1); 3417 3418 const auto &Op = Inst.getOperand(OpNum); 3419 if (!hasFlatOffsets() && Op.getImm() != 0) { 3420 Error(getFlatOffsetLoc(Operands), 3421 "flat offset modifier is not supported on this GPU"); 3422 return false; 3423 } 3424 3425 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3426 // For FLAT segment the offset must be positive; 3427 // MSB is ignored and forced to zero. 3428 unsigned OffsetSize = isGFX9() ? 13 : 12; 3429 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3430 if (!isIntN(OffsetSize, Op.getImm())) { 3431 Error(getFlatOffsetLoc(Operands), 3432 isGFX9() ? 
"expected a 13-bit signed offset" : 3433 "expected a 12-bit signed offset"); 3434 return false; 3435 } 3436 } else { 3437 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3438 Error(getFlatOffsetLoc(Operands), 3439 isGFX9() ? "expected a 12-bit unsigned offset" : 3440 "expected an 11-bit unsigned offset"); 3441 return false; 3442 } 3443 } 3444 3445 return true; 3446 } 3447 3448 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3449 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3451 if (Op.isSMEMOffset()) 3452 return Op.getStartLoc(); 3453 } 3454 return getLoc(); 3455 } 3456 3457 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3458 const OperandVector &Operands) { 3459 if (isCI() || isSI()) 3460 return true; 3461 3462 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3463 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3464 return true; 3465 3466 auto Opcode = Inst.getOpcode(); 3467 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3468 if (OpNum == -1) 3469 return true; 3470 3471 const auto &Op = Inst.getOperand(OpNum); 3472 if (!Op.isImm()) 3473 return true; 3474 3475 uint64_t Offset = Op.getImm(); 3476 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3477 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3478 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3479 return true; 3480 3481 Error(getSMEMOffsetLoc(Operands), 3482 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3483 "expected a 21-bit signed offset"); 3484 3485 return false; 3486 } 3487 3488 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3489 unsigned Opcode = Inst.getOpcode(); 3490 const MCInstrDesc &Desc = MII.get(Opcode); 3491 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3492 return true; 3493 3494 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3495 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3496 3497 const int OpIndices[] = { Src0Idx, Src1Idx }; 3498 3499 unsigned NumExprs = 0; 3500 unsigned NumLiterals = 0; 3501 uint32_t LiteralValue; 3502 3503 for (int OpIdx : OpIndices) { 3504 if (OpIdx == -1) break; 3505 3506 const MCOperand &MO = Inst.getOperand(OpIdx); 3507 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3508 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3509 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3510 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3511 if (NumLiterals == 0 || LiteralValue != Value) { 3512 LiteralValue = Value; 3513 ++NumLiterals; 3514 } 3515 } else if (MO.isExpr()) { 3516 ++NumExprs; 3517 } 3518 } 3519 } 3520 3521 return NumLiterals + NumExprs <= 1; 3522 } 3523 3524 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3525 const unsigned Opc = Inst.getOpcode(); 3526 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3527 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3528 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3529 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3530 3531 if (OpSel & ~3) 3532 return false; 3533 } 3534 return true; 3535 } 3536 3537 // Check if VCC register matches wavefront size 3538 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3539 auto FB = getFeatureBits(); 3540 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3541 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3542 } 3543 3544 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3545 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3546 unsigned Opcode = Inst.getOpcode(); 3547 const MCInstrDesc &Desc = MII.get(Opcode); 3548 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3549 return true; 3550 3551 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3552 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3553 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3554 3555 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3556 3557 unsigned NumExprs = 0; 3558 unsigned NumLiterals = 0; 3559 uint32_t LiteralValue; 3560 3561 for (int OpIdx : OpIndices) { 3562 if (OpIdx == -1) break; 3563 3564 const MCOperand &MO = Inst.getOperand(OpIdx); 3565 if (!MO.isImm() && !MO.isExpr()) 3566 continue; 3567 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3568 continue; 3569 3570 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3571 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3572 return false; 3573 3574 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3575 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3576 if (NumLiterals == 0 || LiteralValue != Value) { 3577 LiteralValue = Value; 3578 ++NumLiterals; 3579 } 3580 } else if (MO.isExpr()) { 3581 ++NumExprs; 3582 } 3583 } 3584 NumLiterals += NumExprs; 3585 3586 return !NumLiterals || 3587 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3588 } 3589 3590 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3591 const SMLoc &IDLoc, 3592 const OperandVector &Operands) { 3593 if (!validateLdsDirect(Inst)) { 3594 Error(IDLoc, 3595 "invalid use of lds_direct"); 3596 return false; 3597 } 3598 if (!validateSOPLiteral(Inst)) { 3599 Error(IDLoc, 3600 "only one literal operand is allowed"); 3601 return false; 3602 } 3603 if (!validateVOP3Literal(Inst)) { 3604 Error(IDLoc, 3605 "invalid literal operand"); 3606 return false; 3607 } 3608 if (!validateConstantBusLimitations(Inst)) { 3609 Error(IDLoc, 3610 "invalid operand (violates constant bus restrictions)"); 3611 return false; 3612 } 3613 if (!validateEarlyClobberLimitations(Inst)) { 3614 Error(IDLoc, 3615 "destination must be different than all sources"); 3616 return false; 3617 } 3618 if (!validateIntClampSupported(Inst)) { 3619 Error(IDLoc, 3620 "integer clamping is not supported on this GPU"); 3621 return false; 3622 } 3623 if (!validateOpSel(Inst)) { 3624 Error(IDLoc, 3625 "invalid op_sel operand"); 3626 return false; 3627 } 3628 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
3629 if (!validateMIMGD16(Inst)) { 3630 Error(IDLoc, 3631 "d16 modifier is not supported on this GPU"); 3632 return false; 3633 } 3634 if (!validateMIMGDim(Inst)) { 3635 Error(IDLoc, "dim modifier is required on this GPU"); 3636 return false; 3637 } 3638 if (!validateMIMGDataSize(Inst)) { 3639 Error(IDLoc, 3640 "image data size does not match dmask and tfe"); 3641 return false; 3642 } 3643 if (!validateMIMGAddrSize(Inst)) { 3644 Error(IDLoc, 3645 "image address size does not match dim and a16"); 3646 return false; 3647 } 3648 if (!validateMIMGAtomicDMask(Inst)) { 3649 Error(IDLoc, 3650 "invalid atomic image dmask"); 3651 return false; 3652 } 3653 if (!validateMIMGGatherDMask(Inst)) { 3654 Error(IDLoc, 3655 "invalid image_gather dmask: only one bit must be set"); 3656 return false; 3657 } 3658 if (!validateMovrels(Inst)) { 3659 Error(IDLoc, "source operand must be a VGPR"); 3660 return false; 3661 } 3662 if (!validateFlatOffset(Inst, Operands)) { 3663 return false; 3664 } 3665 if (!validateSMEMOffset(Inst, Operands)) { 3666 return false; 3667 } 3668 if (!validateMAIAccWrite(Inst)) { 3669 return false; 3670 } 3671 3672 return true; 3673 } 3674 3675 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3676 const FeatureBitset &FBS, 3677 unsigned VariantID = 0); 3678 3679 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3680 OperandVector &Operands, 3681 MCStreamer &Out, 3682 uint64_t &ErrorInfo, 3683 bool MatchingInlineAsm) { 3684 MCInst Inst; 3685 unsigned Result = Match_Success; 3686 for (auto Variant : getMatchedVariants()) { 3687 uint64_t EI; 3688 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3689 Variant); 3690 // We order match statuses from least to most specific. We use most specific 3691 // status as resulting 3692 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3693 if ((R == Match_Success) || 3694 (R == Match_PreferE32) || 3695 (R == Match_MissingFeature && Result != Match_PreferE32) || 3696 (R == Match_InvalidOperand && Result != Match_MissingFeature 3697 && Result != Match_PreferE32) || 3698 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3699 && Result != Match_MissingFeature 3700 && Result != Match_PreferE32)) { 3701 Result = R; 3702 ErrorInfo = EI; 3703 } 3704 if (R == Match_Success) 3705 break; 3706 } 3707 3708 switch (Result) { 3709 default: break; 3710 case Match_Success: 3711 if (!validateInstruction(Inst, IDLoc, Operands)) { 3712 return true; 3713 } 3714 Inst.setLoc(IDLoc); 3715 Out.emitInstruction(Inst, getSTI()); 3716 return false; 3717 3718 case Match_MissingFeature: 3719 return Error(IDLoc, "instruction not supported on this GPU"); 3720 3721 case Match_MnemonicFail: { 3722 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3723 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3724 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3725 return Error(IDLoc, "invalid instruction" + Suggestion, 3726 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3727 } 3728 3729 case Match_InvalidOperand: { 3730 SMLoc ErrorLoc = IDLoc; 3731 if (ErrorInfo != ~0ULL) { 3732 if (ErrorInfo >= Operands.size()) { 3733 return Error(IDLoc, "too few operands for instruction"); 3734 } 3735 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3736 if (ErrorLoc == SMLoc()) 3737 ErrorLoc = IDLoc; 3738 } 3739 return Error(ErrorLoc, "invalid operand for instruction"); 3740 } 3741 3742 case Match_PreferE32: 3743 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 3744 "should be encoded as e32"); 3745 } 3746 llvm_unreachable("Implement any new match types added!"); 3747 } 3748 3749 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3750 int64_t Tmp = -1; 3751 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3752 return true; 3753 } 3754 if (getParser().parseAbsoluteExpression(Tmp)) { 3755 return true; 3756 } 3757 Ret = static_cast<uint32_t>(Tmp); 3758 return false; 3759 } 3760 3761 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3762 uint32_t &Minor) { 3763 if (ParseAsAbsoluteExpression(Major)) 3764 return TokError("invalid major version"); 3765 3766 if (getLexer().isNot(AsmToken::Comma)) 3767 return TokError("minor version number required, comma expected"); 3768 Lex(); 3769 3770 if (ParseAsAbsoluteExpression(Minor)) 3771 return TokError("invalid minor version"); 3772 3773 return false; 3774 } 3775 3776 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3777 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3778 return TokError("directive only supported for amdgcn architecture"); 3779 3780 std::string Target; 3781 3782 SMLoc TargetStart = getTok().getLoc(); 3783 if (getParser().parseEscapedString(Target)) 3784 return true; 3785 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3786 3787 std::string ExpectedTarget; 3788 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3789 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3790 3791 if (Target != ExpectedTargetOS.str()) 3792 return getParser().Error(TargetRange.Start, "target must match options", 3793 TargetRange); 3794 3795 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3796 return false; 3797 } 3798 3799 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3800 return getParser().Error(Range.Start, "value out of range", Range); 3801 } 3802 3803 bool AMDGPUAsmParser::calculateGPRBlocks( 3804 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3805 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3806 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3807 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3808 // TODO(scott.linder): These calculations are duplicated from 3809 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
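  // Overview (a sketch of the flow below): SGPRs are bumped by the extra
  // registers implied by VCC, flat_scratch and XNACK usage and checked
  // against the addressable limit; both counts are then rounded up to the
  // subtarget's allocation granule and returned as the granulated block
  // values that the caller packs into COMPUTE_PGM_RSRC1.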
3810 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3811 3812 unsigned NumVGPRs = NextFreeVGPR; 3813 unsigned NumSGPRs = NextFreeSGPR; 3814 3815 if (Version.Major >= 10) 3816 NumSGPRs = 0; 3817 else { 3818 unsigned MaxAddressableNumSGPRs = 3819 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3820 3821 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3822 NumSGPRs > MaxAddressableNumSGPRs) 3823 return OutOfRangeError(SGPRRange); 3824 3825 NumSGPRs += 3826 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3827 3828 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3829 NumSGPRs > MaxAddressableNumSGPRs) 3830 return OutOfRangeError(SGPRRange); 3831 3832 if (Features.test(FeatureSGPRInitBug)) 3833 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3834 } 3835 3836 VGPRBlocks = 3837 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3838 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3839 3840 return false; 3841 } 3842 3843 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3844 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3845 return TokError("directive only supported for amdgcn architecture"); 3846 3847 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3848 return TokError("directive only supported for amdhsa OS"); 3849 3850 StringRef KernelName; 3851 if (getParser().parseIdentifier(KernelName)) 3852 return true; 3853 3854 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3855 3856 StringSet<> Seen; 3857 3858 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3859 3860 SMRange VGPRRange; 3861 uint64_t NextFreeVGPR = 0; 3862 SMRange SGPRRange; 3863 uint64_t NextFreeSGPR = 0; 3864 unsigned UserSGPRCount = 0; 3865 bool ReserveVCC = true; 3866 bool ReserveFlatScr = true; 3867 bool ReserveXNACK = hasXNACK(); 3868 Optional<bool> EnableWavefrontSize32; 3869 3870 while (true) { 3871 while (getLexer().is(AsmToken::EndOfStatement)) 3872 Lex(); 3873 3874 if (getLexer().isNot(AsmToken::Identifier)) 3875 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3876 3877 StringRef ID = getTok().getIdentifier(); 3878 SMRange IDRange = getTok().getLocRange(); 3879 Lex(); 3880 3881 if (ID == ".end_amdhsa_kernel") 3882 break; 3883 3884 if (Seen.find(ID) != Seen.end()) 3885 return TokError(".amdhsa_ directives cannot be repeated"); 3886 Seen.insert(ID); 3887 3888 SMLoc ValStart = getTok().getLoc(); 3889 int64_t IVal; 3890 if (getParser().parseAbsoluteExpression(IVal)) 3891 return true; 3892 SMLoc ValEnd = getTok().getLoc(); 3893 SMRange ValRange = SMRange(ValStart, ValEnd); 3894 3895 if (IVal < 0) 3896 return OutOfRangeError(ValRange); 3897 3898 uint64_t Val = IVal; 3899 3900 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3901 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3902 return OutOfRangeError(RANGE); \ 3903 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3904 3905 if (ID == ".amdhsa_group_segment_fixed_size") { 3906 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3907 return OutOfRangeError(ValRange); 3908 KD.group_segment_fixed_size = Val; 3909 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3910 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3911 return OutOfRangeError(ValRange); 3912 KD.private_segment_fixed_size = Val; 3913 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3914 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3915 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3916 Val, ValRange); 
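      // Each enabled user SGPR feature reserves a fixed number of user SGPRs
      // (4 for the private segment buffer below, 2 per 64-bit pointer, 1 for
      // the private segment size); the running UserSGPRCount total is checked
      // against COMPUTE_PGM_RSRC2_USER_SGPR_COUNT once the directive body has
      // been parsed.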
3917 if (Val) 3918 UserSGPRCount += 4; 3919 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3920 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3921 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3922 ValRange); 3923 if (Val) 3924 UserSGPRCount += 2; 3925 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3926 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3927 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3928 ValRange); 3929 if (Val) 3930 UserSGPRCount += 2; 3931 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3932 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3933 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3934 Val, ValRange); 3935 if (Val) 3936 UserSGPRCount += 2; 3937 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3938 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3939 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3940 ValRange); 3941 if (Val) 3942 UserSGPRCount += 2; 3943 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3944 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3945 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3946 ValRange); 3947 if (Val) 3948 UserSGPRCount += 2; 3949 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3950 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3951 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3952 Val, ValRange); 3953 if (Val) 3954 UserSGPRCount += 1; 3955 } else if (ID == ".amdhsa_wavefront_size32") { 3956 if (IVersion.Major < 10) 3957 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3958 IDRange); 3959 EnableWavefrontSize32 = Val; 3960 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3961 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3962 Val, ValRange); 3963 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3964 PARSE_BITS_ENTRY( 3965 KD.compute_pgm_rsrc2, 3966 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3967 ValRange); 3968 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3969 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3970 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3971 ValRange); 3972 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3973 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3974 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3975 ValRange); 3976 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3977 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3978 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3979 ValRange); 3980 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3981 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3982 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3983 ValRange); 3984 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3985 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3986 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3987 ValRange); 3988 } else if (ID == ".amdhsa_next_free_vgpr") { 3989 VGPRRange = ValRange; 3990 NextFreeVGPR = Val; 3991 } else if (ID == ".amdhsa_next_free_sgpr") { 3992 SGPRRange = ValRange; 3993 NextFreeSGPR = Val; 3994 } else if (ID == ".amdhsa_reserve_vcc") { 3995 if (!isUInt<1>(Val)) 3996 return OutOfRangeError(ValRange); 3997 ReserveVCC = Val; 3998 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3999 if (IVersion.Major < 7) 4000 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4001 IDRange); 4002 if (!isUInt<1>(Val)) 4003 return OutOfRangeError(ValRange); 4004 ReserveFlatScr = Val; 4005 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4006 if (IVersion.Major < 8) 4007 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4008 IDRange); 4009 if (!isUInt<1>(Val)) 4010 return OutOfRangeError(ValRange); 4011 ReserveXNACK = Val; 4012 } else if (ID == ".amdhsa_float_round_mode_32") { 4013 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4014 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4015 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4016 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4017 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4018 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4020 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4021 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4022 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4023 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4024 ValRange); 4025 } else if (ID == ".amdhsa_dx10_clamp") { 4026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4027 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4028 } else if (ID == ".amdhsa_ieee_mode") { 4029 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4030 Val, ValRange); 4031 } else if (ID == ".amdhsa_fp16_overflow") { 4032 if (IVersion.Major < 9) 4033 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4034 IDRange); 4035 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4036 ValRange); 4037 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4038 if (IVersion.Major < 10) 4039 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4040 IDRange); 4041 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4042 ValRange); 4043 } else if (ID == ".amdhsa_memory_ordered") { 4044 if (IVersion.Major < 10) 4045 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4046 IDRange); 4047 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4048 ValRange); 4049 } else if (ID == ".amdhsa_forward_progress") { 4050 if (IVersion.Major < 10) 4051 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4052 IDRange); 4053 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4054 ValRange); 4055 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4056 PARSE_BITS_ENTRY( 4057 KD.compute_pgm_rsrc2, 4058 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4059 ValRange); 4060 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4061 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4062 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4063 Val, ValRange); 4064 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4065 PARSE_BITS_ENTRY( 4066 KD.compute_pgm_rsrc2, 4067 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4068 ValRange); 4069 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4070 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4071 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4072 Val, ValRange); 4073 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4074 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4075 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4076 Val, ValRange); 4077 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4079 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4080 Val, ValRange); 4081 } else if (ID == ".amdhsa_exception_int_div_zero") { 4082 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4083 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4084 Val, ValRange); 4085 } else { 4086 return getParser().Error(IDRange.Start, 
4087 "unknown .amdhsa_kernel directive", IDRange); 4088 } 4089 4090 #undef PARSE_BITS_ENTRY 4091 } 4092 4093 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4094 return TokError(".amdhsa_next_free_vgpr directive is required"); 4095 4096 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4097 return TokError(".amdhsa_next_free_sgpr directive is required"); 4098 4099 unsigned VGPRBlocks; 4100 unsigned SGPRBlocks; 4101 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4102 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4103 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4104 SGPRBlocks)) 4105 return true; 4106 4107 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4108 VGPRBlocks)) 4109 return OutOfRangeError(VGPRRange); 4110 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4111 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4112 4113 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4114 SGPRBlocks)) 4115 return OutOfRangeError(SGPRRange); 4116 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4117 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4118 SGPRBlocks); 4119 4120 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4121 return TokError("too many user SGPRs enabled"); 4122 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4123 UserSGPRCount); 4124 4125 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4126 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4127 ReserveFlatScr, ReserveXNACK); 4128 return false; 4129 } 4130 4131 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4132 uint32_t Major; 4133 uint32_t Minor; 4134 4135 if (ParseDirectiveMajorMinor(Major, Minor)) 4136 return true; 4137 4138 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4139 return false; 4140 } 4141 4142 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4143 uint32_t Major; 4144 uint32_t Minor; 4145 uint32_t Stepping; 4146 StringRef VendorName; 4147 StringRef ArchName; 4148 4149 // If this directive has no arguments, then use the ISA version for the 4150 // targeted GPU. 
4151 if (getLexer().is(AsmToken::EndOfStatement)) { 4152 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4153 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4154 ISA.Stepping, 4155 "AMD", "AMDGPU"); 4156 return false; 4157 } 4158 4159 if (ParseDirectiveMajorMinor(Major, Minor)) 4160 return true; 4161 4162 if (getLexer().isNot(AsmToken::Comma)) 4163 return TokError("stepping version number required, comma expected"); 4164 Lex(); 4165 4166 if (ParseAsAbsoluteExpression(Stepping)) 4167 return TokError("invalid stepping version"); 4168 4169 if (getLexer().isNot(AsmToken::Comma)) 4170 return TokError("vendor name required, comma expected"); 4171 Lex(); 4172 4173 if (getLexer().isNot(AsmToken::String)) 4174 return TokError("invalid vendor name"); 4175 4176 VendorName = getLexer().getTok().getStringContents(); 4177 Lex(); 4178 4179 if (getLexer().isNot(AsmToken::Comma)) 4180 return TokError("arch name required, comma expected"); 4181 Lex(); 4182 4183 if (getLexer().isNot(AsmToken::String)) 4184 return TokError("invalid arch name"); 4185 4186 ArchName = getLexer().getTok().getStringContents(); 4187 Lex(); 4188 4189 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4190 VendorName, ArchName); 4191 return false; 4192 } 4193 4194 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4195 amd_kernel_code_t &Header) { 4196 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4197 // assembly for backwards compatibility. 4198 if (ID == "max_scratch_backing_memory_byte_size") { 4199 Parser.eatToEndOfStatement(); 4200 return false; 4201 } 4202 4203 SmallString<40> ErrStr; 4204 raw_svector_ostream Err(ErrStr); 4205 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4206 return TokError(Err.str()); 4207 } 4208 Lex(); 4209 4210 if (ID == "enable_wavefront_size32") { 4211 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4212 if (!isGFX10()) 4213 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4214 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4215 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4216 } else { 4217 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4218 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4219 } 4220 } 4221 4222 if (ID == "wavefront_size") { 4223 if (Header.wavefront_size == 5) { 4224 if (!isGFX10()) 4225 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4226 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4227 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4228 } else if (Header.wavefront_size == 6) { 4229 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4230 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4231 } 4232 } 4233 4234 if (ID == "enable_wgp_mode") { 4235 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4236 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4237 } 4238 4239 if (ID == "enable_mem_ordered") { 4240 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4241 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4242 } 4243 4244 if (ID == "enable_fwd_progress") { 4245 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4246 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4247 } 4248 4249 return false; 4250 } 4251 4252 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4253 amd_kernel_code_t Header; 4254 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4255 4256 while (true) { 4257 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4258 // will set the current token to EndOfStatement. 4259 while(getLexer().is(AsmToken::EndOfStatement)) 4260 Lex(); 4261 4262 if (getLexer().isNot(AsmToken::Identifier)) 4263 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4264 4265 StringRef ID = getLexer().getTok().getIdentifier(); 4266 Lex(); 4267 4268 if (ID == ".end_amd_kernel_code_t") 4269 break; 4270 4271 if (ParseAMDKernelCodeTValue(ID, Header)) 4272 return true; 4273 } 4274 4275 getTargetStreamer().EmitAMDKernelCodeT(Header); 4276 4277 return false; 4278 } 4279 4280 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4281 if (getLexer().isNot(AsmToken::Identifier)) 4282 return TokError("expected symbol name"); 4283 4284 StringRef KernelName = Parser.getTok().getString(); 4285 4286 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4287 ELF::STT_AMDGPU_HSA_KERNEL); 4288 Lex(); 4289 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4290 KernelScope.initialize(getContext()); 4291 return false; 4292 } 4293 4294 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4295 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4296 return Error(getParser().getTok().getLoc(), 4297 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4298 "architectures"); 4299 } 4300 4301 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4302 4303 std::string ISAVersionStringFromSTI; 4304 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4305 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4306 4307 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4308 return Error(getParser().getTok().getLoc(), 4309 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4310 "arguments specified through the command line"); 4311 } 4312 4313 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4314 Lex(); 4315 4316 return false; 4317 } 4318 4319 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4320 const char *AssemblerDirectiveBegin; 4321 const char *AssemblerDirectiveEnd; 4322 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4323 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4324 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4325 HSAMD::V3::AssemblerDirectiveEnd) 4326 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4327 HSAMD::AssemblerDirectiveEnd); 4328 4329 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4330 return Error(getParser().getTok().getLoc(), 4331 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4332 "not available on non-amdhsa OSes")).str()); 4333 } 4334 4335 std::string HSAMetadataString; 4336 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4337 HSAMetadataString)) 4338 return true; 4339 4340 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4341 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4342 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4343 } else { 4344 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4345 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4346 } 4347 4348 return false; 4349 } 4350 4351 /// Common code to parse out a block of text (typically YAML) between start and 4352 /// end directives. 
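/// Whitespace is preserved while collecting, and reaching end-of-file before
/// the end directive is reported as an error.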
4353 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4354 const char *AssemblerDirectiveEnd, 4355 std::string &CollectString) { 4356 4357 raw_string_ostream CollectStream(CollectString); 4358 4359 getLexer().setSkipSpace(false); 4360 4361 bool FoundEnd = false; 4362 while (!getLexer().is(AsmToken::Eof)) { 4363 while (getLexer().is(AsmToken::Space)) { 4364 CollectStream << getLexer().getTok().getString(); 4365 Lex(); 4366 } 4367 4368 if (getLexer().is(AsmToken::Identifier)) { 4369 StringRef ID = getLexer().getTok().getIdentifier(); 4370 if (ID == AssemblerDirectiveEnd) { 4371 Lex(); 4372 FoundEnd = true; 4373 break; 4374 } 4375 } 4376 4377 CollectStream << Parser.parseStringToEndOfStatement() 4378 << getContext().getAsmInfo()->getSeparatorString(); 4379 4380 Parser.eatToEndOfStatement(); 4381 } 4382 4383 getLexer().setSkipSpace(true); 4384 4385 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4386 return TokError(Twine("expected directive ") + 4387 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4388 } 4389 4390 CollectStream.flush(); 4391 return false; 4392 } 4393 4394 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4395 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4396 std::string String; 4397 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4398 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4399 return true; 4400 4401 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4402 if (!PALMetadata->setFromString(String)) 4403 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4404 return false; 4405 } 4406 4407 /// Parse the assembler directive for old linear-format PAL metadata. 4408 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4409 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4410 return Error(getParser().getTok().getLoc(), 4411 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4412 "not available on non-amdpal OSes")).str()); 4413 } 4414 4415 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4416 PALMetadata->setLegacy(); 4417 for (;;) { 4418 uint32_t Key, Value; 4419 if (ParseAsAbsoluteExpression(Key)) { 4420 return TokError(Twine("invalid value in ") + 4421 Twine(PALMD::AssemblerDirective)); 4422 } 4423 if (getLexer().isNot(AsmToken::Comma)) { 4424 return TokError(Twine("expected an even number of values in ") + 4425 Twine(PALMD::AssemblerDirective)); 4426 } 4427 Lex(); 4428 if (ParseAsAbsoluteExpression(Value)) { 4429 return TokError(Twine("invalid value in ") + 4430 Twine(PALMD::AssemblerDirective)); 4431 } 4432 PALMetadata->setRegister(Key, Value); 4433 if (getLexer().isNot(AsmToken::Comma)) 4434 break; 4435 Lex(); 4436 } 4437 return false; 4438 } 4439 4440 /// ParseDirectiveAMDGPULDS 4441 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4442 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4443 if (getParser().checkForValidSection()) 4444 return true; 4445 4446 StringRef Name; 4447 SMLoc NameLoc = getLexer().getLoc(); 4448 if (getParser().parseIdentifier(Name)) 4449 return TokError("expected identifier in directive"); 4450 4451 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4452 if (parseToken(AsmToken::Comma, "expected ','")) 4453 return true; 4454 4455 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4456 4457 int64_t Size; 4458 SMLoc SizeLoc = getLexer().getLoc(); 4459 if (getParser().parseAbsoluteExpression(Size)) 4460 return true; 4461 if (Size < 0) 4462 return 
Error(SizeLoc, "size must be non-negative"); 4463 if (Size > LocalMemorySize) 4464 return Error(SizeLoc, "size is too large"); 4465 4466 int64_t Alignment = 4; 4467 if (getLexer().is(AsmToken::Comma)) { 4468 Lex(); 4469 SMLoc AlignLoc = getLexer().getLoc(); 4470 if (getParser().parseAbsoluteExpression(Alignment)) 4471 return true; 4472 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4473 return Error(AlignLoc, "alignment must be a power of two"); 4474 4475 // Alignment larger than the size of LDS is possible in theory, as long 4476 // as the linker manages to place to symbol at address 0, but we do want 4477 // to make sure the alignment fits nicely into a 32-bit integer. 4478 if (Alignment >= 1u << 31) 4479 return Error(AlignLoc, "alignment is too large"); 4480 } 4481 4482 if (parseToken(AsmToken::EndOfStatement, 4483 "unexpected token in '.amdgpu_lds' directive")) 4484 return true; 4485 4486 Symbol->redefineIfPossible(); 4487 if (!Symbol->isUndefined()) 4488 return Error(NameLoc, "invalid symbol redefinition"); 4489 4490 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4491 return false; 4492 } 4493 4494 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4495 StringRef IDVal = DirectiveID.getString(); 4496 4497 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4498 if (IDVal == ".amdgcn_target") 4499 return ParseDirectiveAMDGCNTarget(); 4500 4501 if (IDVal == ".amdhsa_kernel") 4502 return ParseDirectiveAMDHSAKernel(); 4503 4504 // TODO: Restructure/combine with PAL metadata directive. 4505 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4506 return ParseDirectiveHSAMetadata(); 4507 } else { 4508 if (IDVal == ".hsa_code_object_version") 4509 return ParseDirectiveHSACodeObjectVersion(); 4510 4511 if (IDVal == ".hsa_code_object_isa") 4512 return ParseDirectiveHSACodeObjectISA(); 4513 4514 if (IDVal == ".amd_kernel_code_t") 4515 return ParseDirectiveAMDKernelCodeT(); 4516 4517 if (IDVal == ".amdgpu_hsa_kernel") 4518 return ParseDirectiveAMDGPUHsaKernel(); 4519 4520 if (IDVal == ".amd_amdgpu_isa") 4521 return ParseDirectiveISAVersion(); 4522 4523 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4524 return ParseDirectiveHSAMetadata(); 4525 } 4526 4527 if (IDVal == ".amdgpu_lds") 4528 return ParseDirectiveAMDGPULDS(); 4529 4530 if (IDVal == PALMD::AssemblerDirectiveBegin) 4531 return ParseDirectivePALMetadataBegin(); 4532 4533 if (IDVal == PALMD::AssemblerDirective) 4534 return ParseDirectivePALMetadata(); 4535 4536 return true; 4537 } 4538 4539 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4540 unsigned RegNo) const { 4541 4542 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4543 R.isValid(); ++R) { 4544 if (*R == RegNo) 4545 return isGFX9() || isGFX10(); 4546 } 4547 4548 // GFX10 has 2 more SGPRs 104 and 105. 
4549 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4550 R.isValid(); ++R) {
4551 if (*R == RegNo)
4552 return hasSGPR104_SGPR105();
4553 }
4554
4555 switch (RegNo) {
4556 case AMDGPU::SRC_SHARED_BASE:
4557 case AMDGPU::SRC_SHARED_LIMIT:
4558 case AMDGPU::SRC_PRIVATE_BASE:
4559 case AMDGPU::SRC_PRIVATE_LIMIT:
4560 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4561 return !isCI() && !isSI() && !isVI();
4562 case AMDGPU::TBA:
4563 case AMDGPU::TBA_LO:
4564 case AMDGPU::TBA_HI:
4565 case AMDGPU::TMA:
4566 case AMDGPU::TMA_LO:
4567 case AMDGPU::TMA_HI:
4568 return !isGFX9() && !isGFX10();
4569 case AMDGPU::XNACK_MASK:
4570 case AMDGPU::XNACK_MASK_LO:
4571 case AMDGPU::XNACK_MASK_HI:
4572 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4573 case AMDGPU::SGPR_NULL:
4574 return isGFX10();
4575 default:
4576 break;
4577 }
4578
4579 if (isCI())
4580 return true;
4581
4582 if (isSI() || isGFX10()) {
4583 // No flat_scr on SI.
4584 // On GFX10 flat scratch is not a valid register operand and can only be
4585 // accessed with s_setreg/s_getreg.
4586 switch (RegNo) {
4587 case AMDGPU::FLAT_SCR:
4588 case AMDGPU::FLAT_SCR_LO:
4589 case AMDGPU::FLAT_SCR_HI:
4590 return false;
4591 default:
4592 return true;
4593 }
4594 }
4595
4596 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4597 // SI/CI have.
4598 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4599 R.isValid(); ++R) {
4600 if (*R == RegNo)
4601 return hasSGPR102_SGPR103();
4602 }
4603
4604 return true;
4605 }
4606
4607 OperandMatchResultTy
4608 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4609 OperandMode Mode) {
4610 // Try to parse with a custom parser
4611 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4612
4613 // If we successfully parsed the operand or if there was an error parsing,
4614 // we are done.
4615 //
4616 // If we are parsing after we reach EndOfStatement then this means we
4617 // are appending default values to the Operands list. This is only done
4618 // by a custom parser, so we shouldn't continue on to the generic parsing.
4619 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4620 getLexer().is(AsmToken::EndOfStatement))
4621 return ResTy;
4622
4623 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4624 unsigned Prefix = Operands.size();
4625 SMLoc LBraceLoc = getTok().getLoc();
4626 Parser.Lex(); // eat the '['
4627
4628 for (;;) {
4629 ResTy = parseReg(Operands);
4630 if (ResTy != MatchOperand_Success)
4631 return ResTy;
4632
4633 if (getLexer().is(AsmToken::RBrac))
4634 break;
4635
4636 if (getLexer().isNot(AsmToken::Comma))
4637 return MatchOperand_ParseFail;
4638 Parser.Lex();
4639 }
4640
4641 if (Operands.size() - Prefix > 1) {
4642 Operands.insert(Operands.begin() + Prefix,
4643 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4644 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4645 getTok().getLoc()));
4646 }
4647
4648 Parser.Lex(); // eat the ']'
4649 return MatchOperand_Success;
4650 }
4651
4652 return parseRegOrImm(Operands);
4653 }
4654
4655 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4656 // Clear any forced encodings from the previous instruction.
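  // A suffix on the mnemonic selects a particular encoding, e.g. something
  // like "v_add_f32_e64" forces the 64-bit VOP3 form and the matcher then
  // sees only the stripped mnemonic "v_add_f32".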
4657 setForcedEncodingSize(0); 4658 setForcedDPP(false); 4659 setForcedSDWA(false); 4660 4661 if (Name.endswith("_e64")) { 4662 setForcedEncodingSize(64); 4663 return Name.substr(0, Name.size() - 4); 4664 } else if (Name.endswith("_e32")) { 4665 setForcedEncodingSize(32); 4666 return Name.substr(0, Name.size() - 4); 4667 } else if (Name.endswith("_dpp")) { 4668 setForcedDPP(true); 4669 return Name.substr(0, Name.size() - 4); 4670 } else if (Name.endswith("_sdwa")) { 4671 setForcedSDWA(true); 4672 return Name.substr(0, Name.size() - 5); 4673 } 4674 return Name; 4675 } 4676 4677 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4678 StringRef Name, 4679 SMLoc NameLoc, OperandVector &Operands) { 4680 // Add the instruction mnemonic 4681 Name = parseMnemonicSuffix(Name); 4682 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4683 4684 bool IsMIMG = Name.startswith("image_"); 4685 4686 while (!getLexer().is(AsmToken::EndOfStatement)) { 4687 OperandMode Mode = OperandMode_Default; 4688 if (IsMIMG && isGFX10() && Operands.size() == 2) 4689 Mode = OperandMode_NSA; 4690 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4691 4692 // Eat the comma or space if there is one. 4693 if (getLexer().is(AsmToken::Comma)) 4694 Parser.Lex(); 4695 4696 switch (Res) { 4697 case MatchOperand_Success: break; 4698 case MatchOperand_ParseFail: 4699 // FIXME: use real operand location rather than the current location. 4700 Error(getLexer().getLoc(), "failed parsing operand."); 4701 while (!getLexer().is(AsmToken::EndOfStatement)) { 4702 Parser.Lex(); 4703 } 4704 return true; 4705 case MatchOperand_NoMatch: 4706 // FIXME: use real operand location rather than the current location. 4707 Error(getLexer().getLoc(), "not a valid operand."); 4708 while (!getLexer().is(AsmToken::EndOfStatement)) { 4709 Parser.Lex(); 4710 } 4711 return true; 4712 } 4713 } 4714 4715 return false; 4716 } 4717 4718 //===----------------------------------------------------------------------===// 4719 // Utility functions 4720 //===----------------------------------------------------------------------===// 4721 4722 OperandMatchResultTy 4723 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4724 4725 if (!trySkipId(Prefix, AsmToken::Colon)) 4726 return MatchOperand_NoMatch; 4727 4728 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4729 } 4730 4731 OperandMatchResultTy 4732 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4733 AMDGPUOperand::ImmTy ImmTy, 4734 bool (*ConvertResult)(int64_t&)) { 4735 SMLoc S = getLoc(); 4736 int64_t Value = 0; 4737 4738 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4739 if (Res != MatchOperand_Success) 4740 return Res; 4741 4742 if (ConvertResult && !ConvertResult(Value)) { 4743 Error(S, "invalid " + StringRef(Prefix) + " value."); 4744 } 4745 4746 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4747 return MatchOperand_Success; 4748 } 4749 4750 OperandMatchResultTy 4751 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4752 OperandVector &Operands, 4753 AMDGPUOperand::ImmTy ImmTy, 4754 bool (*ConvertResult)(int64_t&)) { 4755 SMLoc S = getLoc(); 4756 if (!trySkipId(Prefix, AsmToken::Colon)) 4757 return MatchOperand_NoMatch; 4758 4759 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4760 return MatchOperand_ParseFail; 4761 4762 unsigned Val = 0; 4763 const unsigned MaxSize = 4; 4764 4765 // FIXME: How to verify the number of elements matches the number of src 4766 // operands? 4767 for (int I = 0; ; ++I) { 4768 int64_t Op; 4769 SMLoc Loc = getLoc(); 4770 if (!parseExpr(Op)) 4771 return MatchOperand_ParseFail; 4772 4773 if (Op != 0 && Op != 1) { 4774 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4775 return MatchOperand_ParseFail; 4776 } 4777 4778 Val |= (Op << I); 4779 4780 if (trySkipToken(AsmToken::RBrac)) 4781 break; 4782 4783 if (I + 1 == MaxSize) { 4784 Error(getLoc(), "expected a closing square bracket"); 4785 return MatchOperand_ParseFail; 4786 } 4787 4788 if (!skipToken(AsmToken::Comma, "expected a comma")) 4789 return MatchOperand_ParseFail; 4790 } 4791 4792 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4793 return MatchOperand_Success; 4794 } 4795 4796 OperandMatchResultTy 4797 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4798 AMDGPUOperand::ImmTy ImmTy) { 4799 int64_t Bit = 0; 4800 SMLoc S = Parser.getTok().getLoc(); 4801 4802 // We are at the end of the statement, and this is a default argument, so 4803 // use a default value. 
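  // Otherwise a named bit is written either as the bare name (sets the bit)
  // or with a "no" prefix (clears it), e.g. "glc" vs. "noglc".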
4804 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4805 switch(getLexer().getKind()) { 4806 case AsmToken::Identifier: { 4807 StringRef Tok = Parser.getTok().getString(); 4808 if (Tok == Name) { 4809 if (Tok == "r128" && !hasMIMG_R128()) 4810 Error(S, "r128 modifier is not supported on this GPU"); 4811 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4812 Error(S, "a16 modifier is not supported on this GPU"); 4813 Bit = 1; 4814 Parser.Lex(); 4815 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4816 Bit = 0; 4817 Parser.Lex(); 4818 } else { 4819 return MatchOperand_NoMatch; 4820 } 4821 break; 4822 } 4823 default: 4824 return MatchOperand_NoMatch; 4825 } 4826 } 4827 4828 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4829 return MatchOperand_ParseFail; 4830 4831 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4832 ImmTy = AMDGPUOperand::ImmTyR128A16; 4833 4834 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4835 return MatchOperand_Success; 4836 } 4837 4838 static void addOptionalImmOperand( 4839 MCInst& Inst, const OperandVector& Operands, 4840 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4841 AMDGPUOperand::ImmTy ImmT, 4842 int64_t Default = 0) { 4843 auto i = OptionalIdx.find(ImmT); 4844 if (i != OptionalIdx.end()) { 4845 unsigned Idx = i->second; 4846 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4847 } else { 4848 Inst.addOperand(MCOperand::createImm(Default)); 4849 } 4850 } 4851 4852 OperandMatchResultTy 4853 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4854 if (getLexer().isNot(AsmToken::Identifier)) { 4855 return MatchOperand_NoMatch; 4856 } 4857 StringRef Tok = Parser.getTok().getString(); 4858 if (Tok != Prefix) { 4859 return MatchOperand_NoMatch; 4860 } 4861 4862 Parser.Lex(); 4863 if (getLexer().isNot(AsmToken::Colon)) { 4864 return MatchOperand_ParseFail; 4865 } 4866 4867 Parser.Lex(); 4868 if (getLexer().isNot(AsmToken::Identifier)) { 4869 return MatchOperand_ParseFail; 4870 } 4871 4872 Value = Parser.getTok().getString(); 4873 return MatchOperand_Success; 4874 } 4875 4876 //===----------------------------------------------------------------------===// 4877 // MTBUF format 4878 //===----------------------------------------------------------------------===// 4879 4880 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 4881 int64_t MaxVal, 4882 int64_t &Fmt) { 4883 int64_t Val; 4884 SMLoc Loc = getLoc(); 4885 4886 auto Res = parseIntWithPrefix(Pref, Val); 4887 if (Res == MatchOperand_ParseFail) 4888 return false; 4889 if (Res == MatchOperand_NoMatch) 4890 return true; 4891 4892 if (Val < 0 || Val > MaxVal) { 4893 Error(Loc, Twine("out of range ", StringRef(Pref))); 4894 return false; 4895 } 4896 4897 Fmt = Val; 4898 return true; 4899 } 4900 4901 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4902 // values to live in a joint format operand in the MCInst encoding. 4903 OperandMatchResultTy 4904 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 4905 using namespace llvm::AMDGPU::MTBUFFormat; 4906 4907 int64_t Dfmt = DFMT_UNDEF; 4908 int64_t Nfmt = NFMT_UNDEF; 4909 4910 // dfmt and nfmt can appear in either order, and each is optional. 
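  // For example "dfmt:1, nfmt:2", "nfmt:2, dfmt:1" and plain "dfmt:1" are all
  // accepted; whichever half is omitted keeps its default value.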
4911 for (int I = 0; I < 2; ++I) { 4912 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 4913 return MatchOperand_ParseFail; 4914 4915 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 4916 return MatchOperand_ParseFail; 4917 } 4918 // Skip optional comma between dfmt/nfmt 4919 // but guard against 2 commas following each other. 4920 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 4921 !peekToken().is(AsmToken::Comma)) { 4922 trySkipToken(AsmToken::Comma); 4923 } 4924 } 4925 4926 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 4927 return MatchOperand_NoMatch; 4928 4929 Dfmt = (Dfmt == DFMT_UNDEF)? DFMT_DEFAULT : Dfmt; 4930 Nfmt = (Nfmt == NFMT_UNDEF)? NFMT_DEFAULT : Nfmt; 4931 4932 Format = encodeDfmtNfmt(Dfmt, Nfmt); 4933 return MatchOperand_Success; 4934 } 4935 4936 OperandMatchResultTy 4937 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 4938 using namespace llvm::AMDGPU::MTBUFFormat; 4939 4940 int64_t Fmt = UFMT_UNDEF; 4941 4942 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 4943 return MatchOperand_ParseFail; 4944 4945 if (Fmt == UFMT_UNDEF) 4946 return MatchOperand_NoMatch; 4947 4948 Format = Fmt; 4949 return MatchOperand_Success; 4950 } 4951 4952 OperandMatchResultTy 4953 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 4954 using namespace llvm::AMDGPU::MTBUFFormat; 4955 4956 int64_t Format = isGFX10() ? UFMT_DEFAULT : DFMT_NFMT_DEFAULT; 4957 OperandMatchResultTy Res; 4958 SMLoc Loc = getLoc(); 4959 4960 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 4961 if (Res == MatchOperand_ParseFail) 4962 return Res; 4963 4964 Operands.push_back( 4965 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 4966 return MatchOperand_Success; 4967 } 4968 4969 //===----------------------------------------------------------------------===// 4970 // ds 4971 //===----------------------------------------------------------------------===// 4972 4973 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4974 const OperandVector &Operands) { 4975 OptionalImmIndexMap OptionalIdx; 4976 4977 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4978 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4979 4980 // Add the register arguments 4981 if (Op.isReg()) { 4982 Op.addRegOperands(Inst, 1); 4983 continue; 4984 } 4985 4986 // Handle optional arguments 4987 OptionalIdx[Op.getImmTy()] = i; 4988 } 4989 4990 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4992 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4993 4994 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4995 } 4996 4997 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4998 bool IsGdsHardcoded) { 4999 OptionalImmIndexMap OptionalIdx; 5000 5001 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5002 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5003 5004 // Add the register arguments 5005 if (Op.isReg()) { 5006 Op.addRegOperands(Inst, 1); 5007 continue; 5008 } 5009 5010 if (Op.isToken() && Op.getToken() == "gds") { 5011 IsGdsHardcoded = true; 5012 continue; 5013 } 5014 5015 // Handle optional arguments 5016 OptionalIdx[Op.getImmTy()] = i; 5017 } 5018 5019 AMDGPUOperand::ImmTy OffsetType = 5020 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5021 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5022 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5023 AMDGPUOperand::ImmTyOffset; 5024 5025 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5026 5027 if (!IsGdsHardcoded) { 5028 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5029 } 5030 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5031 } 5032 5033 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5034 OptionalImmIndexMap OptionalIdx; 5035 5036 unsigned OperandIdx[4]; 5037 unsigned EnMask = 0; 5038 int SrcIdx = 0; 5039 5040 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5041 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5042 5043 // Add the register arguments 5044 if (Op.isReg()) { 5045 assert(SrcIdx < 4); 5046 OperandIdx[SrcIdx] = Inst.size(); 5047 Op.addRegOperands(Inst, 1); 5048 ++SrcIdx; 5049 continue; 5050 } 5051 5052 if (Op.isOff()) { 5053 assert(SrcIdx < 4); 5054 OperandIdx[SrcIdx] = Inst.size(); 5055 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5056 ++SrcIdx; 5057 continue; 5058 } 5059 5060 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5061 Op.addImmOperands(Inst, 1); 5062 continue; 5063 } 5064 5065 if (Op.isToken() && Op.getToken() == "done") 5066 continue; 5067 5068 // Handle optional arguments 5069 OptionalIdx[Op.getImmTy()] = i; 5070 } 5071 5072 assert(SrcIdx == 4); 5073 5074 bool Compr = false; 5075 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5076 Compr = true; 5077 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5078 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5079 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5080 } 5081 5082 for (auto i = 0; i < SrcIdx; ++i) { 5083 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5084 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5085 } 5086 } 5087 5088 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5089 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5090 5091 Inst.addOperand(MCOperand::createImm(EnMask)); 5092 } 5093 5094 //===----------------------------------------------------------------------===// 5095 // s_waitcnt 5096 //===----------------------------------------------------------------------===// 5097 5098 static bool 5099 encodeCnt( 5100 const AMDGPU::IsaVersion ISA, 5101 int64_t &IntVal, 5102 int64_t CntVal, 5103 bool Saturate, 5104 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5105 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5106 { 5107 bool Failed = false; 5108 5109 IntVal = encode(ISA, IntVal, CntVal); 5110 if (CntVal != decode(ISA, IntVal)) { 5111 if (Saturate) { 5112 IntVal = encode(ISA, IntVal, -1); 5113 } else { 5114 Failed = true; 5115 } 5116 } 5117 return Failed; 5118 } 5119 5120 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5121 5122 SMLoc CntLoc = getLoc(); 5123 StringRef CntName = getTokenStr(); 5124 5125 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5126 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5127 return false; 5128 5129 int64_t CntVal; 5130 SMLoc ValLoc = getLoc(); 5131 if (!parseExpr(CntVal)) 5132 return false; 5133 5134 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5135 5136 bool Failed = true; 5137 bool Sat = CntName.endswith("_sat"); 5138 5139 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5140 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5141 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5142 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5143 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5144 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5145 } else { 5146 Error(CntLoc, "invalid counter name " + CntName); 5147 return false; 5148 } 5149 5150 if (Failed) { 5151 Error(ValLoc, "too large value for " + CntName); 5152 return false; 5153 } 5154 5155 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5156 return false; 5157 5158 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5159 if (isToken(AsmToken::EndOfStatement)) { 5160 Error(getLoc(), "expected a counter name"); 5161 return false; 5162 } 5163 } 5164 5165 return true; 5166 } 5167 5168 OperandMatchResultTy 5169 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5170 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5171 int64_t Waitcnt = getWaitcntBitMask(ISA); 5172 SMLoc S = getLoc(); 5173 5174 // If parse failed, do not return error code 5175 // to avoid excessive error messages. 
5176 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5177 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5178 } else { 5179 parseExpr(Waitcnt); 5180 } 5181 5182 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5183 return MatchOperand_Success; 5184 } 5185 5186 bool 5187 AMDGPUOperand::isSWaitCnt() const { 5188 return isImm(); 5189 } 5190 5191 //===----------------------------------------------------------------------===// 5192 // hwreg 5193 //===----------------------------------------------------------------------===// 5194 5195 bool 5196 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5197 int64_t &Offset, 5198 int64_t &Width) { 5199 using namespace llvm::AMDGPU::Hwreg; 5200 5201 // The register may be specified by name or using a numeric code 5202 if (isToken(AsmToken::Identifier) && 5203 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5204 HwReg.IsSymbolic = true; 5205 lex(); // skip message name 5206 } else if (!parseExpr(HwReg.Id)) { 5207 return false; 5208 } 5209 5210 if (trySkipToken(AsmToken::RParen)) 5211 return true; 5212 5213 // parse optional params 5214 return 5215 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5216 parseExpr(Offset) && 5217 skipToken(AsmToken::Comma, "expected a comma") && 5218 parseExpr(Width) && 5219 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5220 } 5221 5222 bool 5223 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5224 const int64_t Offset, 5225 const int64_t Width, 5226 const SMLoc Loc) { 5227 5228 using namespace llvm::AMDGPU::Hwreg; 5229 5230 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5231 Error(Loc, "specified hardware register is not supported on this GPU"); 5232 return false; 5233 } else if (!isValidHwreg(HwReg.Id)) { 5234 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5235 return false; 5236 } else if (!isValidHwregOffset(Offset)) { 5237 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5238 return false; 5239 } else if (!isValidHwregWidth(Width)) { 5240 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5241 return false; 5242 } 5243 return true; 5244 } 5245 5246 OperandMatchResultTy 5247 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5248 using namespace llvm::AMDGPU::Hwreg; 5249 5250 int64_t ImmVal = 0; 5251 SMLoc Loc = getLoc(); 5252 5253 // If parse failed, do not return error code 5254 // to avoid excessive error messages. 
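  // Accepted forms: a symbolic spec, hwreg(<name> [, <bit_offset>, <width>]),
  // where <name> is one of the symbolic HW_REG_* hardware register names, or
  // a raw 16-bit immediate.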
5255 if (trySkipId("hwreg", AsmToken::LParen)) { 5256 OperandInfoTy HwReg(ID_UNKNOWN_); 5257 int64_t Offset = OFFSET_DEFAULT_; 5258 int64_t Width = WIDTH_DEFAULT_; 5259 if (parseHwregBody(HwReg, Offset, Width) && 5260 validateHwreg(HwReg, Offset, Width, Loc)) { 5261 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5262 } 5263 } else if (parseExpr(ImmVal)) { 5264 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5265 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5266 } 5267 5268 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5269 return MatchOperand_Success; 5270 } 5271 5272 bool AMDGPUOperand::isHwreg() const { 5273 return isImmTy(ImmTyHwreg); 5274 } 5275 5276 //===----------------------------------------------------------------------===// 5277 // sendmsg 5278 //===----------------------------------------------------------------------===// 5279 5280 bool 5281 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5282 OperandInfoTy &Op, 5283 OperandInfoTy &Stream) { 5284 using namespace llvm::AMDGPU::SendMsg; 5285 5286 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5287 Msg.IsSymbolic = true; 5288 lex(); // skip message name 5289 } else if (!parseExpr(Msg.Id)) { 5290 return false; 5291 } 5292 5293 if (trySkipToken(AsmToken::Comma)) { 5294 Op.IsDefined = true; 5295 if (isToken(AsmToken::Identifier) && 5296 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5297 lex(); // skip operation name 5298 } else if (!parseExpr(Op.Id)) { 5299 return false; 5300 } 5301 5302 if (trySkipToken(AsmToken::Comma)) { 5303 Stream.IsDefined = true; 5304 if (!parseExpr(Stream.Id)) 5305 return false; 5306 } 5307 } 5308 5309 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5310 } 5311 5312 bool 5313 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5314 const OperandInfoTy &Op, 5315 const OperandInfoTy &Stream, 5316 const SMLoc S) { 5317 using namespace llvm::AMDGPU::SendMsg; 5318 5319 // Validation strictness depends on whether message is specified 5320 // in a symbolc or in a numeric form. In the latter case 5321 // only encoding possibility is checked. 5322 bool Strict = Msg.IsSymbolic; 5323 5324 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5325 Error(S, "invalid message id"); 5326 return false; 5327 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5328 Error(S, Op.IsDefined ? 5329 "message does not support operations" : 5330 "missing message operation"); 5331 return false; 5332 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5333 Error(S, "invalid operation id"); 5334 return false; 5335 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5336 Error(S, "message operation does not support streams"); 5337 return false; 5338 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5339 Error(S, "invalid message stream id"); 5340 return false; 5341 } 5342 return true; 5343 } 5344 5345 OperandMatchResultTy 5346 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5347 using namespace llvm::AMDGPU::SendMsg; 5348 5349 int64_t ImmVal = 0; 5350 SMLoc Loc = getLoc(); 5351 5352 // If parse failed, do not return error code 5353 // to avoid excessive error messages. 
5354 if (trySkipId("sendmsg", AsmToken::LParen)) { 5355 OperandInfoTy Msg(ID_UNKNOWN_); 5356 OperandInfoTy Op(OP_NONE_); 5357 OperandInfoTy Stream(STREAM_ID_NONE_); 5358 if (parseSendMsgBody(Msg, Op, Stream) && 5359 validateSendMsg(Msg, Op, Stream, Loc)) { 5360 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5361 } 5362 } else if (parseExpr(ImmVal)) { 5363 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5364 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5365 } 5366 5367 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5368 return MatchOperand_Success; 5369 } 5370 5371 bool AMDGPUOperand::isSendMsg() const { 5372 return isImmTy(ImmTySendMsg); 5373 } 5374 5375 //===----------------------------------------------------------------------===// 5376 // v_interp 5377 //===----------------------------------------------------------------------===// 5378 5379 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5380 if (getLexer().getKind() != AsmToken::Identifier) 5381 return MatchOperand_NoMatch; 5382 5383 StringRef Str = Parser.getTok().getString(); 5384 int Slot = StringSwitch<int>(Str) 5385 .Case("p10", 0) 5386 .Case("p20", 1) 5387 .Case("p0", 2) 5388 .Default(-1); 5389 5390 SMLoc S = Parser.getTok().getLoc(); 5391 if (Slot == -1) 5392 return MatchOperand_ParseFail; 5393 5394 Parser.Lex(); 5395 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5396 AMDGPUOperand::ImmTyInterpSlot)); 5397 return MatchOperand_Success; 5398 } 5399 5400 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5401 if (getLexer().getKind() != AsmToken::Identifier) 5402 return MatchOperand_NoMatch; 5403 5404 StringRef Str = Parser.getTok().getString(); 5405 if (!Str.startswith("attr")) 5406 return MatchOperand_NoMatch; 5407 5408 StringRef Chan = Str.take_back(2); 5409 int AttrChan = StringSwitch<int>(Chan) 5410 .Case(".x", 0) 5411 .Case(".y", 1) 5412 .Case(".z", 2) 5413 .Case(".w", 3) 5414 .Default(-1); 5415 if (AttrChan == -1) 5416 return MatchOperand_ParseFail; 5417 5418 Str = Str.drop_back(2).drop_front(4); 5419 5420 uint8_t Attr; 5421 if (Str.getAsInteger(10, Attr)) 5422 return MatchOperand_ParseFail; 5423 5424 SMLoc S = Parser.getTok().getLoc(); 5425 Parser.Lex(); 5426 if (Attr > 63) { 5427 Error(S, "out of bounds attr"); 5428 return MatchOperand_Success; 5429 } 5430 5431 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5432 5433 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5434 AMDGPUOperand::ImmTyInterpAttr)); 5435 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5436 AMDGPUOperand::ImmTyAttrChan)); 5437 return MatchOperand_Success; 5438 } 5439 5440 //===----------------------------------------------------------------------===// 5441 // exp 5442 //===----------------------------------------------------------------------===// 5443 5444 void AMDGPUAsmParser::errorExpTgt() { 5445 Error(Parser.getTok().getLoc(), "invalid exp target"); 5446 } 5447 5448 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5449 uint8_t &Val) { 5450 if (Str == "null") { 5451 Val = 9; 5452 return MatchOperand_Success; 5453 } 5454 5455 if (Str.startswith("mrt")) { 5456 Str = Str.drop_front(3); 5457 if (Str == "z") { // == mrtz 5458 Val = 8; 5459 return MatchOperand_Success; 5460 } 5461 5462 if (Str.getAsInteger(10, Val)) 5463 return MatchOperand_ParseFail; 5464 5465 if (Val > 7) 5466 errorExpTgt(); 5467 5468 return MatchOperand_Success; 5469 } 5470 5471 if (Str.startswith("pos")) 
{ 5472 Str = Str.drop_front(3); 5473 if (Str.getAsInteger(10, Val)) 5474 return MatchOperand_ParseFail; 5475 5476 if (Val > 4 || (Val == 4 && !isGFX10())) 5477 errorExpTgt(); 5478 5479 Val += 12; 5480 return MatchOperand_Success; 5481 } 5482 5483 if (isGFX10() && Str == "prim") { 5484 Val = 20; 5485 return MatchOperand_Success; 5486 } 5487 5488 if (Str.startswith("param")) { 5489 Str = Str.drop_front(5); 5490 if (Str.getAsInteger(10, Val)) 5491 return MatchOperand_ParseFail; 5492 5493 if (Val >= 32) 5494 errorExpTgt(); 5495 5496 Val += 32; 5497 return MatchOperand_Success; 5498 } 5499 5500 if (Str.startswith("invalid_target_")) { 5501 Str = Str.drop_front(15); 5502 if (Str.getAsInteger(10, Val)) 5503 return MatchOperand_ParseFail; 5504 5505 errorExpTgt(); 5506 return MatchOperand_Success; 5507 } 5508 5509 return MatchOperand_NoMatch; 5510 } 5511 5512 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5513 uint8_t Val; 5514 StringRef Str = Parser.getTok().getString(); 5515 5516 auto Res = parseExpTgtImpl(Str, Val); 5517 if (Res != MatchOperand_Success) 5518 return Res; 5519 5520 SMLoc S = Parser.getTok().getLoc(); 5521 Parser.Lex(); 5522 5523 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5524 AMDGPUOperand::ImmTyExpTgt)); 5525 return MatchOperand_Success; 5526 } 5527 5528 //===----------------------------------------------------------------------===// 5529 // parser helpers 5530 //===----------------------------------------------------------------------===// 5531 5532 bool 5533 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5534 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5535 } 5536 5537 bool 5538 AMDGPUAsmParser::isId(const StringRef Id) const { 5539 return isId(getToken(), Id); 5540 } 5541 5542 bool 5543 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5544 return getTokenKind() == Kind; 5545 } 5546 5547 bool 5548 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5549 if (isId(Id)) { 5550 lex(); 5551 return true; 5552 } 5553 return false; 5554 } 5555 5556 bool 5557 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5558 if (isId(Id) && peekToken().is(Kind)) { 5559 lex(); 5560 lex(); 5561 return true; 5562 } 5563 return false; 5564 } 5565 5566 bool 5567 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5568 if (isToken(Kind)) { 5569 lex(); 5570 return true; 5571 } 5572 return false; 5573 } 5574 5575 bool 5576 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5577 const StringRef ErrMsg) { 5578 if (!trySkipToken(Kind)) { 5579 Error(getLoc(), ErrMsg); 5580 return false; 5581 } 5582 return true; 5583 } 5584 5585 bool 5586 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5587 return !getParser().parseAbsoluteExpression(Imm); 5588 } 5589 5590 bool 5591 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5592 SMLoc S = getLoc(); 5593 5594 const MCExpr *Expr; 5595 if (Parser.parseExpression(Expr)) 5596 return false; 5597 5598 int64_t IntVal; 5599 if (Expr->evaluateAsAbsolute(IntVal)) { 5600 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5601 } else { 5602 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5603 } 5604 return true; 5605 } 5606 5607 bool 5608 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5609 if (isToken(AsmToken::String)) { 5610 Val = getToken().getStringContents(); 5611 lex(); 5612 return true; 5613 } else { 5614 Error(getLoc(), ErrMsg); 5615 return false; 5616 } 5617 } 5618 5619 
AsmToken 5620 AMDGPUAsmParser::getToken() const { 5621 return Parser.getTok(); 5622 } 5623 5624 AsmToken 5625 AMDGPUAsmParser::peekToken() { 5626 return getLexer().peekTok(); 5627 } 5628 5629 void 5630 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5631 auto TokCount = getLexer().peekTokens(Tokens); 5632 5633 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5634 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5635 } 5636 5637 AsmToken::TokenKind 5638 AMDGPUAsmParser::getTokenKind() const { 5639 return getLexer().getKind(); 5640 } 5641 5642 SMLoc 5643 AMDGPUAsmParser::getLoc() const { 5644 return getToken().getLoc(); 5645 } 5646 5647 StringRef 5648 AMDGPUAsmParser::getTokenStr() const { 5649 return getToken().getString(); 5650 } 5651 5652 void 5653 AMDGPUAsmParser::lex() { 5654 Parser.Lex(); 5655 } 5656 5657 //===----------------------------------------------------------------------===// 5658 // swizzle 5659 //===----------------------------------------------------------------------===// 5660 5661 LLVM_READNONE 5662 static unsigned 5663 encodeBitmaskPerm(const unsigned AndMask, 5664 const unsigned OrMask, 5665 const unsigned XorMask) { 5666 using namespace llvm::AMDGPU::Swizzle; 5667 5668 return BITMASK_PERM_ENC | 5669 (AndMask << BITMASK_AND_SHIFT) | 5670 (OrMask << BITMASK_OR_SHIFT) | 5671 (XorMask << BITMASK_XOR_SHIFT); 5672 } 5673 5674 bool 5675 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5676 const unsigned MinVal, 5677 const unsigned MaxVal, 5678 const StringRef ErrMsg) { 5679 for (unsigned i = 0; i < OpNum; ++i) { 5680 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5681 return false; 5682 } 5683 SMLoc ExprLoc = Parser.getTok().getLoc(); 5684 if (!parseExpr(Op[i])) { 5685 return false; 5686 } 5687 if (Op[i] < MinVal || Op[i] > MaxVal) { 5688 Error(ExprLoc, ErrMsg); 5689 return false; 5690 } 5691 } 5692 5693 return true; 5694 } 5695 5696 bool 5697 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5698 using namespace llvm::AMDGPU::Swizzle; 5699 5700 int64_t Lane[LANE_NUM]; 5701 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5702 "expected a 2-bit lane id")) { 5703 Imm = QUAD_PERM_ENC; 5704 for (unsigned I = 0; I < LANE_NUM; ++I) { 5705 Imm |= Lane[I] << (LANE_SHIFT * I); 5706 } 5707 return true; 5708 } 5709 return false; 5710 } 5711 5712 bool 5713 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5714 using namespace llvm::AMDGPU::Swizzle; 5715 5716 SMLoc S = Parser.getTok().getLoc(); 5717 int64_t GroupSize; 5718 int64_t LaneIdx; 5719 5720 if (!parseSwizzleOperands(1, &GroupSize, 5721 2, 32, 5722 "group size must be in the interval [2,32]")) { 5723 return false; 5724 } 5725 if (!isPowerOf2_64(GroupSize)) { 5726 Error(S, "group size must be a power of two"); 5727 return false; 5728 } 5729 if (parseSwizzleOperands(1, &LaneIdx, 5730 0, GroupSize - 1, 5731 "lane id must be in the interval [0,group size - 1]")) { 5732 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5733 return true; 5734 } 5735 return false; 5736 } 5737 5738 bool 5739 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5740 using namespace llvm::AMDGPU::Swizzle; 5741 5742 SMLoc S = Parser.getTok().getLoc(); 5743 int64_t GroupSize; 5744 5745 if (!parseSwizzleOperands(1, &GroupSize, 5746 2, 32, "group size must be in the interval [2,32]")) { 5747 return false; 5748 } 5749 if (!isPowerOf2_64(GroupSize)) { 5750 Error(S, "group size must be a power of two"); 5751 return false; 5752 } 5753 5754 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1);
5755 return true;
5756 }
5757
5758 bool
5759 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5760 using namespace llvm::AMDGPU::Swizzle;
5761
5762 SMLoc S = Parser.getTok().getLoc();
5763 int64_t GroupSize;
5764
5765 if (!parseSwizzleOperands(1, &GroupSize,
5766 1, 16, "group size must be in the interval [1,16]")) {
5767 return false;
5768 }
5769 if (!isPowerOf2_64(GroupSize)) {
5770 Error(S, "group size must be a power of two");
5771 return false;
5772 }
5773
5774 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5775 return true;
5776 }
5777
5778 bool
5779 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5780 using namespace llvm::AMDGPU::Swizzle;
5781
5782 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5783 return false;
5784 }
5785
5786 StringRef Ctl;
5787 SMLoc StrLoc = Parser.getTok().getLoc();
5788 if (!parseString(Ctl)) {
5789 return false;
5790 }
5791 if (Ctl.size() != BITMASK_WIDTH) {
5792 Error(StrLoc, "expected a 5-character mask");
5793 return false;
5794 }
5795
5796 unsigned AndMask = 0;
5797 unsigned OrMask = 0;
5798 unsigned XorMask = 0;
5799
5800 for (size_t i = 0; i < Ctl.size(); ++i) {
5801 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5802 switch(Ctl[i]) {
5803 default:
5804 Error(StrLoc, "invalid mask");
5805 return false;
5806 case '0':
5807 break;
5808 case '1':
5809 OrMask |= Mask;
5810 break;
5811 case 'p':
5812 AndMask |= Mask;
5813 break;
5814 case 'i':
5815 AndMask |= Mask;
5816 XorMask |= Mask;
5817 break;
5818 }
5819 }
5820
5821 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5822 return true;
5823 }
5824
5825 bool
5826 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5827
5828 SMLoc OffsetLoc = Parser.getTok().getLoc();
5829
5830 if (!parseExpr(Imm)) {
5831 return false;
5832 }
5833 if (!isUInt<16>(Imm)) {
5834 Error(OffsetLoc, "expected a 16-bit offset");
5835 return false;
5836 }
5837 return true;
5838 }
5839
5840 bool
5841 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5842 using namespace llvm::AMDGPU::Swizzle;
5843
5844 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5845
5846 SMLoc ModeLoc = Parser.getTok().getLoc();
5847 bool Ok = false;
5848
5849 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5850 Ok = parseSwizzleQuadPerm(Imm);
5851 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5852 Ok = parseSwizzleBitmaskPerm(Imm);
5853 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5854 Ok = parseSwizzleBroadcast(Imm);
5855 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5856 Ok = parseSwizzleSwap(Imm);
5857 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5858 Ok = parseSwizzleReverse(Imm);
5859 } else {
5860 Error(ModeLoc, "expected a swizzle mode");
5861 }
5862
5863 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5864 }
5865
5866 return false;
5867 }
5868
5869 OperandMatchResultTy
5870 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5871 SMLoc S = Parser.getTok().getLoc();
5872 int64_t Imm = 0;
5873
5874 if (trySkipId("offset")) {
5875
5876 bool Ok = false;
5877 if (skipToken(AsmToken::Colon, "expected a colon")) {
5878 if (trySkipId("swizzle")) {
5879 Ok = parseSwizzleMacro(Imm);
5880 } else {
5881 Ok = parseSwizzleOffset(Imm);
5882 }
5883 }
5884
5885 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5886
5887 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5888 } else {
5889 // Swizzle "offset" operand is optional.
5890 // If it is omitted, try parsing other optional operands.
5891 return parseOptionalOpr(Operands); 5892 } 5893 } 5894 5895 bool 5896 AMDGPUOperand::isSwizzle() const { 5897 return isImmTy(ImmTySwizzle); 5898 } 5899 5900 //===----------------------------------------------------------------------===// 5901 // VGPR Index Mode 5902 //===----------------------------------------------------------------------===// 5903 5904 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5905 5906 using namespace llvm::AMDGPU::VGPRIndexMode; 5907 5908 if (trySkipToken(AsmToken::RParen)) { 5909 return OFF; 5910 } 5911 5912 int64_t Imm = 0; 5913 5914 while (true) { 5915 unsigned Mode = 0; 5916 SMLoc S = Parser.getTok().getLoc(); 5917 5918 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5919 if (trySkipId(IdSymbolic[ModeId])) { 5920 Mode = 1 << ModeId; 5921 break; 5922 } 5923 } 5924 5925 if (Mode == 0) { 5926 Error(S, (Imm == 0)? 5927 "expected a VGPR index mode or a closing parenthesis" : 5928 "expected a VGPR index mode"); 5929 break; 5930 } 5931 5932 if (Imm & Mode) { 5933 Error(S, "duplicate VGPR index mode"); 5934 break; 5935 } 5936 Imm |= Mode; 5937 5938 if (trySkipToken(AsmToken::RParen)) 5939 break; 5940 if (!skipToken(AsmToken::Comma, 5941 "expected a comma or a closing parenthesis")) 5942 break; 5943 } 5944 5945 return Imm; 5946 } 5947 5948 OperandMatchResultTy 5949 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5950 5951 int64_t Imm = 0; 5952 SMLoc S = Parser.getTok().getLoc(); 5953 5954 if (getLexer().getKind() == AsmToken::Identifier && 5955 Parser.getTok().getString() == "gpr_idx" && 5956 getLexer().peekTok().is(AsmToken::LParen)) { 5957 5958 Parser.Lex(); 5959 Parser.Lex(); 5960 5961 // If parse failed, trigger an error but do not return error code 5962 // to avoid excessive error messages. 5963 Imm = parseGPRIdxMacro(); 5964 5965 } else { 5966 if (getParser().parseAbsoluteExpression(Imm)) 5967 return MatchOperand_NoMatch; 5968 if (Imm < 0 || !isUInt<4>(Imm)) { 5969 Error(S, "invalid immediate: only 4-bit values are legal"); 5970 } 5971 } 5972 5973 Operands.push_back( 5974 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5975 return MatchOperand_Success; 5976 } 5977 5978 bool AMDGPUOperand::isGPRIdxMode() const { 5979 return isImmTy(ImmTyGprIdxMode); 5980 } 5981 5982 //===----------------------------------------------------------------------===// 5983 // sopp branch targets 5984 //===----------------------------------------------------------------------===// 5985 5986 OperandMatchResultTy 5987 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5988 5989 // Make sure we are not parsing something 5990 // that looks like a label or an expression but is not. 5991 // This will improve error messages. 5992 if (isRegister() || isModifier()) 5993 return MatchOperand_NoMatch; 5994 5995 if (parseExpr(Operands)) { 5996 5997 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5998 assert(Opr.isImm() || Opr.isExpr()); 5999 SMLoc Loc = Opr.getStartLoc(); 6000 6001 // Currently we do not support arbitrary expressions as branch targets. 6002 // Only labels and absolute expressions are accepted. 
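// For instance, a bare label reference or an absolute literal such as 16 is
// accepted, while a compound expression like <label> + 4 folds into a
// non-trivial MCExpr and is rejected below (the operands named here are
// purely illustrative).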
6003 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6004 Error(Loc, "expected an absolute expression or a label"); 6005 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6006 Error(Loc, "expected a 16-bit signed jump offset"); 6007 } 6008 } 6009 6010 return MatchOperand_Success; // avoid excessive error messages 6011 } 6012 6013 //===----------------------------------------------------------------------===// 6014 // Boolean holding registers 6015 //===----------------------------------------------------------------------===// 6016 6017 OperandMatchResultTy 6018 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6019 return parseReg(Operands); 6020 } 6021 6022 //===----------------------------------------------------------------------===// 6023 // mubuf 6024 //===----------------------------------------------------------------------===// 6025 6026 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6027 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6028 } 6029 6030 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6031 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6032 } 6033 6034 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6035 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6036 } 6037 6038 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6039 const OperandVector &Operands, 6040 bool IsAtomic, 6041 bool IsAtomicReturn, 6042 bool IsLds) { 6043 bool IsLdsOpcode = IsLds; 6044 bool HasLdsModifier = false; 6045 OptionalImmIndexMap OptionalIdx; 6046 assert(IsAtomicReturn ? IsAtomic : true); 6047 unsigned FirstOperandIdx = 1; 6048 6049 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6050 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6051 6052 // Add the register arguments 6053 if (Op.isReg()) { 6054 Op.addRegOperands(Inst, 1); 6055 // Insert a tied src for atomic return dst. 6056 // This cannot be postponed as subsequent calls to 6057 // addImmOperands rely on correct number of MC operands. 6058 if (IsAtomicReturn && i == FirstOperandIdx) 6059 Op.addRegOperands(Inst, 1); 6060 continue; 6061 } 6062 6063 // Handle the case where soffset is an immediate 6064 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6065 Op.addImmOperands(Inst, 1); 6066 continue; 6067 } 6068 6069 HasLdsModifier |= Op.isLDS(); 6070 6071 // Handle tokens like 'offen' which are sometimes hard-coded into the 6072 // asm string. There are no MCInst operands for these. 6073 if (Op.isToken()) { 6074 continue; 6075 } 6076 assert(Op.isImm()); 6077 6078 // Handle optional arguments 6079 OptionalIdx[Op.getImmTy()] = i; 6080 } 6081 6082 // This is a workaround for an llvm quirk which may result in an 6083 // incorrect instruction selection. Lds and non-lds versions of 6084 // MUBUF instructions are identical except that lds versions 6085 // have mandatory 'lds' modifier. However this modifier follows 6086 // optional modifiers and llvm asm matcher regards this 'lds' 6087 // modifier as an optional one. As a result, an lds version 6088 // of opcode may be selected even if it has no 'lds' modifier. 6089 if (IsLdsOpcode && !HasLdsModifier) { 6090 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6091 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6092 Inst.setOpcode(NoLdsOpcode); 6093 IsLdsOpcode = false; 6094 } 6095 } 6096 6097 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6098 if (!IsAtomic) { // glc is hard-coded. 
6099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6100 } 6101 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6102 6103 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6104 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6105 } 6106 6107 if (isGFX10()) 6108 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6109 } 6110 6111 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6112 OptionalImmIndexMap OptionalIdx; 6113 6114 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6115 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6116 6117 // Add the register arguments 6118 if (Op.isReg()) { 6119 Op.addRegOperands(Inst, 1); 6120 continue; 6121 } 6122 6123 // Handle the case where soffset is an immediate 6124 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6125 Op.addImmOperands(Inst, 1); 6126 continue; 6127 } 6128 6129 // Handle tokens like 'offen' which are sometimes hard-coded into the 6130 // asm string. There are no MCInst operands for these. 6131 if (Op.isToken()) { 6132 continue; 6133 } 6134 assert(Op.isImm()); 6135 6136 // Handle optional arguments 6137 OptionalIdx[Op.getImmTy()] = i; 6138 } 6139 6140 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6141 AMDGPUOperand::ImmTyOffset); 6142 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6144 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6145 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6146 6147 if (isGFX10()) 6148 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6149 } 6150 6151 //===----------------------------------------------------------------------===// 6152 // mimg 6153 //===----------------------------------------------------------------------===// 6154 6155 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6156 bool IsAtomic) { 6157 unsigned I = 1; 6158 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6159 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6160 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6161 } 6162 6163 if (IsAtomic) { 6164 // Add src, same as dst 6165 assert(Desc.getNumDefs() == 1); 6166 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6167 } 6168 6169 OptionalImmIndexMap OptionalIdx; 6170 6171 for (unsigned E = Operands.size(); I != E; ++I) { 6172 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6173 6174 // Add the register arguments 6175 if (Op.isReg()) { 6176 Op.addRegOperands(Inst, 1); 6177 } else if (Op.isImmModifier()) { 6178 OptionalIdx[Op.getImmTy()] = I; 6179 } else if (!Op.isToken()) { 6180 llvm_unreachable("unexpected operand type"); 6181 } 6182 } 6183 6184 bool IsGFX10 = isGFX10(); 6185 6186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6187 if (IsGFX10) 6188 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6189 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6190 if (IsGFX10) 6191 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6194 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6195 if (IsGFX10) 6196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6199 if (!IsGFX10) 6200 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6202 } 6203 6204 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6205 cvtMIMG(Inst, Operands, true); 6206 } 6207 6208 //===----------------------------------------------------------------------===// 6209 // smrd 6210 //===----------------------------------------------------------------------===// 6211 6212 bool AMDGPUOperand::isSMRDOffset8() const { 6213 return isImm() && isUInt<8>(getImm()); 6214 } 6215 6216 bool AMDGPUOperand::isSMEMOffset() const { 6217 return isImm(); // Offset range is checked later by validator. 6218 } 6219 6220 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6221 // 32-bit literals are only supported on CI and we only want to use them 6222 // when the offset is > 8-bits. 6223 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6224 } 6225 6226 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6227 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6228 } 6229 6230 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6231 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6232 } 6233 6234 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6235 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6236 } 6237 6238 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6239 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6240 } 6241 6242 //===----------------------------------------------------------------------===// 6243 // vop3 6244 //===----------------------------------------------------------------------===// 6245 6246 static bool ConvertOmodMul(int64_t &Mul) { 6247 if (Mul != 1 && Mul != 2 && Mul != 4) 6248 return false; 6249 6250 Mul >>= 1; 6251 return true; 6252 } 6253 6254 static bool ConvertOmodDiv(int64_t &Div) { 6255 if (Div == 1) { 6256 Div = 0; 6257 return true; 6258 } 6259 6260 if (Div == 2) { 6261 Div = 3; 6262 return true; 6263 } 6264 6265 return false; 6266 } 6267 6268 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6269 if (BoundCtrl == 0) { 6270 BoundCtrl = 1; 6271 return true; 6272 } 6273 6274 if (BoundCtrl == -1) { 6275 BoundCtrl = 0; 6276 return true; 6277 } 6278 6279 return false; 6280 } 6281 6282 // Note: the order in this table matches the order of operands in AsmString. 
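// Each entry below is {asm name, immediate type, is-a-bare-bit flag, optional
// value converter}; see parseOptionalOpr for how these fields are used.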
6283 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6284 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6285 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6286 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6287 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6288 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6289 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6290 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6291 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6292 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6293 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6294 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6295 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6296 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6297 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6298 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6299 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6300 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6301 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6302 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6303 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6304 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6305 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6306 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6307 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6308 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6309 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6310 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6311 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6312 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6313 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6314 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6315 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6316 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6317 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6318 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6319 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6320 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6321 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6322 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6323 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6324 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6325 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6326 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6327 }; 6328 6329 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6330 6331 OperandMatchResultTy res = parseOptionalOpr(Operands); 6332 6333 // This is a hack to enable hardcoded mandatory operands which follow 6334 // optional operands. 6335 // 6336 // Current design assumes that all operands after the first optional operand 6337 // are also optional. However implementation of some instructions violates 6338 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6339 // 6340 // To alleviate this problem, we have to (implicitly) parse extra operands 6341 // to make sure autogenerated parser of custom operands never hit hardcoded 6342 // mandatory operands. 
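// For example, an atomic instruction with a hardcoded trailing 'glc' would
// otherwise never be reached by the custom-operand parser; the bounded
// lookahead below keeps consuming comma-separated optional operands so that
// such a mandatory operand can still be matched.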
6343 6344 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6345 if (res != MatchOperand_Success || 6346 isToken(AsmToken::EndOfStatement)) 6347 break; 6348 6349 trySkipToken(AsmToken::Comma); 6350 res = parseOptionalOpr(Operands); 6351 } 6352 6353 return res; 6354 } 6355 6356 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6357 OperandMatchResultTy res; 6358 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6359 // try to parse any optional operand here 6360 if (Op.IsBit) { 6361 res = parseNamedBit(Op.Name, Operands, Op.Type); 6362 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6363 res = parseOModOperand(Operands); 6364 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6365 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6366 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6367 res = parseSDWASel(Operands, Op.Name, Op.Type); 6368 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6369 res = parseSDWADstUnused(Operands); 6370 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6371 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6372 Op.Type == AMDGPUOperand::ImmTyNegLo || 6373 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6374 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6375 Op.ConvertResult); 6376 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6377 res = parseDim(Operands); 6378 } else { 6379 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6380 } 6381 if (res != MatchOperand_NoMatch) { 6382 return res; 6383 } 6384 } 6385 return MatchOperand_NoMatch; 6386 } 6387 6388 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6389 StringRef Name = Parser.getTok().getString(); 6390 if (Name == "mul") { 6391 return parseIntWithPrefix("mul", Operands, 6392 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6393 } 6394 6395 if (Name == "div") { 6396 return parseIntWithPrefix("div", Operands, 6397 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6398 } 6399 6400 return MatchOperand_NoMatch; 6401 } 6402 6403 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6404 cvtVOP3P(Inst, Operands); 6405 6406 int Opc = Inst.getOpcode(); 6407 6408 int SrcNum; 6409 const int Ops[] = { AMDGPU::OpName::src0, 6410 AMDGPU::OpName::src1, 6411 AMDGPU::OpName::src2 }; 6412 for (SrcNum = 0; 6413 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6414 ++SrcNum); 6415 assert(SrcNum > 0); 6416 6417 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6418 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6419 6420 if ((OpSel & (1 << SrcNum)) != 0) { 6421 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6422 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6423 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6424 } 6425 } 6426 6427 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6428 // 1. This operand is input modifiers 6429 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6430 // 2. This is not last operand 6431 && Desc.NumOperands > (OpNum + 1) 6432 // 3. Next operand is register class 6433 && Desc.OpInfo[OpNum + 1].RegClass != -1 6434 // 4. 
Next register is not tied to any other operand 6435 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6436 } 6437 6438 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6439 { 6440 OptionalImmIndexMap OptionalIdx; 6441 unsigned Opc = Inst.getOpcode(); 6442 6443 unsigned I = 1; 6444 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6445 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6446 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6447 } 6448 6449 for (unsigned E = Operands.size(); I != E; ++I) { 6450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6451 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6452 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6453 } else if (Op.isInterpSlot() || 6454 Op.isInterpAttr() || 6455 Op.isAttrChan()) { 6456 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6457 } else if (Op.isImmModifier()) { 6458 OptionalIdx[Op.getImmTy()] = I; 6459 } else { 6460 llvm_unreachable("unhandled operand type"); 6461 } 6462 } 6463 6464 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6465 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6466 } 6467 6468 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6469 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6470 } 6471 6472 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6473 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6474 } 6475 } 6476 6477 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6478 OptionalImmIndexMap &OptionalIdx) { 6479 unsigned Opc = Inst.getOpcode(); 6480 6481 unsigned I = 1; 6482 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6483 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6484 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6485 } 6486 6487 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6488 // This instruction has src modifiers 6489 for (unsigned E = Operands.size(); I != E; ++I) { 6490 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6491 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6492 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6493 } else if (Op.isImmModifier()) { 6494 OptionalIdx[Op.getImmTy()] = I; 6495 } else if (Op.isRegOrImm()) { 6496 Op.addRegOrImmOperands(Inst, 1); 6497 } else { 6498 llvm_unreachable("unhandled operand type"); 6499 } 6500 } 6501 } else { 6502 // No src modifiers 6503 for (unsigned E = Operands.size(); I != E; ++I) { 6504 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6505 if (Op.isMod()) { 6506 OptionalIdx[Op.getImmTy()] = I; 6507 } else { 6508 Op.addRegOrImmOperands(Inst, 1); 6509 } 6510 } 6511 } 6512 6513 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6515 } 6516 6517 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6519 } 6520 6521 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6522 // it has src2 register operand that is tied to dst operand 6523 // we don't allow modifiers for this operand in assembler so src2_modifiers 6524 // should be 0. 
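// In other words, for the opcodes listed below the converter inserts
// src2_modifiers = 0 and re-uses the dst register operand as src2.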
6525 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6526 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6527 Opc == AMDGPU::V_MAC_F32_e64_vi || 6528 Opc == AMDGPU::V_MAC_F16_e64_vi || 6529 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6530 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6531 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6532 auto it = Inst.begin(); 6533 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6534 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6535 ++it; 6536 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6537 } 6538 } 6539 6540 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6541 OptionalImmIndexMap OptionalIdx; 6542 cvtVOP3(Inst, Operands, OptionalIdx); 6543 } 6544 6545 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6546 const OperandVector &Operands) { 6547 OptionalImmIndexMap OptIdx; 6548 const int Opc = Inst.getOpcode(); 6549 const MCInstrDesc &Desc = MII.get(Opc); 6550 6551 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6552 6553 cvtVOP3(Inst, Operands, OptIdx); 6554 6555 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6556 assert(!IsPacked); 6557 Inst.addOperand(Inst.getOperand(0)); 6558 } 6559 6560 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6561 // instruction, and then figure out where to actually put the modifiers 6562 6563 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6564 6565 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6566 if (OpSelHiIdx != -1) { 6567 int DefaultVal = IsPacked ? -1 : 0; 6568 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6569 DefaultVal); 6570 } 6571 6572 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6573 if (NegLoIdx != -1) { 6574 assert(IsPacked); 6575 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6576 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6577 } 6578 6579 const int Ops[] = { AMDGPU::OpName::src0, 6580 AMDGPU::OpName::src1, 6581 AMDGPU::OpName::src2 }; 6582 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6583 AMDGPU::OpName::src1_modifiers, 6584 AMDGPU::OpName::src2_modifiers }; 6585 6586 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6587 6588 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6589 unsigned OpSelHi = 0; 6590 unsigned NegLo = 0; 6591 unsigned NegHi = 0; 6592 6593 if (OpSelHiIdx != -1) { 6594 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6595 } 6596 6597 if (NegLoIdx != -1) { 6598 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6599 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6600 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6601 } 6602 6603 for (int J = 0; J < 3; ++J) { 6604 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6605 if (OpIdx == -1) 6606 break; 6607 6608 uint32_t ModVal = 0; 6609 6610 if ((OpSel & (1 << J)) != 0) 6611 ModVal |= SISrcMods::OP_SEL_0; 6612 6613 if ((OpSelHi & (1 << J)) != 0) 6614 ModVal |= SISrcMods::OP_SEL_1; 6615 6616 if ((NegLo & (1 << J)) != 0) 6617 ModVal |= SISrcMods::NEG; 6618 6619 if ((NegHi & (1 << J)) != 0) 6620 ModVal |= SISrcMods::NEG_HI; 6621 6622 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6623 6624 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6625 } 6626 } 6627 6628 //===----------------------------------------------------------------------===// 6629 // dpp 6630 
//===----------------------------------------------------------------------===// 6631 6632 bool AMDGPUOperand::isDPP8() const { 6633 return isImmTy(ImmTyDPP8); 6634 } 6635 6636 bool AMDGPUOperand::isDPPCtrl() const { 6637 using namespace AMDGPU::DPP; 6638 6639 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6640 if (result) { 6641 int64_t Imm = getImm(); 6642 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6643 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6644 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6645 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6646 (Imm == DppCtrl::WAVE_SHL1) || 6647 (Imm == DppCtrl::WAVE_ROL1) || 6648 (Imm == DppCtrl::WAVE_SHR1) || 6649 (Imm == DppCtrl::WAVE_ROR1) || 6650 (Imm == DppCtrl::ROW_MIRROR) || 6651 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6652 (Imm == DppCtrl::BCAST15) || 6653 (Imm == DppCtrl::BCAST31) || 6654 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6655 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6656 } 6657 return false; 6658 } 6659 6660 //===----------------------------------------------------------------------===// 6661 // mAI 6662 //===----------------------------------------------------------------------===// 6663 6664 bool AMDGPUOperand::isBLGP() const { 6665 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6666 } 6667 6668 bool AMDGPUOperand::isCBSZ() const { 6669 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6670 } 6671 6672 bool AMDGPUOperand::isABID() const { 6673 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6674 } 6675 6676 bool AMDGPUOperand::isS16Imm() const { 6677 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6678 } 6679 6680 bool AMDGPUOperand::isU16Imm() const { 6681 return isImm() && isUInt<16>(getImm()); 6682 } 6683 6684 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6685 if (!isGFX10()) 6686 return MatchOperand_NoMatch; 6687 6688 SMLoc S = Parser.getTok().getLoc(); 6689 6690 if (getLexer().isNot(AsmToken::Identifier)) 6691 return MatchOperand_NoMatch; 6692 if (getLexer().getTok().getString() != "dim") 6693 return MatchOperand_NoMatch; 6694 6695 Parser.Lex(); 6696 if (getLexer().isNot(AsmToken::Colon)) 6697 return MatchOperand_ParseFail; 6698 6699 Parser.Lex(); 6700 6701 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6702 // integer. 
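// So the integer and the identifier that follows it are glued back together
// below; both the short suffix form (e.g. "dim:2D") and the full
// "dim:SQ_RSRC_IMG_..." spelling are accepted.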
6703 std::string Token; 6704 if (getLexer().is(AsmToken::Integer)) { 6705 SMLoc Loc = getLexer().getTok().getEndLoc(); 6706 Token = std::string(getLexer().getTok().getString()); 6707 Parser.Lex(); 6708 if (getLexer().getTok().getLoc() != Loc) 6709 return MatchOperand_ParseFail; 6710 } 6711 if (getLexer().isNot(AsmToken::Identifier)) 6712 return MatchOperand_ParseFail; 6713 Token += getLexer().getTok().getString(); 6714 6715 StringRef DimId = Token; 6716 if (DimId.startswith("SQ_RSRC_IMG_")) 6717 DimId = DimId.substr(12); 6718 6719 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6720 if (!DimInfo) 6721 return MatchOperand_ParseFail; 6722 6723 Parser.Lex(); 6724 6725 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6726 AMDGPUOperand::ImmTyDim)); 6727 return MatchOperand_Success; 6728 } 6729 6730 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6731 SMLoc S = Parser.getTok().getLoc(); 6732 StringRef Prefix; 6733 6734 if (getLexer().getKind() == AsmToken::Identifier) { 6735 Prefix = Parser.getTok().getString(); 6736 } else { 6737 return MatchOperand_NoMatch; 6738 } 6739 6740 if (Prefix != "dpp8") 6741 return parseDPPCtrl(Operands); 6742 if (!isGFX10()) 6743 return MatchOperand_NoMatch; 6744 6745 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6746 6747 int64_t Sels[8]; 6748 6749 Parser.Lex(); 6750 if (getLexer().isNot(AsmToken::Colon)) 6751 return MatchOperand_ParseFail; 6752 6753 Parser.Lex(); 6754 if (getLexer().isNot(AsmToken::LBrac)) 6755 return MatchOperand_ParseFail; 6756 6757 Parser.Lex(); 6758 if (getParser().parseAbsoluteExpression(Sels[0])) 6759 return MatchOperand_ParseFail; 6760 if (0 > Sels[0] || 7 < Sels[0]) 6761 return MatchOperand_ParseFail; 6762 6763 for (size_t i = 1; i < 8; ++i) { 6764 if (getLexer().isNot(AsmToken::Comma)) 6765 return MatchOperand_ParseFail; 6766 6767 Parser.Lex(); 6768 if (getParser().parseAbsoluteExpression(Sels[i])) 6769 return MatchOperand_ParseFail; 6770 if (0 > Sels[i] || 7 < Sels[i]) 6771 return MatchOperand_ParseFail; 6772 } 6773 6774 if (getLexer().isNot(AsmToken::RBrac)) 6775 return MatchOperand_ParseFail; 6776 Parser.Lex(); 6777 6778 unsigned DPP8 = 0; 6779 for (size_t i = 0; i < 8; ++i) 6780 DPP8 |= (Sels[i] << (i * 3)); 6781 6782 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6783 return MatchOperand_Success; 6784 } 6785 6786 OperandMatchResultTy 6787 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6788 using namespace AMDGPU::DPP; 6789 6790 SMLoc S = Parser.getTok().getLoc(); 6791 StringRef Prefix; 6792 int64_t Int; 6793 6794 if (getLexer().getKind() == AsmToken::Identifier) { 6795 Prefix = Parser.getTok().getString(); 6796 } else { 6797 return MatchOperand_NoMatch; 6798 } 6799 6800 if (Prefix == "row_mirror") { 6801 Int = DppCtrl::ROW_MIRROR; 6802 Parser.Lex(); 6803 } else if (Prefix == "row_half_mirror") { 6804 Int = DppCtrl::ROW_HALF_MIRROR; 6805 Parser.Lex(); 6806 } else { 6807 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6808 if (Prefix != "quad_perm" 6809 && Prefix != "row_shl" 6810 && Prefix != "row_shr" 6811 && Prefix != "row_ror" 6812 && Prefix != "wave_shl" 6813 && Prefix != "wave_rol" 6814 && Prefix != "wave_shr" 6815 && Prefix != "wave_ror" 6816 && Prefix != "row_bcast" 6817 && Prefix != "row_share" 6818 && Prefix != "row_xmask") { 6819 return MatchOperand_NoMatch; 6820 } 6821 6822 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6823 return MatchOperand_NoMatch; 6824 6825 if 
(!isVI() && !isGFX9() && 6826 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6827 Prefix == "wave_rol" || Prefix == "wave_ror" || 6828 Prefix == "row_bcast")) 6829 return MatchOperand_NoMatch; 6830 6831 Parser.Lex(); 6832 if (getLexer().isNot(AsmToken::Colon)) 6833 return MatchOperand_ParseFail; 6834 6835 if (Prefix == "quad_perm") { 6836 // quad_perm:[%d,%d,%d,%d] 6837 Parser.Lex(); 6838 if (getLexer().isNot(AsmToken::LBrac)) 6839 return MatchOperand_ParseFail; 6840 Parser.Lex(); 6841 6842 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6843 return MatchOperand_ParseFail; 6844 6845 for (int i = 0; i < 3; ++i) { 6846 if (getLexer().isNot(AsmToken::Comma)) 6847 return MatchOperand_ParseFail; 6848 Parser.Lex(); 6849 6850 int64_t Temp; 6851 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6852 return MatchOperand_ParseFail; 6853 const int shift = i*2 + 2; 6854 Int += (Temp << shift); 6855 } 6856 6857 if (getLexer().isNot(AsmToken::RBrac)) 6858 return MatchOperand_ParseFail; 6859 Parser.Lex(); 6860 } else { 6861 // sel:%d 6862 Parser.Lex(); 6863 if (getParser().parseAbsoluteExpression(Int)) 6864 return MatchOperand_ParseFail; 6865 6866 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6867 Int |= DppCtrl::ROW_SHL0; 6868 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6869 Int |= DppCtrl::ROW_SHR0; 6870 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6871 Int |= DppCtrl::ROW_ROR0; 6872 } else if (Prefix == "wave_shl" && 1 == Int) { 6873 Int = DppCtrl::WAVE_SHL1; 6874 } else if (Prefix == "wave_rol" && 1 == Int) { 6875 Int = DppCtrl::WAVE_ROL1; 6876 } else if (Prefix == "wave_shr" && 1 == Int) { 6877 Int = DppCtrl::WAVE_SHR1; 6878 } else if (Prefix == "wave_ror" && 1 == Int) { 6879 Int = DppCtrl::WAVE_ROR1; 6880 } else if (Prefix == "row_bcast") { 6881 if (Int == 15) { 6882 Int = DppCtrl::BCAST15; 6883 } else if (Int == 31) { 6884 Int = DppCtrl::BCAST31; 6885 } else { 6886 return MatchOperand_ParseFail; 6887 } 6888 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6889 Int |= DppCtrl::ROW_SHARE_FIRST; 6890 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6891 Int |= DppCtrl::ROW_XMASK_FIRST; 6892 } else { 6893 return MatchOperand_ParseFail; 6894 } 6895 } 6896 } 6897 6898 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6899 return MatchOperand_Success; 6900 } 6901 6902 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6903 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6904 } 6905 6906 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6907 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6908 } 6909 6910 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6911 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6912 } 6913 6914 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6915 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6916 } 6917 6918 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6919 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6920 } 6921 6922 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6923 OptionalImmIndexMap OptionalIdx; 6924 6925 unsigned I = 1; 6926 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6927 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6928 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6929 } 6930 6931 int Fi = 0; 6932 for (unsigned E = Operands.size(); I != E; ++I) { 6933 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6934 MCOI::TIED_TO); 6935 if (TiedTo != -1) { 6936 assert((unsigned)TiedTo < Inst.getNumOperands()); 6937 // handle tied old or src2 for MAC instructions 6938 Inst.addOperand(Inst.getOperand(TiedTo)); 6939 } 6940 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6941 // Add the register arguments 6942 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6943 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6944 // Skip it. 6945 continue; 6946 } 6947 6948 if (IsDPP8) { 6949 if (Op.isDPP8()) { 6950 Op.addImmOperands(Inst, 1); 6951 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6952 Op.addRegWithFPInputModsOperands(Inst, 2); 6953 } else if (Op.isFI()) { 6954 Fi = Op.getImm(); 6955 } else if (Op.isReg()) { 6956 Op.addRegOperands(Inst, 1); 6957 } else { 6958 llvm_unreachable("Invalid operand type"); 6959 } 6960 } else { 6961 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6962 Op.addRegWithFPInputModsOperands(Inst, 2); 6963 } else if (Op.isDPPCtrl()) { 6964 Op.addImmOperands(Inst, 1); 6965 } else if (Op.isImm()) { 6966 // Handle optional arguments 6967 OptionalIdx[Op.getImmTy()] = I; 6968 } else { 6969 llvm_unreachable("Invalid operand type"); 6970 } 6971 } 6972 } 6973 6974 if (IsDPP8) { 6975 using namespace llvm::AMDGPU::DPP; 6976 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6977 } else { 6978 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6980 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6981 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6983 } 6984 } 6985 } 6986 6987 //===----------------------------------------------------------------------===// 6988 // sdwa 6989 //===----------------------------------------------------------------------===// 6990 6991 OperandMatchResultTy 6992 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6993 AMDGPUOperand::ImmTy Type) { 6994 using namespace llvm::AMDGPU::SDWA; 6995 6996 SMLoc S = Parser.getTok().getLoc(); 6997 StringRef Value; 6998 OperandMatchResultTy res; 6999 7000 res = parseStringWithPrefix(Prefix, Value); 7001 if (res != MatchOperand_Success) { 7002 return res; 7003 } 7004 7005 int64_t Int; 7006 Int = StringSwitch<int64_t>(Value) 7007 .Case("BYTE_0", SdwaSel::BYTE_0) 7008 .Case("BYTE_1", SdwaSel::BYTE_1) 7009 .Case("BYTE_2", SdwaSel::BYTE_2) 7010 .Case("BYTE_3", SdwaSel::BYTE_3) 7011 .Case("WORD_0", SdwaSel::WORD_0) 7012 .Case("WORD_1", SdwaSel::WORD_1) 7013 .Case("DWORD", SdwaSel::DWORD) 7014 .Default(0xffffffff); 7015 Parser.Lex(); // eat last token 7016 7017 if (Int == 0xffffffff) { 7018 return MatchOperand_ParseFail; 7019 } 7020 7021 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7022 return MatchOperand_Success; 7023 } 7024 7025 OperandMatchResultTy 7026 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 7027 using namespace llvm::AMDGPU::SDWA; 7028 7029 SMLoc S = Parser.getTok().getLoc(); 7030 StringRef Value; 7031 OperandMatchResultTy res; 7032 7033 res = parseStringWithPrefix("dst_unused", Value); 7034 if (res != 
MatchOperand_Success) { 7035 return res; 7036 } 7037 7038 int64_t Int; 7039 Int = StringSwitch<int64_t>(Value) 7040 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 7041 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 7042 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 7043 .Default(0xffffffff); 7044 Parser.Lex(); // eat last token 7045 7046 if (Int == 0xffffffff) { 7047 return MatchOperand_ParseFail; 7048 } 7049 7050 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 7051 return MatchOperand_Success; 7052 } 7053 7054 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 7055 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 7056 } 7057 7058 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 7059 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 7060 } 7061 7062 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 7063 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 7064 } 7065 7066 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 7067 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 7068 } 7069 7070 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 7071 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 7072 } 7073 7074 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 7075 uint64_t BasicInstType, 7076 bool SkipDstVcc, 7077 bool SkipSrcVcc) { 7078 using namespace llvm::AMDGPU::SDWA; 7079 7080 OptionalImmIndexMap OptionalIdx; 7081 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 7082 bool SkippedVcc = false; 7083 7084 unsigned I = 1; 7085 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7086 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7087 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7088 } 7089 7090 for (unsigned E = Operands.size(); I != E; ++I) { 7091 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7092 if (SkipVcc && !SkippedVcc && Op.isReg() && 7093 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 7094 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 7095 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 7096 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 7097 // Skip VCC only if we didn't skip it on previous iteration. 7098 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
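// Hence the operand-count checks below: a dst vcc is skipped while the MCInst
// holds only the vdst (1 operand), and a src vcc once it holds the vdst plus
// two {modifiers, src} pairs (5 operands).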
7099 if (BasicInstType == SIInstrFlags::VOP2 && 7100 ((SkipDstVcc && Inst.getNumOperands() == 1) || 7101 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 7102 SkippedVcc = true; 7103 continue; 7104 } else if (BasicInstType == SIInstrFlags::VOPC && 7105 Inst.getNumOperands() == 0) { 7106 SkippedVcc = true; 7107 continue; 7108 } 7109 } 7110 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7111 Op.addRegOrImmWithInputModsOperands(Inst, 2); 7112 } else if (Op.isImm()) { 7113 // Handle optional arguments 7114 OptionalIdx[Op.getImmTy()] = I; 7115 } else { 7116 llvm_unreachable("Invalid operand type"); 7117 } 7118 SkippedVcc = false; 7119 } 7120 7121 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 7122 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 7123 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 7124 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 7125 switch (BasicInstType) { 7126 case SIInstrFlags::VOP1: 7127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7128 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7129 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7130 } 7131 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7132 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7134 break; 7135 7136 case SIInstrFlags::VOP2: 7137 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7138 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7139 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7140 } 7141 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7142 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7144 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7145 break; 7146 7147 case SIInstrFlags::VOPC: 7148 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7149 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7150 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7151 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7152 break; 7153 7154 default: 7155 llvm_unreachable("Invalid instruction type. 
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks the case where we are given an immediate
  // operand but were expected to produce the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be usable with
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
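// For illustration (the instruction below is an assumed example, not taken
// from the original source): in
//   buffer_load_dword v1, off, s[4:7], s1 glc
// the trailing "glc" is parsed as an immediate operand, while the generated
// matcher asks for the MCK_glc token class; the MCK_glc case above accepts
// that immediate so the match can still succeed.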
//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
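// Illustrative usage of the endpgm operand parser above (assumed syntax, not
// taken from the original source): the immediate is optional and defaults to
// 0, so both of the following forms would be accepted, while a value that does
// not fit in 16 bits is rejected with "expected a 16-bit value":
//   s_endpgm
//   s_endpgm 3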