//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
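    // For example, in "ds_write_b32 v1, v2 gds" the trailing "gds" may have
    // been parsed as a symbol reference; treating the referenced symbol's name
    // as the token lets the matcher still recognize it as the 'gds' modifier.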
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
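    // For now, accept either an SGPR-class source / inline constant or a
    // 64-bit literal; how the hardware widens a 32-bit literal for a 64-bit
    // SALU operand is exactly the open question noted in the TODO above.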
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
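      // No subtarget was specified, so fall back to the oldest supported
      // target (Southern Islands) as a baseline feature set.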
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
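    // The cast only affects the constness of *this*; the returned
    // MCRegisterInfo is not modified.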
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy
  parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  bool validateMAIAccWrite(const MCInst &Inst);
  bool validateDivScale(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg);

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
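// For example, MVT::i16 and MVT::f16 both map to IEEEhalf() below, since only
// the storage size of the literal matters here.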
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the target operand's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;
MVT::i16 : type; 1693 1694 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1695 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1696 } 1697 1698 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1699 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1700 } 1701 1702 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1703 if (AsmParser->isVI()) 1704 return isVReg32(); 1705 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1706 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1707 else 1708 return false; 1709 } 1710 1711 bool AMDGPUOperand::isSDWAFP16Operand() const { 1712 return isSDWAOperand(MVT::f16); 1713 } 1714 1715 bool AMDGPUOperand::isSDWAFP32Operand() const { 1716 return isSDWAOperand(MVT::f32); 1717 } 1718 1719 bool AMDGPUOperand::isSDWAInt16Operand() const { 1720 return isSDWAOperand(MVT::i16); 1721 } 1722 1723 bool AMDGPUOperand::isSDWAInt32Operand() const { 1724 return isSDWAOperand(MVT::i32); 1725 } 1726 1727 bool AMDGPUOperand::isBoolReg() const { 1728 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1729 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1730 } 1731 1732 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1733 { 1734 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1735 assert(Size == 2 || Size == 4 || Size == 8); 1736 1737 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1738 1739 if (Imm.Mods.Abs) { 1740 Val &= ~FpSignMask; 1741 } 1742 if (Imm.Mods.Neg) { 1743 Val ^= FpSignMask; 1744 } 1745 1746 return Val; 1747 } 1748 1749 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1750 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1751 Inst.getNumOperands())) { 1752 addLiteralImmOperand(Inst, Imm.Val, 1753 ApplyModifiers & 1754 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1755 } else { 1756 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1757 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1758 } 1759 } 1760 1761 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1762 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1763 auto OpNum = Inst.getNumOperands(); 1764 // Check that this operand accepts literals 1765 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1766 1767 if (ApplyModifiers) { 1768 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1769 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1770 Val = applyInputFPModifiers(Val, Size); 1771 } 1772 1773 APInt Literal(64, Val); 1774 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1775 1776 if (Imm.IsFPImm) { // We got fp literal token 1777 switch (OpTy) { 1778 case AMDGPU::OPERAND_REG_IMM_INT64: 1779 case AMDGPU::OPERAND_REG_IMM_FP64: 1780 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1781 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1782 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1783 AsmParser->hasInv2PiInlineImm())) { 1784 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1785 return; 1786 } 1787 1788 // Non-inlineable 1789 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1790 // For fp operands we check if low 32 bits are zeros 1791 if (Literal.getLoBits(32) != 0) { 1792 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1793 "Can't encode literal as exact 64-bit floating-point operand. " 1794 "Low 32-bits will be set to zero"); 1795 } 1796 1797 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1798 return; 1799 } 1800 1801 // We don't allow fp literals in 64-bit integer instructions. It is 1802 // unclear how we should encode them. This case should be checked earlier 1803 // in predicate methods (isLiteralImm()) 1804 llvm_unreachable("fp literal in 64-bit integer instruction."); 1805 1806 case AMDGPU::OPERAND_REG_IMM_INT32: 1807 case AMDGPU::OPERAND_REG_IMM_FP32: 1808 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1809 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1810 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1811 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1812 case AMDGPU::OPERAND_REG_IMM_INT16: 1813 case AMDGPU::OPERAND_REG_IMM_FP16: 1814 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1815 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1816 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1817 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1818 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1820 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1822 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1823 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1824 bool lost; 1825 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1826 // Convert literal to single precision 1827 FPLiteral.convert(*getOpFltSemantics(OpTy), 1828 APFloat::rmNearestTiesToEven, &lost); 1829 // We allow precision lost but not overflow or underflow. This should be 1830 // checked earlier in isLiteralImm() 1831 1832 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1833 Inst.addOperand(MCOperand::createImm(ImmVal)); 1834 return; 1835 } 1836 default: 1837 llvm_unreachable("invalid operand size"); 1838 } 1839 1840 return; 1841 } 1842 1843 // We got int literal token. 1844 // Only sign extend inline immediates. 
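  // Illustrative examples of how the cases below encode integer literal
  // tokens (assembler syntax shown for illustration only, not taken from the
  // ISA documentation):
  //   v_add_f32 v0, -4, v1    // -4 is an inline constant and is emitted as-is
  //   v_add_f32 v0, 100, v1   // 100 is not inlinable; only the truncated low
  //                           // bits are emitted as a literal constant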
1845 switch (OpTy) { 1846 case AMDGPU::OPERAND_REG_IMM_INT32: 1847 case AMDGPU::OPERAND_REG_IMM_FP32: 1848 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1849 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1851 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1852 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1853 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1854 if (isSafeTruncation(Val, 32) && 1855 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1856 AsmParser->hasInv2PiInlineImm())) { 1857 Inst.addOperand(MCOperand::createImm(Val)); 1858 return; 1859 } 1860 1861 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1862 return; 1863 1864 case AMDGPU::OPERAND_REG_IMM_INT64: 1865 case AMDGPU::OPERAND_REG_IMM_FP64: 1866 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1867 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1868 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1869 Inst.addOperand(MCOperand::createImm(Val)); 1870 return; 1871 } 1872 1873 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1874 return; 1875 1876 case AMDGPU::OPERAND_REG_IMM_INT16: 1877 case AMDGPU::OPERAND_REG_IMM_FP16: 1878 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1879 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1882 if (isSafeTruncation(Val, 16) && 1883 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1884 AsmParser->hasInv2PiInlineImm())) { 1885 Inst.addOperand(MCOperand::createImm(Val)); 1886 return; 1887 } 1888 1889 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1890 return; 1891 1892 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1893 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1894 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1895 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1896 assert(isSafeTruncation(Val, 16)); 1897 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1898 AsmParser->hasInv2PiInlineImm())); 1899 1900 Inst.addOperand(MCOperand::createImm(Val)); 1901 return; 1902 } 1903 default: 1904 llvm_unreachable("invalid operand size"); 1905 } 1906 } 1907 1908 template <unsigned Bitwidth> 1909 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1910 APInt Literal(64, Imm.Val); 1911 1912 if (!Imm.IsFPImm) { 1913 // We got int literal token. 
1914 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1915 return; 1916 } 1917 1918 bool Lost; 1919 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1920 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1921 APFloat::rmNearestTiesToEven, &Lost); 1922 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1923 } 1924 1925 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1926 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1927 } 1928 1929 static bool isInlineValue(unsigned Reg) { 1930 switch (Reg) { 1931 case AMDGPU::SRC_SHARED_BASE: 1932 case AMDGPU::SRC_SHARED_LIMIT: 1933 case AMDGPU::SRC_PRIVATE_BASE: 1934 case AMDGPU::SRC_PRIVATE_LIMIT: 1935 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1936 return true; 1937 case AMDGPU::SRC_VCCZ: 1938 case AMDGPU::SRC_EXECZ: 1939 case AMDGPU::SRC_SCC: 1940 return true; 1941 case AMDGPU::SGPR_NULL: 1942 return true; 1943 default: 1944 return false; 1945 } 1946 } 1947 1948 bool AMDGPUOperand::isInlineValue() const { 1949 return isRegKind() && ::isInlineValue(getReg()); 1950 } 1951 1952 //===----------------------------------------------------------------------===// 1953 // AsmParser 1954 //===----------------------------------------------------------------------===// 1955 1956 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1957 if (Is == IS_VGPR) { 1958 switch (RegWidth) { 1959 default: return -1; 1960 case 1: return AMDGPU::VGPR_32RegClassID; 1961 case 2: return AMDGPU::VReg_64RegClassID; 1962 case 3: return AMDGPU::VReg_96RegClassID; 1963 case 4: return AMDGPU::VReg_128RegClassID; 1964 case 5: return AMDGPU::VReg_160RegClassID; 1965 case 6: return AMDGPU::VReg_192RegClassID; 1966 case 8: return AMDGPU::VReg_256RegClassID; 1967 case 16: return AMDGPU::VReg_512RegClassID; 1968 case 32: return AMDGPU::VReg_1024RegClassID; 1969 } 1970 } else if (Is == IS_TTMP) { 1971 switch (RegWidth) { 1972 default: return -1; 1973 case 1: return AMDGPU::TTMP_32RegClassID; 1974 case 2: return AMDGPU::TTMP_64RegClassID; 1975 case 4: return AMDGPU::TTMP_128RegClassID; 1976 case 8: return AMDGPU::TTMP_256RegClassID; 1977 case 16: return AMDGPU::TTMP_512RegClassID; 1978 } 1979 } else if (Is == IS_SGPR) { 1980 switch (RegWidth) { 1981 default: return -1; 1982 case 1: return AMDGPU::SGPR_32RegClassID; 1983 case 2: return AMDGPU::SGPR_64RegClassID; 1984 case 3: return AMDGPU::SGPR_96RegClassID; 1985 case 4: return AMDGPU::SGPR_128RegClassID; 1986 case 5: return AMDGPU::SGPR_160RegClassID; 1987 case 6: return AMDGPU::SGPR_192RegClassID; 1988 case 8: return AMDGPU::SGPR_256RegClassID; 1989 case 16: return AMDGPU::SGPR_512RegClassID; 1990 } 1991 } else if (Is == IS_AGPR) { 1992 switch (RegWidth) { 1993 default: return -1; 1994 case 1: return AMDGPU::AGPR_32RegClassID; 1995 case 2: return AMDGPU::AReg_64RegClassID; 1996 case 3: return AMDGPU::AReg_96RegClassID; 1997 case 4: return AMDGPU::AReg_128RegClassID; 1998 case 5: return AMDGPU::AReg_160RegClassID; 1999 case 6: return AMDGPU::AReg_192RegClassID; 2000 case 8: return AMDGPU::AReg_256RegClassID; 2001 case 16: return AMDGPU::AReg_512RegClassID; 2002 case 32: return AMDGPU::AReg_1024RegClassID; 2003 } 2004 } 2005 return -1; 2006 } 2007 2008 static unsigned getSpecialRegForName(StringRef RegName) { 2009 return StringSwitch<unsigned>(RegName) 2010 .Case("exec", AMDGPU::EXEC) 2011 .Case("vcc", AMDGPU::VCC) 2012 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2013 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2014 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2015 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2016 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2017 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2018 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2019 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2020 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2021 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2022 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2023 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2024 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2025 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2026 .Case("m0", AMDGPU::M0) 2027 .Case("vccz", AMDGPU::SRC_VCCZ) 2028 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2029 .Case("execz", AMDGPU::SRC_EXECZ) 2030 .Case("src_execz", AMDGPU::SRC_EXECZ) 2031 .Case("scc", AMDGPU::SRC_SCC) 2032 .Case("src_scc", AMDGPU::SRC_SCC) 2033 .Case("tba", AMDGPU::TBA) 2034 .Case("tma", AMDGPU::TMA) 2035 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2036 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2037 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2038 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2039 .Case("vcc_lo", AMDGPU::VCC_LO) 2040 .Case("vcc_hi", AMDGPU::VCC_HI) 2041 .Case("exec_lo", AMDGPU::EXEC_LO) 2042 .Case("exec_hi", AMDGPU::EXEC_HI) 2043 .Case("tma_lo", AMDGPU::TMA_LO) 2044 .Case("tma_hi", AMDGPU::TMA_HI) 2045 .Case("tba_lo", AMDGPU::TBA_LO) 2046 .Case("tba_hi", AMDGPU::TBA_HI) 2047 .Case("pc", AMDGPU::PC_REG) 2048 .Case("null", AMDGPU::SGPR_NULL) 2049 .Default(AMDGPU::NoRegister); 2050 } 2051 2052 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2053 SMLoc &EndLoc, bool RestoreOnFailure) { 2054 auto R = parseRegister(); 2055 if (!R) return true; 2056 assert(R->isReg()); 2057 RegNo = R->getReg(); 2058 StartLoc = R->getStartLoc(); 2059 EndLoc = R->getEndLoc(); 2060 return false; 2061 } 2062 2063 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2064 SMLoc &EndLoc) { 2065 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2066 } 2067 2068 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2069 SMLoc &StartLoc, 2070 SMLoc &EndLoc) { 2071 bool Result = 2072 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2073 bool PendingErrors = getParser().hasPendingError(); 2074 getParser().clearPendingErrors(); 2075 if (PendingErrors) 2076 return MatchOperand_ParseFail; 2077 if (Result) 2078 return MatchOperand_NoMatch; 2079 return MatchOperand_Success; 2080 } 2081 2082 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2083 RegisterKind RegKind, unsigned Reg1, 2084 SMLoc Loc) { 2085 switch (RegKind) { 2086 case IS_SPECIAL: 2087 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2088 Reg = AMDGPU::EXEC; 2089 RegWidth = 2; 2090 return true; 2091 } 2092 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2093 Reg = AMDGPU::FLAT_SCR; 2094 RegWidth = 2; 2095 return true; 2096 } 2097 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2098 Reg = AMDGPU::XNACK_MASK; 2099 RegWidth = 2; 2100 return true; 2101 } 2102 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2103 Reg = AMDGPU::VCC; 2104 RegWidth = 2; 2105 return true; 2106 } 2107 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2108 Reg = AMDGPU::TBA; 2109 RegWidth = 2; 2110 return true; 2111 } 2112 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2113 Reg = AMDGPU::TMA; 2114 
RegWidth = 2; 2115 return true; 2116 } 2117 Error(Loc, "register does not fit in the list"); 2118 return false; 2119 case IS_VGPR: 2120 case IS_SGPR: 2121 case IS_AGPR: 2122 case IS_TTMP: 2123 if (Reg1 != Reg + RegWidth) { 2124 Error(Loc, "registers in a list must have consecutive indices"); 2125 return false; 2126 } 2127 RegWidth++; 2128 return true; 2129 default: 2130 llvm_unreachable("unexpected register kind"); 2131 } 2132 } 2133 2134 struct RegInfo { 2135 StringLiteral Name; 2136 RegisterKind Kind; 2137 }; 2138 2139 static constexpr RegInfo RegularRegisters[] = { 2140 {{"v"}, IS_VGPR}, 2141 {{"s"}, IS_SGPR}, 2142 {{"ttmp"}, IS_TTMP}, 2143 {{"acc"}, IS_AGPR}, 2144 {{"a"}, IS_AGPR}, 2145 }; 2146 2147 static bool isRegularReg(RegisterKind Kind) { 2148 return Kind == IS_VGPR || 2149 Kind == IS_SGPR || 2150 Kind == IS_TTMP || 2151 Kind == IS_AGPR; 2152 } 2153 2154 static const RegInfo* getRegularRegInfo(StringRef Str) { 2155 for (const RegInfo &Reg : RegularRegisters) 2156 if (Str.startswith(Reg.Name)) 2157 return &Reg; 2158 return nullptr; 2159 } 2160 2161 static bool getRegNum(StringRef Str, unsigned& Num) { 2162 return !Str.getAsInteger(10, Num); 2163 } 2164 2165 bool 2166 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2167 const AsmToken &NextToken) const { 2168 2169 // A list of consecutive registers: [s0,s1,s2,s3] 2170 if (Token.is(AsmToken::LBrac)) 2171 return true; 2172 2173 if (!Token.is(AsmToken::Identifier)) 2174 return false; 2175 2176 // A single register like s0 or a range of registers like s[0:1] 2177 2178 StringRef Str = Token.getString(); 2179 const RegInfo *Reg = getRegularRegInfo(Str); 2180 if (Reg) { 2181 StringRef RegName = Reg->Name; 2182 StringRef RegSuffix = Str.substr(RegName.size()); 2183 if (!RegSuffix.empty()) { 2184 unsigned Num; 2185 // A single register with an index: rXX 2186 if (getRegNum(RegSuffix, Num)) 2187 return true; 2188 } else { 2189 // A range of registers: r[XX:YY]. 2190 if (NextToken.is(AsmToken::LBrac)) 2191 return true; 2192 } 2193 } 2194 2195 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2196 } 2197 2198 bool 2199 AMDGPUAsmParser::isRegister() 2200 { 2201 return isRegister(getToken(), peekToken()); 2202 } 2203 2204 unsigned 2205 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2206 unsigned RegNum, 2207 unsigned RegWidth, 2208 SMLoc Loc) { 2209 2210 assert(isRegularReg(RegKind)); 2211 2212 unsigned AlignSize = 1; 2213 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2214 // SGPR and TTMP registers must be aligned. 2215 // Max required alignment is 4 dwords. 
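  // For illustration (assumed assembler behavior, based on the alignment
  // check that follows):
  //   s[4:7]  - RegNum 4 is a multiple of AlignSize 4, accepted
  //   s[3:6]  - RegNum 3 is not 4-dword aligned, rejected with
  //             "invalid register alignment"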
2216 AlignSize = std::min(RegWidth, 4u); 2217 } 2218 2219 if (RegNum % AlignSize != 0) { 2220 Error(Loc, "invalid register alignment"); 2221 return AMDGPU::NoRegister; 2222 } 2223 2224 unsigned RegIdx = RegNum / AlignSize; 2225 int RCID = getRegClass(RegKind, RegWidth); 2226 if (RCID == -1) { 2227 Error(Loc, "invalid or unsupported register size"); 2228 return AMDGPU::NoRegister; 2229 } 2230 2231 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2232 const MCRegisterClass RC = TRI->getRegClass(RCID); 2233 if (RegIdx >= RC.getNumRegs()) { 2234 Error(Loc, "register index is out of range"); 2235 return AMDGPU::NoRegister; 2236 } 2237 2238 return RC.getRegister(RegIdx); 2239 } 2240 2241 bool 2242 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2243 int64_t RegLo, RegHi; 2244 if (!skipToken(AsmToken::LBrac, "missing register index")) 2245 return false; 2246 2247 SMLoc FirstIdxLoc = getLoc(); 2248 SMLoc SecondIdxLoc; 2249 2250 if (!parseExpr(RegLo)) 2251 return false; 2252 2253 if (trySkipToken(AsmToken::Colon)) { 2254 SecondIdxLoc = getLoc(); 2255 if (!parseExpr(RegHi)) 2256 return false; 2257 } else { 2258 RegHi = RegLo; 2259 } 2260 2261 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2262 return false; 2263 2264 if (!isUInt<32>(RegLo)) { 2265 Error(FirstIdxLoc, "invalid register index"); 2266 return false; 2267 } 2268 2269 if (!isUInt<32>(RegHi)) { 2270 Error(SecondIdxLoc, "invalid register index"); 2271 return false; 2272 } 2273 2274 if (RegLo > RegHi) { 2275 Error(FirstIdxLoc, "first register index should not exceed second index"); 2276 return false; 2277 } 2278 2279 Num = static_cast<unsigned>(RegLo); 2280 Width = (RegHi - RegLo) + 1; 2281 return true; 2282 } 2283 2284 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2285 unsigned &RegNum, unsigned &RegWidth, 2286 SmallVectorImpl<AsmToken> &Tokens) { 2287 assert(isToken(AsmToken::Identifier)); 2288 unsigned Reg = getSpecialRegForName(getTokenStr()); 2289 if (Reg) { 2290 RegNum = 0; 2291 RegWidth = 1; 2292 RegKind = IS_SPECIAL; 2293 Tokens.push_back(getToken()); 2294 lex(); // skip register name 2295 } 2296 return Reg; 2297 } 2298 2299 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2300 unsigned &RegNum, unsigned &RegWidth, 2301 SmallVectorImpl<AsmToken> &Tokens) { 2302 assert(isToken(AsmToken::Identifier)); 2303 StringRef RegName = getTokenStr(); 2304 auto Loc = getLoc(); 2305 2306 const RegInfo *RI = getRegularRegInfo(RegName); 2307 if (!RI) { 2308 Error(Loc, "invalid register name"); 2309 return AMDGPU::NoRegister; 2310 } 2311 2312 Tokens.push_back(getToken()); 2313 lex(); // skip register name 2314 2315 RegKind = RI->Kind; 2316 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2317 if (!RegSuffix.empty()) { 2318 // Single 32-bit register: vXX. 2319 if (!getRegNum(RegSuffix, RegNum)) { 2320 Error(Loc, "invalid register index"); 2321 return AMDGPU::NoRegister; 2322 } 2323 RegWidth = 1; 2324 } else { 2325 // Range of registers: v[XX:YY]. ":YY" is optional. 
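    // Examples of the register syntax handled here (illustrative only):
    //   v7       - RegSuffix is "7", a single 32-bit VGPR
    //   v[8:11]  - no RegSuffix; the range is parsed by ParseRegRange below
    //   v[8]     - ":YY" is omitted, so RegHi defaults to RegLo (one VGPR)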
2326 if (!ParseRegRange(RegNum, RegWidth)) 2327 return AMDGPU::NoRegister; 2328 } 2329 2330 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2331 } 2332 2333 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2334 unsigned &RegWidth, 2335 SmallVectorImpl<AsmToken> &Tokens) { 2336 unsigned Reg = AMDGPU::NoRegister; 2337 auto ListLoc = getLoc(); 2338 2339 if (!skipToken(AsmToken::LBrac, 2340 "expected a register or a list of registers")) { 2341 return AMDGPU::NoRegister; 2342 } 2343 2344 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2345 2346 auto Loc = getLoc(); 2347 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2348 return AMDGPU::NoRegister; 2349 if (RegWidth != 1) { 2350 Error(Loc, "expected a single 32-bit register"); 2351 return AMDGPU::NoRegister; 2352 } 2353 2354 for (; trySkipToken(AsmToken::Comma); ) { 2355 RegisterKind NextRegKind; 2356 unsigned NextReg, NextRegNum, NextRegWidth; 2357 Loc = getLoc(); 2358 2359 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2360 NextRegNum, NextRegWidth, 2361 Tokens)) { 2362 return AMDGPU::NoRegister; 2363 } 2364 if (NextRegWidth != 1) { 2365 Error(Loc, "expected a single 32-bit register"); 2366 return AMDGPU::NoRegister; 2367 } 2368 if (NextRegKind != RegKind) { 2369 Error(Loc, "registers in a list must be of the same kind"); 2370 return AMDGPU::NoRegister; 2371 } 2372 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2373 return AMDGPU::NoRegister; 2374 } 2375 2376 if (!skipToken(AsmToken::RBrac, 2377 "expected a comma or a closing square bracket")) { 2378 return AMDGPU::NoRegister; 2379 } 2380 2381 if (isRegularReg(RegKind)) 2382 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2383 2384 return Reg; 2385 } 2386 2387 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2388 unsigned &RegNum, unsigned &RegWidth, 2389 SmallVectorImpl<AsmToken> &Tokens) { 2390 auto Loc = getLoc(); 2391 Reg = AMDGPU::NoRegister; 2392 2393 if (isToken(AsmToken::Identifier)) { 2394 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2395 if (Reg == AMDGPU::NoRegister) 2396 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2397 } else { 2398 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2399 } 2400 2401 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2402 if (Reg == AMDGPU::NoRegister) { 2403 assert(Parser.hasPendingError()); 2404 return false; 2405 } 2406 2407 if (!subtargetHasRegister(*TRI, Reg)) { 2408 if (Reg == AMDGPU::SGPR_NULL) { 2409 Error(Loc, "'null' operand is not supported on this GPU"); 2410 } else { 2411 Error(Loc, "register not available on this GPU"); 2412 } 2413 return false; 2414 } 2415 2416 return true; 2417 } 2418 2419 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2420 unsigned &RegNum, unsigned &RegWidth, 2421 bool RestoreOnFailure /*=false*/) { 2422 Reg = AMDGPU::NoRegister; 2423 2424 SmallVector<AsmToken, 1> Tokens; 2425 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2426 if (RestoreOnFailure) { 2427 while (!Tokens.empty()) { 2428 getLexer().UnLex(Tokens.pop_back_val()); 2429 } 2430 } 2431 return true; 2432 } 2433 return false; 2434 } 2435 2436 Optional<StringRef> 2437 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2438 switch (RegKind) { 2439 case IS_VGPR: 2440 return StringRef(".amdgcn.next_free_vgpr"); 2441 case IS_SGPR: 2442 return StringRef(".amdgcn.next_free_sgpr"); 2443 default: 2444 return None; 2445 } 2446 } 2447 2448 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2449 auto SymbolName = getGprCountSymbolName(RegKind); 2450 assert(SymbolName && "initializing invalid register kind"); 2451 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2452 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2453 } 2454 2455 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2456 unsigned DwordRegIndex, 2457 unsigned RegWidth) { 2458 // Symbols are only defined for GCN targets 2459 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2460 return true; 2461 2462 auto SymbolName = getGprCountSymbolName(RegKind); 2463 if (!SymbolName) 2464 return true; 2465 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2466 2467 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2468 int64_t OldCount; 2469 2470 if (!Sym->isVariable()) 2471 return !Error(getParser().getTok().getLoc(), 2472 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2473 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2474 return !Error( 2475 getParser().getTok().getLoc(), 2476 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2477 2478 if (OldCount <= NewMax) 2479 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2480 2481 return true; 2482 } 2483 2484 std::unique_ptr<AMDGPUOperand> 2485 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2486 const auto &Tok = Parser.getTok(); 2487 SMLoc StartLoc = Tok.getLoc(); 2488 SMLoc EndLoc = Tok.getEndLoc(); 2489 RegisterKind RegKind; 2490 unsigned Reg, RegNum, RegWidth; 2491 2492 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2493 return nullptr; 2494 } 2495 if (isHsaAbiVersion3(&getSTI())) { 2496 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2497 return nullptr; 2498 } else 2499 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2500 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2501 } 2502 2503 OperandMatchResultTy 2504 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2505 // TODO: add syntactic sugar for 1/(2*PI) 2506 2507 assert(!isRegister()); 2508 assert(!isModifier()); 2509 2510 const auto& Tok = getToken(); 2511 const auto& NextTok = peekToken(); 2512 bool IsReal = Tok.is(AsmToken::Real); 2513 SMLoc S = getLoc(); 2514 bool Negate = false; 2515 2516 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2517 lex(); 2518 IsReal = true; 2519 Negate = true; 2520 } 2521 2522 if (IsReal) { 2523 // Floating-point expressions are not supported. 2524 // Can only allow floating-point literals with an 2525 // optional sign. 2526 2527 StringRef Num = getTokenStr(); 2528 lex(); 2529 2530 APFloat RealVal(APFloat::IEEEdouble()); 2531 auto roundMode = APFloat::rmNearestTiesToEven; 2532 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2533 return MatchOperand_ParseFail; 2534 } 2535 if (Negate) 2536 RealVal.changeSign(); 2537 2538 Operands.push_back( 2539 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2540 AMDGPUOperand::ImmTyNone, true)); 2541 2542 return MatchOperand_Success; 2543 2544 } else { 2545 int64_t IntVal; 2546 const MCExpr *Expr; 2547 SMLoc S = getLoc(); 2548 2549 if (HasSP3AbsModifier) { 2550 // This is a workaround for handling expressions 2551 // as arguments of SP3 'abs' modifier, for example: 2552 // |1.0| 2553 // |-1| 2554 // |1+x| 2555 // This syntax is not compatible with syntax of standard 2556 // MC expressions (due to the trailing '|'). 
2557 SMLoc EndLoc; 2558 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2559 return MatchOperand_ParseFail; 2560 } else { 2561 if (Parser.parseExpression(Expr)) 2562 return MatchOperand_ParseFail; 2563 } 2564 2565 if (Expr->evaluateAsAbsolute(IntVal)) { 2566 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2567 } else { 2568 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2569 } 2570 2571 return MatchOperand_Success; 2572 } 2573 2574 return MatchOperand_NoMatch; 2575 } 2576 2577 OperandMatchResultTy 2578 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2579 if (!isRegister()) 2580 return MatchOperand_NoMatch; 2581 2582 if (auto R = parseRegister()) { 2583 assert(R->isReg()); 2584 Operands.push_back(std::move(R)); 2585 return MatchOperand_Success; 2586 } 2587 return MatchOperand_ParseFail; 2588 } 2589 2590 OperandMatchResultTy 2591 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2592 auto res = parseReg(Operands); 2593 if (res != MatchOperand_NoMatch) { 2594 return res; 2595 } else if (isModifier()) { 2596 return MatchOperand_NoMatch; 2597 } else { 2598 return parseImm(Operands, HasSP3AbsMod); 2599 } 2600 } 2601 2602 bool 2603 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2604 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2605 const auto &str = Token.getString(); 2606 return str == "abs" || str == "neg" || str == "sext"; 2607 } 2608 return false; 2609 } 2610 2611 bool 2612 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2613 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2614 } 2615 2616 bool 2617 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2618 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2619 } 2620 2621 bool 2622 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2623 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2624 } 2625 2626 // Check if this is an operand modifier or an opcode modifier 2627 // which may look like an expression but it is not. We should 2628 // avoid parsing these modifiers as expressions. Currently 2629 // recognized sequences are: 2630 // |...| 2631 // abs(...) 2632 // neg(...) 2633 // sext(...) 2634 // -reg 2635 // -|...| 2636 // -abs(...) 2637 // name:... 2638 // Note that simple opcode modifiers like 'gds' may be parsed as 2639 // expressions; this is a special case. See getExpressionAsToken. 2640 // 2641 bool 2642 AMDGPUAsmParser::isModifier() { 2643 2644 AsmToken Tok = getToken(); 2645 AsmToken NextToken[2]; 2646 peekTokens(NextToken); 2647 2648 return isOperandModifier(Tok, NextToken[0]) || 2649 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2650 isOpcodeModifierWithVal(Tok, NextToken[0]); 2651 } 2652 2653 // Check if the current token is an SP3 'neg' modifier. 2654 // Currently this modifier is allowed in the following context: 2655 // 2656 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2657 // 2. Before an 'abs' modifier: -abs(...) 2658 // 3. Before an SP3 'abs' modifier: -|...| 2659 // 2660 // In all other cases "-" is handled as a part 2661 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal N,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2735 } 2736 2737 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2738 return MatchOperand_ParseFail; 2739 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2740 return MatchOperand_ParseFail; 2741 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2742 return MatchOperand_ParseFail; 2743 2744 AMDGPUOperand::Modifiers Mods; 2745 Mods.Abs = Abs || SP3Abs; 2746 Mods.Neg = Neg || SP3Neg; 2747 2748 if (Mods.hasFPModifiers()) { 2749 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2750 if (Op.isExpr()) { 2751 Error(Op.getStartLoc(), "expected an absolute expression"); 2752 return MatchOperand_ParseFail; 2753 } 2754 Op.setModifiers(Mods); 2755 } 2756 return MatchOperand_Success; 2757 } 2758 2759 OperandMatchResultTy 2760 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2761 bool AllowImm) { 2762 bool Sext = trySkipId("sext"); 2763 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2764 return MatchOperand_ParseFail; 2765 2766 OperandMatchResultTy Res; 2767 if (AllowImm) { 2768 Res = parseRegOrImm(Operands); 2769 } else { 2770 Res = parseReg(Operands); 2771 } 2772 if (Res != MatchOperand_Success) { 2773 return Sext? MatchOperand_ParseFail : Res; 2774 } 2775 2776 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2777 return MatchOperand_ParseFail; 2778 2779 AMDGPUOperand::Modifiers Mods; 2780 Mods.Sext = Sext; 2781 2782 if (Mods.hasIntModifiers()) { 2783 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2784 if (Op.isExpr()) { 2785 Error(Op.getStartLoc(), "expected an absolute expression"); 2786 return MatchOperand_ParseFail; 2787 } 2788 Op.setModifiers(Mods); 2789 } 2790 2791 return MatchOperand_Success; 2792 } 2793 2794 OperandMatchResultTy 2795 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2796 return parseRegOrImmWithFPInputMods(Operands, false); 2797 } 2798 2799 OperandMatchResultTy 2800 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2801 return parseRegOrImmWithIntInputMods(Operands, false); 2802 } 2803 2804 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2805 auto Loc = getLoc(); 2806 if (trySkipId("off")) { 2807 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2808 AMDGPUOperand::ImmTyOff, false)); 2809 return MatchOperand_Success; 2810 } 2811 2812 if (!isRegister()) 2813 return MatchOperand_NoMatch; 2814 2815 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2816 if (Reg) { 2817 Operands.push_back(std::move(Reg)); 2818 return MatchOperand_Success; 2819 } 2820 2821 return MatchOperand_ParseFail; 2822 2823 } 2824 2825 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2826 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2827 2828 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2829 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2830 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2831 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2832 return Match_InvalidOperand; 2833 2834 if ((TSFlags & SIInstrFlags::VOP3) && 2835 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2836 getForcedEncodingSize() != 64) 2837 return Match_PreferE32; 2838 2839 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2840 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2841 // v_mac_f32/16 allow only dst_sel == DWORD; 2842 auto OpNum = 2843 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// Which asm variants we should check.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
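// Illustrative examples of the distinction drawn below (assembler syntax
// shown for illustration only):
//   v_add_f32 v0, 0.5, v1   - 0.5 has an inline encoding and does not count
//                             as a literal on the constant bus
//   v_add_f32 v0, 0.3, v1   - 0.3 has no inline encoding and must be emitted
//                             as a 32-bit literal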
2927 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2928 unsigned OpIdx) const { 2929 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2930 2931 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2932 return false; 2933 } 2934 2935 const MCOperand &MO = Inst.getOperand(OpIdx); 2936 2937 int64_t Val = MO.getImm(); 2938 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2939 2940 switch (OpSize) { // expected operand size 2941 case 8: 2942 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2943 case 4: 2944 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2945 case 2: { 2946 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2947 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2948 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2949 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2950 return AMDGPU::isInlinableIntLiteral(Val); 2951 2952 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2953 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2954 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2955 return AMDGPU::isInlinableIntLiteralV216(Val); 2956 2957 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2958 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2959 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2960 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2961 2962 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2963 } 2964 default: 2965 llvm_unreachable("invalid operand size"); 2966 } 2967 } 2968 2969 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2970 if (!isGFX10()) 2971 return 1; 2972 2973 switch (Opcode) { 2974 // 64-bit shift instructions can use only one scalar value input 2975 case AMDGPU::V_LSHLREV_B64: 2976 case AMDGPU::V_LSHLREV_B64_gfx10: 2977 case AMDGPU::V_LSHL_B64: 2978 case AMDGPU::V_LSHRREV_B64: 2979 case AMDGPU::V_LSHRREV_B64_gfx10: 2980 case AMDGPU::V_LSHR_B64: 2981 case AMDGPU::V_ASHRREV_I64: 2982 case AMDGPU::V_ASHRREV_I64_gfx10: 2983 case AMDGPU::V_ASHR_I64: 2984 return 1; 2985 default: 2986 return 2; 2987 } 2988 } 2989 2990 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2991 const MCOperand &MO = Inst.getOperand(OpIdx); 2992 if (MO.isImm()) { 2993 return !isInlineConstant(Inst, OpIdx); 2994 } else if (MO.isReg()) { 2995 auto Reg = MO.getReg(); 2996 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2997 auto PReg = mc2PseudoReg(Reg); 2998 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 2999 } else { 3000 return true; 3001 } 3002 } 3003 3004 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 3005 const unsigned Opcode = Inst.getOpcode(); 3006 const MCInstrDesc &Desc = MII.get(Opcode); 3007 unsigned ConstantBusUseCount = 0; 3008 unsigned NumLiterals = 0; 3009 unsigned LiteralSize; 3010 3011 if (Desc.TSFlags & 3012 (SIInstrFlags::VOPC | 3013 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3014 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3015 SIInstrFlags::SDWA)) { 3016 // Check special imm operands (used by madmk, etc) 3017 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3018 ++ConstantBusUseCount; 3019 } 3020 3021 SmallDenseSet<unsigned> SGPRsUsed; 3022 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3023 if (SGPRUsed != AMDGPU::NoRegister) { 3024 SGPRsUsed.insert(SGPRUsed); 3025 ++ConstantBusUseCount; 3026 } 3027 3028 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3029 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3030 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3031 3032 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3033 3034 for (int OpIdx : OpIndices) { 3035 if (OpIdx == -1) break; 3036 3037 const MCOperand &MO = Inst.getOperand(OpIdx); 3038 if (usesConstantBus(Inst, OpIdx)) { 3039 if (MO.isReg()) { 3040 const unsigned Reg = mc2PseudoReg(MO.getReg()); 3041 // Pairs of registers with a partial intersections like these 3042 // s0, s[0:1] 3043 // flat_scratch_lo, flat_scratch 3044 // flat_scratch_lo, flat_scratch_hi 3045 // are theoretically valid but they are disabled anyway. 3046 // Note that this code mimics SIInstrInfo::verifyInstruction 3047 if (!SGPRsUsed.count(Reg)) { 3048 SGPRsUsed.insert(Reg); 3049 ++ConstantBusUseCount; 3050 } 3051 } else { // Expression or a literal 3052 3053 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3054 continue; // special operand like VINTERP attr_chan 3055 3056 // An instruction may use only one literal. 3057 // This has been validated on the previous step. 3058 // See validateVOP3Literal. 3059 // This literal may be used as more than one operand. 3060 // If all these operands are of the same size, 3061 // this literal counts as one scalar value. 3062 // Otherwise it counts as 2 scalar values. 3063 // See "GFX10 Shader Programming", section 3.6.2.3. 3064 3065 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3066 if (Size < 4) Size = 4; 3067 3068 if (NumLiterals == 0) { 3069 NumLiterals = 1; 3070 LiteralSize = Size; 3071 } else if (LiteralSize != Size) { 3072 NumLiterals = 2; 3073 } 3074 } 3075 } 3076 } 3077 } 3078 ConstantBusUseCount += NumLiterals; 3079 3080 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 3081 } 3082 3083 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 3084 const unsigned Opcode = Inst.getOpcode(); 3085 const MCInstrDesc &Desc = MII.get(Opcode); 3086 3087 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3088 if (DstIdx == -1 || 3089 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3090 return true; 3091 } 3092 3093 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3094 3095 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3096 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3097 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3098 3099 assert(DstIdx != -1); 3100 const MCOperand &Dst = Inst.getOperand(DstIdx); 3101 assert(Dst.isReg()); 3102 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3103 3104 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3105 3106 for (int SrcIdx : SrcIndices) { 3107 if (SrcIdx == -1) break; 3108 const MCOperand &Src = Inst.getOperand(SrcIdx); 3109 if (Src.isReg()) { 3110 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3111 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3112 return false; 3113 } 3114 } 3115 } 3116 3117 return true; 3118 } 3119 3120 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3121 3122 const unsigned Opc = Inst.getOpcode(); 3123 const MCInstrDesc &Desc = MII.get(Opc); 3124 3125 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3126 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3127 assert(ClampIdx != -1); 3128 return Inst.getOperand(ClampIdx).getImm() == 0; 3129 } 3130 3131 return true; 3132 } 3133 3134 bool 
AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3135 3136 const unsigned Opc = Inst.getOpcode(); 3137 const MCInstrDesc &Desc = MII.get(Opc); 3138 3139 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3140 return true; 3141 3142 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3143 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3144 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3145 3146 assert(VDataIdx != -1); 3147 3148 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3149 return true; 3150 3151 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3152 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3153 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3154 if (DMask == 0) 3155 DMask = 1; 3156 3157 unsigned DataSize = 3158 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3159 if (hasPackedD16()) { 3160 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3161 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3162 DataSize = (DataSize + 1) / 2; 3163 } 3164 3165 return (VDataSize / 4) == DataSize + TFESize; 3166 } 3167 3168 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3169 const unsigned Opc = Inst.getOpcode(); 3170 const MCInstrDesc &Desc = MII.get(Opc); 3171 3172 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3173 return true; 3174 3175 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3176 3177 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3178 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3179 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3180 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3181 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3182 3183 assert(VAddr0Idx != -1); 3184 assert(SrsrcIdx != -1); 3185 assert(SrsrcIdx > VAddr0Idx); 3186 3187 if (DimIdx == -1) 3188 return true; // intersect_ray 3189 3190 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3191 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3192 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3193 unsigned VAddrSize = 3194 IsNSA ? SrsrcIdx - VAddr0Idx 3195 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3196 3197 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3198 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3199 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3200 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3201 if (!IsNSA) { 3202 if (AddrSize > 8) 3203 AddrSize = 16; 3204 else if (AddrSize > 4) 3205 AddrSize = 8; 3206 } 3207 3208 return VAddrSize == AddrSize; 3209 } 3210 3211 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3212 3213 const unsigned Opc = Inst.getOpcode(); 3214 const MCInstrDesc &Desc = MII.get(Opc); 3215 3216 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3217 return true; 3218 if (!Desc.mayLoad() || !Desc.mayStore()) 3219 return true; // Not atomic 3220 3221 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3222 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3223 3224 // This is an incomplete check because image_atomic_cmpswap 3225 // may only use 0x3 and 0xf while other atomic operations 3226 // may use 0x1 and 0x3. However these limitations are 3227 // verified when we check that dmask matches dst size. 
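  // Illustrative dmask values (assumed, consistent with the comment above):
  //   image_atomic_add ...     dmask:0x1   - 32-bit atomic data
  //   image_atomic_cmpswap ... dmask:0x3   - 32-bit data plus compare value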
3228 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3229 } 3230 3231 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3232 3233 const unsigned Opc = Inst.getOpcode(); 3234 const MCInstrDesc &Desc = MII.get(Opc); 3235 3236 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3237 return true; 3238 3239 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3240 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3241 3242 // GATHER4 instructions use dmask in a different fashion compared to 3243 // other MIMG instructions. The only useful DMASK values are 3244 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3245 // (red,red,red,red) etc.) The ISA document doesn't mention 3246 // this. 3247 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3248 } 3249 3250 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3251 { 3252 switch (Opcode) { 3253 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3254 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3255 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3256 return true; 3257 default: 3258 return false; 3259 } 3260 } 3261 3262 // movrels* opcodes should only allow VGPRS as src0. 3263 // This is specified in .td description for vop1/vop3, 3264 // but sdwa is handled differently. See isSDWAOperand. 3265 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3266 3267 const unsigned Opc = Inst.getOpcode(); 3268 const MCInstrDesc &Desc = MII.get(Opc); 3269 3270 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3271 return true; 3272 3273 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3274 assert(Src0Idx != -1); 3275 3276 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3277 if (!Src0.isReg()) 3278 return false; 3279 3280 auto Reg = Src0.getReg(); 3281 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3282 return !isSGPR(mc2PseudoReg(Reg), TRI); 3283 } 3284 3285 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3286 3287 const unsigned Opc = Inst.getOpcode(); 3288 3289 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3290 return true; 3291 3292 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3293 assert(Src0Idx != -1); 3294 3295 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3296 if (!Src0.isReg()) 3297 return true; 3298 3299 auto Reg = Src0.getReg(); 3300 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3301 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3302 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3303 return false; 3304 } 3305 3306 return true; 3307 } 3308 3309 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3310 switch (Inst.getOpcode()) { 3311 default: 3312 return true; 3313 case V_DIV_SCALE_F32_gfx6_gfx7: 3314 case V_DIV_SCALE_F32_vi: 3315 case V_DIV_SCALE_F32_gfx10: 3316 case V_DIV_SCALE_F64_gfx6_gfx7: 3317 case V_DIV_SCALE_F64_vi: 3318 case V_DIV_SCALE_F64_gfx10: 3319 break; 3320 } 3321 3322 // TODO: Check that src0 = src1 or src2. 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
            .getImm() &
        SISrcMods::ABS) {
      Error(getLoc(), "ABS not allowed in VOP3B instructions");
      return false;
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case
AMDGPU::V_LSHRREV_B32_e64_gfx10: 3437 3438 case AMDGPU::V_ASHRREV_I32_e32: 3439 case AMDGPU::V_ASHRREV_I32_e64: 3440 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3441 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3442 case AMDGPU::V_ASHRREV_I32_e32_vi: 3443 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3444 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3445 case AMDGPU::V_ASHRREV_I32_e64_vi: 3446 3447 case AMDGPU::V_LSHLREV_B32_e32: 3448 case AMDGPU::V_LSHLREV_B32_e64: 3449 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3450 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3451 case AMDGPU::V_LSHLREV_B32_e32_vi: 3452 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3453 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3454 case AMDGPU::V_LSHLREV_B32_e64_vi: 3455 3456 case AMDGPU::V_LSHLREV_B16_e32: 3457 case AMDGPU::V_LSHLREV_B16_e64: 3458 case AMDGPU::V_LSHLREV_B16_e32_vi: 3459 case AMDGPU::V_LSHLREV_B16_e64_vi: 3460 case AMDGPU::V_LSHLREV_B16_gfx10: 3461 3462 case AMDGPU::V_LSHRREV_B16_e32: 3463 case AMDGPU::V_LSHRREV_B16_e64: 3464 case AMDGPU::V_LSHRREV_B16_e32_vi: 3465 case AMDGPU::V_LSHRREV_B16_e64_vi: 3466 case AMDGPU::V_LSHRREV_B16_gfx10: 3467 3468 case AMDGPU::V_ASHRREV_I16_e32: 3469 case AMDGPU::V_ASHRREV_I16_e64: 3470 case AMDGPU::V_ASHRREV_I16_e32_vi: 3471 case AMDGPU::V_ASHRREV_I16_e64_vi: 3472 case AMDGPU::V_ASHRREV_I16_gfx10: 3473 3474 case AMDGPU::V_LSHLREV_B64: 3475 case AMDGPU::V_LSHLREV_B64_gfx10: 3476 case AMDGPU::V_LSHLREV_B64_vi: 3477 3478 case AMDGPU::V_LSHRREV_B64: 3479 case AMDGPU::V_LSHRREV_B64_gfx10: 3480 case AMDGPU::V_LSHRREV_B64_vi: 3481 3482 case AMDGPU::V_ASHRREV_I64: 3483 case AMDGPU::V_ASHRREV_I64_gfx10: 3484 case AMDGPU::V_ASHRREV_I64_vi: 3485 3486 case AMDGPU::V_PK_LSHLREV_B16: 3487 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3488 case AMDGPU::V_PK_LSHLREV_B16_vi: 3489 3490 case AMDGPU::V_PK_LSHRREV_B16: 3491 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3492 case AMDGPU::V_PK_LSHRREV_B16_vi: 3493 case AMDGPU::V_PK_ASHRREV_I16: 3494 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3495 case AMDGPU::V_PK_ASHRREV_I16_vi: 3496 return true; 3497 default: 3498 return false; 3499 } 3500 } 3501 3502 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3503 3504 using namespace SIInstrFlags; 3505 const unsigned Opcode = Inst.getOpcode(); 3506 const MCInstrDesc &Desc = MII.get(Opcode); 3507 3508 // lds_direct register is defined so that it can be used 3509 // with 9-bit operands only. Ignore encodings which do not accept these. 3510 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3511 return true; 3512 3513 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3514 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3515 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3516 3517 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3518 3519 // lds_direct cannot be specified as either src1 or src2. 3520 for (int SrcIdx : SrcIndices) { 3521 if (SrcIdx == -1) break; 3522 const MCOperand &Src = Inst.getOperand(SrcIdx); 3523 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3524 return false; 3525 } 3526 } 3527 3528 if (Src0Idx == -1) 3529 return true; 3530 3531 const MCOperand &Src = Inst.getOperand(Src0Idx); 3532 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3533 return true; 3534 3535 // lds_direct is specified as src0. Check additional limitations. 
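  // Illustrative cases (assumed assembler syntax):
  //   v_mov_b32 v0, lds_direct        - accepted: lds_direct is src0 of a VOP1
  //   v_add_f32 v0, v1, lds_direct    - rejected above: lds_direct used as src1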
3536 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3537 } 3538 3539 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3540 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3541 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3542 if (Op.isFlatOffset()) 3543 return Op.getStartLoc(); 3544 } 3545 return getLoc(); 3546 } 3547 3548 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3549 const OperandVector &Operands) { 3550 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3551 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3552 return true; 3553 3554 auto Opcode = Inst.getOpcode(); 3555 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3556 assert(OpNum != -1); 3557 3558 const auto &Op = Inst.getOperand(OpNum); 3559 if (!hasFlatOffsets() && Op.getImm() != 0) { 3560 Error(getFlatOffsetLoc(Operands), 3561 "flat offset modifier is not supported on this GPU"); 3562 return false; 3563 } 3564 3565 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3566 // For FLAT segment the offset must be positive; 3567 // MSB is ignored and forced to zero. 3568 unsigned OffsetSize = isGFX9() ? 13 : 12; 3569 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3570 if (!isIntN(OffsetSize, Op.getImm())) { 3571 Error(getFlatOffsetLoc(Operands), 3572 isGFX9() ? "expected a 13-bit signed offset" : 3573 "expected a 12-bit signed offset"); 3574 return false; 3575 } 3576 } else { 3577 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3578 Error(getFlatOffsetLoc(Operands), 3579 isGFX9() ? "expected a 12-bit unsigned offset" : 3580 "expected an 11-bit unsigned offset"); 3581 return false; 3582 } 3583 } 3584 3585 return true; 3586 } 3587 3588 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3589 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3590 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3591 if (Op.isSMEMOffset()) 3592 return Op.getStartLoc(); 3593 } 3594 return getLoc(); 3595 } 3596 3597 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3598 const OperandVector &Operands) { 3599 if (isCI() || isSI()) 3600 return true; 3601 3602 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3603 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3604 return true; 3605 3606 auto Opcode = Inst.getOpcode(); 3607 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3608 if (OpNum == -1) 3609 return true; 3610 3611 const auto &Op = Inst.getOperand(OpNum); 3612 if (!Op.isImm()) 3613 return true; 3614 3615 uint64_t Offset = Op.getImm(); 3616 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3617 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3618 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3619 return true; 3620 3621 Error(getSMEMOffsetLoc(Operands), 3622 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3623 "expected a 21-bit signed offset"); 3624 3625 return false; 3626 } 3627 3628 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3629 unsigned Opcode = Inst.getOpcode(); 3630 const MCInstrDesc &Desc = MII.get(Opcode); 3631 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3632 return true; 3633 3634 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3635 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3636 3637 const int OpIndices[] = { Src0Idx, Src1Idx }; 3638 3639 unsigned NumExprs = 0; 3640 unsigned NumLiterals = 0; 3641 uint32_t LiteralValue; 3642 3643 for (int OpIdx : OpIndices) { 3644 if (OpIdx == -1) break; 3645 3646 const MCOperand &MO = Inst.getOperand(OpIdx); 3647 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3648 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3649 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3650 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3651 if (NumLiterals == 0 || LiteralValue != Value) { 3652 LiteralValue = Value; 3653 ++NumLiterals; 3654 } 3655 } else if (MO.isExpr()) { 3656 ++NumExprs; 3657 } 3658 } 3659 } 3660 3661 return NumLiterals + NumExprs <= 1; 3662 } 3663 3664 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3665 const unsigned Opc = Inst.getOpcode(); 3666 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3667 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3668 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3669 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3670 3671 if (OpSel & ~3) 3672 return false; 3673 } 3674 return true; 3675 } 3676 3677 // Check if VCC register matches wavefront size 3678 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3679 auto FB = getFeatureBits(); 3680 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3681 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3682 } 3683 3684 // VOP3 literal is only allowed in GFX10+ and only one can be used 3685 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3686 unsigned Opcode = Inst.getOpcode(); 3687 const MCInstrDesc &Desc = MII.get(Opcode); 3688 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3689 return true; 3690 3691 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3692 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3693 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3694 3695 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3696 3697 unsigned NumExprs = 0; 3698 unsigned NumLiterals = 0; 3699 uint32_t LiteralValue; 3700 3701 for (int OpIdx : OpIndices) { 3702 if (OpIdx == -1) break; 3703 3704 const MCOperand &MO = Inst.getOperand(OpIdx); 3705 if (!MO.isImm() && !MO.isExpr()) 3706 continue; 3707 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3708 continue; 3709 3710 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3711 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3712 return false; 3713 3714 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3715 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3716 if (NumLiterals == 0 || LiteralValue != Value) { 3717 LiteralValue = Value; 3718 ++NumLiterals; 3719 } 3720 } else if (MO.isExpr()) { 3721 ++NumExprs; 3722 } 3723 } 3724 NumLiterals += NumExprs; 3725 3726 return !NumLiterals || 3727 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3728 } 3729 3730 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3731 const SMLoc &IDLoc, 3732 const OperandVector &Operands) { 3733 if (!validateLdsDirect(Inst)) { 3734 Error(IDLoc, 3735 "invalid use of lds_direct"); 3736 return false; 3737 } 3738 if (!validateSOPLiteral(Inst)) { 3739 Error(IDLoc, 3740 "only one literal operand is allowed"); 3741 return false; 3742 } 3743 if (!validateVOP3Literal(Inst)) { 3744 Error(IDLoc, 3745 "invalid literal operand"); 3746 return false; 3747 } 3748 if (!validateConstantBusLimitations(Inst)) { 3749 Error(IDLoc, 3750 "invalid operand (violates constant bus restrictions)"); 3751 return false; 3752 } 3753 if (!validateEarlyClobberLimitations(Inst)) { 3754 Error(IDLoc, 3755 "destination must be different than all sources"); 3756 return false; 3757 } 3758 if (!validateIntClampSupported(Inst)) { 3759 Error(IDLoc, 3760 "integer clamping is not supported on this GPU"); 3761 return false; 3762 } 3763 if (!validateOpSel(Inst)) { 3764 Error(IDLoc, 3765 "invalid op_sel operand"); 3766 return false; 3767 } 3768 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3769 if (!validateMIMGD16(Inst)) { 3770 Error(IDLoc, 3771 "d16 modifier is not supported on this GPU"); 3772 return false; 3773 } 3774 if (!validateMIMGDim(Inst)) { 3775 Error(IDLoc, "dim modifier is required on this GPU"); 3776 return false; 3777 } 3778 if (!validateMIMGDataSize(Inst)) { 3779 Error(IDLoc, 3780 "image data size does not match dmask and tfe"); 3781 return false; 3782 } 3783 if (!validateMIMGAddrSize(Inst)) { 3784 Error(IDLoc, 3785 "image address size does not match dim and a16"); 3786 return false; 3787 } 3788 if (!validateMIMGAtomicDMask(Inst)) { 3789 Error(IDLoc, 3790 "invalid atomic image dmask"); 3791 return false; 3792 } 3793 if (!validateMIMGGatherDMask(Inst)) { 3794 Error(IDLoc, 3795 "invalid image_gather dmask: only one bit must be set"); 3796 return false; 3797 } 3798 if (!validateMovrels(Inst)) { 3799 Error(IDLoc, "source operand must be a VGPR"); 3800 return false; 3801 } 3802 if (!validateFlatOffset(Inst, Operands)) { 3803 return false; 3804 } 3805 if (!validateSMEMOffset(Inst, Operands)) { 3806 return false; 3807 } 3808 if (!validateMAIAccWrite(Inst)) { 3809 return false; 3810 } 3811 if (!validateDivScale(Inst)) { 3812 return false; 3813 } 3814 3815 return true; 3816 } 3817 3818 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3819 const FeatureBitset &FBS, 3820 unsigned VariantID = 0); 3821 3822 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3823 const FeatureBitset &AvailableFeatures, 3824 unsigned VariantID); 3825 3826 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3827 const FeatureBitset &FBS) { 3828 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3829 } 3830 3831 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3832 const FeatureBitset &FBS, 3833 ArrayRef<unsigned> Variants) { 3834 for (auto Variant : Variants) { 3835 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3836 return true; 3837 } 3838 3839 return false; 3840 } 3841 3842 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3843 const SMLoc &IDLoc) { 3844 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3845 3846 // Check if requested instruction variant is supported. 3847 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3848 return false; 3849 3850 // This instruction is not supported. 
3851 // Clear any other pending errors because they are no longer relevant. 3852 getParser().clearPendingErrors(); 3853 3854 // Requested instruction variant is not supported. 3855 // Check if any other variants are supported. 3856 StringRef VariantName = getMatchedVariantName(); 3857 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3858 return Error(IDLoc, 3859 Twine(VariantName, 3860 " variant of this instruction is not supported")); 3861 } 3862 3863 // Finally check if this instruction is supported on any other GPU. 3864 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3865 return Error(IDLoc, "instruction not supported on this GPU"); 3866 } 3867 3868 // Instruction not supported on any GPU. Probably a typo. 3869 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3870 return Error(IDLoc, "invalid instruction" + Suggestion); 3871 } 3872 3873 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3874 OperandVector &Operands, 3875 MCStreamer &Out, 3876 uint64_t &ErrorInfo, 3877 bool MatchingInlineAsm) { 3878 MCInst Inst; 3879 unsigned Result = Match_Success; 3880 for (auto Variant : getMatchedVariants()) { 3881 uint64_t EI; 3882 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3883 Variant); 3884 // We order match statuses from least to most specific. We use most specific 3885 // status as resulting 3886 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3887 if ((R == Match_Success) || 3888 (R == Match_PreferE32) || 3889 (R == Match_MissingFeature && Result != Match_PreferE32) || 3890 (R == Match_InvalidOperand && Result != Match_MissingFeature 3891 && Result != Match_PreferE32) || 3892 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3893 && Result != Match_MissingFeature 3894 && Result != Match_PreferE32)) { 3895 Result = R; 3896 ErrorInfo = EI; 3897 } 3898 if (R == Match_Success) 3899 break; 3900 } 3901 3902 if (Result == Match_Success) { 3903 if (!validateInstruction(Inst, IDLoc, Operands)) { 3904 return true; 3905 } 3906 Inst.setLoc(IDLoc); 3907 Out.emitInstruction(Inst, getSTI()); 3908 return false; 3909 } 3910 3911 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 3912 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 3913 return true; 3914 } 3915 3916 switch (Result) { 3917 default: break; 3918 case Match_MissingFeature: 3919 // It has been verified that the specified instruction 3920 // mnemonic is valid. A match was found but it requires 3921 // features which are not supported on this GPU. 
3922 return Error(IDLoc, "operands are not valid for this GPU or mode"); 3923 3924 case Match_InvalidOperand: { 3925 SMLoc ErrorLoc = IDLoc; 3926 if (ErrorInfo != ~0ULL) { 3927 if (ErrorInfo >= Operands.size()) { 3928 return Error(IDLoc, "too few operands for instruction"); 3929 } 3930 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3931 if (ErrorLoc == SMLoc()) 3932 ErrorLoc = IDLoc; 3933 } 3934 return Error(ErrorLoc, "invalid operand for instruction"); 3935 } 3936 3937 case Match_PreferE32: 3938 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3939 "should be encoded as e32"); 3940 case Match_MnemonicFail: 3941 llvm_unreachable("Invalid instructions should have been handled already"); 3942 } 3943 llvm_unreachable("Implement any new match types added!"); 3944 } 3945 3946 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3947 int64_t Tmp = -1; 3948 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3949 return true; 3950 } 3951 if (getParser().parseAbsoluteExpression(Tmp)) { 3952 return true; 3953 } 3954 Ret = static_cast<uint32_t>(Tmp); 3955 return false; 3956 } 3957 3958 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3959 uint32_t &Minor) { 3960 if (ParseAsAbsoluteExpression(Major)) 3961 return TokError("invalid major version"); 3962 3963 if (getLexer().isNot(AsmToken::Comma)) 3964 return TokError("minor version number required, comma expected"); 3965 Lex(); 3966 3967 if (ParseAsAbsoluteExpression(Minor)) 3968 return TokError("invalid minor version"); 3969 3970 return false; 3971 } 3972 3973 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3974 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3975 return TokError("directive only supported for amdgcn architecture"); 3976 3977 std::string Target; 3978 3979 SMLoc TargetStart = getTok().getLoc(); 3980 if (getParser().parseEscapedString(Target)) 3981 return true; 3982 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3983 3984 std::string ExpectedTarget; 3985 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3986 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3987 3988 if (Target != ExpectedTargetOS.str()) 3989 return getParser().Error(TargetRange.Start, "target must match options", 3990 TargetRange); 3991 3992 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3993 return false; 3994 } 3995 3996 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3997 return getParser().Error(Range.Start, "value out of range", Range); 3998 } 3999 4000 bool AMDGPUAsmParser::calculateGPRBlocks( 4001 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4002 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4003 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4004 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4005 // TODO(scott.linder): These calculations are duplicated from 4006 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
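// Rough sketch of the encoding (granule sizes vary by subtarget): the register
// count is rounded up to the allocation granule and stored as
// (roundedCount / granule) - 1. E.g. with a granule of 4 VGPRs, 17 used VGPRs
// would be encoded as ceil(17 / 4) - 1 = 4.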
4007 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4008 4009 unsigned NumVGPRs = NextFreeVGPR; 4010 unsigned NumSGPRs = NextFreeSGPR; 4011 4012 if (Version.Major >= 10) 4013 NumSGPRs = 0; 4014 else { 4015 unsigned MaxAddressableNumSGPRs = 4016 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4017 4018 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4019 NumSGPRs > MaxAddressableNumSGPRs) 4020 return OutOfRangeError(SGPRRange); 4021 4022 NumSGPRs += 4023 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4024 4025 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4026 NumSGPRs > MaxAddressableNumSGPRs) 4027 return OutOfRangeError(SGPRRange); 4028 4029 if (Features.test(FeatureSGPRInitBug)) 4030 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4031 } 4032 4033 VGPRBlocks = 4034 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4035 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4036 4037 return false; 4038 } 4039 4040 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4041 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4042 return TokError("directive only supported for amdgcn architecture"); 4043 4044 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4045 return TokError("directive only supported for amdhsa OS"); 4046 4047 StringRef KernelName; 4048 if (getParser().parseIdentifier(KernelName)) 4049 return true; 4050 4051 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4052 4053 StringSet<> Seen; 4054 4055 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4056 4057 SMRange VGPRRange; 4058 uint64_t NextFreeVGPR = 0; 4059 SMRange SGPRRange; 4060 uint64_t NextFreeSGPR = 0; 4061 unsigned UserSGPRCount = 0; 4062 bool ReserveVCC = true; 4063 bool ReserveFlatScr = true; 4064 bool ReserveXNACK = hasXNACK(); 4065 Optional<bool> EnableWavefrontSize32; 4066 4067 while (true) { 4068 while (getLexer().is(AsmToken::EndOfStatement)) 4069 Lex(); 4070 4071 if (getLexer().isNot(AsmToken::Identifier)) 4072 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4073 4074 StringRef ID = getTok().getIdentifier(); 4075 SMRange IDRange = getTok().getLocRange(); 4076 Lex(); 4077 4078 if (ID == ".end_amdhsa_kernel") 4079 break; 4080 4081 if (Seen.find(ID) != Seen.end()) 4082 return TokError(".amdhsa_ directives cannot be repeated"); 4083 Seen.insert(ID); 4084 4085 SMLoc ValStart = getTok().getLoc(); 4086 int64_t IVal; 4087 if (getParser().parseAbsoluteExpression(IVal)) 4088 return true; 4089 SMLoc ValEnd = getTok().getLoc(); 4090 SMRange ValRange = SMRange(ValStart, ValEnd); 4091 4092 if (IVal < 0) 4093 return OutOfRangeError(ValRange); 4094 4095 uint64_t Val = IVal; 4096 4097 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4098 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4099 return OutOfRangeError(RANGE); \ 4100 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4101 4102 if (ID == ".amdhsa_group_segment_fixed_size") { 4103 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4104 return OutOfRangeError(ValRange); 4105 KD.group_segment_fixed_size = Val; 4106 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4107 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4108 return OutOfRangeError(ValRange); 4109 KD.private_segment_fixed_size = Val; 4110 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4111 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4112 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4113 Val, ValRange); 
4114 if (Val) 4115 UserSGPRCount += 4; 4116 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4117 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4118 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4119 ValRange); 4120 if (Val) 4121 UserSGPRCount += 2; 4122 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4123 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4124 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4125 ValRange); 4126 if (Val) 4127 UserSGPRCount += 2; 4128 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4129 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4130 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4131 Val, ValRange); 4132 if (Val) 4133 UserSGPRCount += 2; 4134 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4135 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4136 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4137 ValRange); 4138 if (Val) 4139 UserSGPRCount += 2; 4140 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4141 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4142 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4143 ValRange); 4144 if (Val) 4145 UserSGPRCount += 2; 4146 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4147 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4148 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4149 Val, ValRange); 4150 if (Val) 4151 UserSGPRCount += 1; 4152 } else if (ID == ".amdhsa_wavefront_size32") { 4153 if (IVersion.Major < 10) 4154 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4155 IDRange); 4156 EnableWavefrontSize32 = Val; 4157 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4158 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4159 Val, ValRange); 4160 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4161 PARSE_BITS_ENTRY( 4162 KD.compute_pgm_rsrc2, 4163 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4164 ValRange); 4165 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4166 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4167 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4168 ValRange); 4169 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4170 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4171 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4172 ValRange); 4173 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4174 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4175 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4176 ValRange); 4177 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4178 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4179 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4180 ValRange); 4181 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4182 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4183 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4184 ValRange); 4185 } else if (ID == ".amdhsa_next_free_vgpr") { 4186 VGPRRange = ValRange; 4187 NextFreeVGPR = Val; 4188 } else if (ID == ".amdhsa_next_free_sgpr") { 4189 SGPRRange = ValRange; 4190 NextFreeSGPR = Val; 4191 } else if (ID == ".amdhsa_reserve_vcc") { 4192 if (!isUInt<1>(Val)) 4193 return OutOfRangeError(ValRange); 4194 ReserveVCC = Val; 4195 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4196 if (IVersion.Major < 7) 4197 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4198 IDRange); 4199 if (!isUInt<1>(Val)) 4200 return OutOfRangeError(ValRange); 4201 ReserveFlatScr = Val; 4202 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4203 if (IVersion.Major < 8) 4204 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4205 IDRange); 4206 if (!isUInt<1>(Val)) 4207 return OutOfRangeError(ValRange); 4208 ReserveXNACK = Val; 4209 } else if (ID == ".amdhsa_float_round_mode_32") { 4210 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4211 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4212 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4213 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4214 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4215 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4216 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4217 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4218 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4219 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4220 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4221 ValRange); 4222 } else if (ID == ".amdhsa_dx10_clamp") { 4223 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4224 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4225 } else if (ID == ".amdhsa_ieee_mode") { 4226 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4227 Val, ValRange); 4228 } else if (ID == ".amdhsa_fp16_overflow") { 4229 if (IVersion.Major < 9) 4230 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4231 IDRange); 4232 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4233 ValRange); 4234 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4235 if (IVersion.Major < 10) 4236 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4237 IDRange); 4238 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4239 ValRange); 4240 } else if (ID == ".amdhsa_memory_ordered") { 4241 if (IVersion.Major < 10) 4242 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4243 IDRange); 4244 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4245 ValRange); 4246 } else if (ID == ".amdhsa_forward_progress") { 4247 if (IVersion.Major < 10) 4248 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4249 IDRange); 4250 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4251 ValRange); 4252 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4253 PARSE_BITS_ENTRY( 4254 KD.compute_pgm_rsrc2, 4255 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4256 ValRange); 4257 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4258 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4259 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4260 Val, ValRange); 4261 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4262 PARSE_BITS_ENTRY( 4263 KD.compute_pgm_rsrc2, 4264 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4265 ValRange); 4266 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4267 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4268 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4269 Val, ValRange); 4270 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4271 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4272 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4273 Val, ValRange); 4274 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4275 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4276 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4277 Val, ValRange); 4278 } else if (ID == ".amdhsa_exception_int_div_zero") { 4279 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4280 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4281 Val, ValRange); 4282 } else { 4283 return getParser().Error(IDRange.Start, 
4284 "unknown .amdhsa_kernel directive", IDRange); 4285 } 4286 4287 #undef PARSE_BITS_ENTRY 4288 } 4289 4290 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4291 return TokError(".amdhsa_next_free_vgpr directive is required"); 4292 4293 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4294 return TokError(".amdhsa_next_free_sgpr directive is required"); 4295 4296 unsigned VGPRBlocks; 4297 unsigned SGPRBlocks; 4298 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4299 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4300 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4301 SGPRBlocks)) 4302 return true; 4303 4304 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4305 VGPRBlocks)) 4306 return OutOfRangeError(VGPRRange); 4307 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4308 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4309 4310 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4311 SGPRBlocks)) 4312 return OutOfRangeError(SGPRRange); 4313 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4314 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4315 SGPRBlocks); 4316 4317 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4318 return TokError("too many user SGPRs enabled"); 4319 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4320 UserSGPRCount); 4321 4322 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4323 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4324 ReserveFlatScr, ReserveXNACK); 4325 return false; 4326 } 4327 4328 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4329 uint32_t Major; 4330 uint32_t Minor; 4331 4332 if (ParseDirectiveMajorMinor(Major, Minor)) 4333 return true; 4334 4335 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4336 return false; 4337 } 4338 4339 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4340 uint32_t Major; 4341 uint32_t Minor; 4342 uint32_t Stepping; 4343 StringRef VendorName; 4344 StringRef ArchName; 4345 4346 // If this directive has no arguments, then use the ISA version for the 4347 // targeted GPU. 
4348 if (getLexer().is(AsmToken::EndOfStatement)) { 4349 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4350 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4351 ISA.Stepping, 4352 "AMD", "AMDGPU"); 4353 return false; 4354 } 4355 4356 if (ParseDirectiveMajorMinor(Major, Minor)) 4357 return true; 4358 4359 if (getLexer().isNot(AsmToken::Comma)) 4360 return TokError("stepping version number required, comma expected"); 4361 Lex(); 4362 4363 if (ParseAsAbsoluteExpression(Stepping)) 4364 return TokError("invalid stepping version"); 4365 4366 if (getLexer().isNot(AsmToken::Comma)) 4367 return TokError("vendor name required, comma expected"); 4368 Lex(); 4369 4370 if (getLexer().isNot(AsmToken::String)) 4371 return TokError("invalid vendor name"); 4372 4373 VendorName = getLexer().getTok().getStringContents(); 4374 Lex(); 4375 4376 if (getLexer().isNot(AsmToken::Comma)) 4377 return TokError("arch name required, comma expected"); 4378 Lex(); 4379 4380 if (getLexer().isNot(AsmToken::String)) 4381 return TokError("invalid arch name"); 4382 4383 ArchName = getLexer().getTok().getStringContents(); 4384 Lex(); 4385 4386 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4387 VendorName, ArchName); 4388 return false; 4389 } 4390 4391 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4392 amd_kernel_code_t &Header) { 4393 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4394 // assembly for backwards compatibility. 4395 if (ID == "max_scratch_backing_memory_byte_size") { 4396 Parser.eatToEndOfStatement(); 4397 return false; 4398 } 4399 4400 SmallString<40> ErrStr; 4401 raw_svector_ostream Err(ErrStr); 4402 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4403 return TokError(Err.str()); 4404 } 4405 Lex(); 4406 4407 if (ID == "enable_wavefront_size32") { 4408 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4409 if (!isGFX10()) 4410 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4411 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4412 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4413 } else { 4414 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4415 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4416 } 4417 } 4418 4419 if (ID == "wavefront_size") { 4420 if (Header.wavefront_size == 5) { 4421 if (!isGFX10()) 4422 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4423 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4424 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4425 } else if (Header.wavefront_size == 6) { 4426 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4427 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4428 } 4429 } 4430 4431 if (ID == "enable_wgp_mode") { 4432 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4433 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4434 } 4435 4436 if (ID == "enable_mem_ordered") { 4437 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4438 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4439 } 4440 4441 if (ID == "enable_fwd_progress") { 4442 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4443 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4444 } 4445 4446 return false; 4447 } 4448 4449 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4450 amd_kernel_code_t Header; 4451 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4452 4453 while (true) { 4454 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4455 // will set the current token to EndOfStatement. 4456 while(getLexer().is(AsmToken::EndOfStatement)) 4457 Lex(); 4458 4459 if (getLexer().isNot(AsmToken::Identifier)) 4460 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4461 4462 StringRef ID = getLexer().getTok().getIdentifier(); 4463 Lex(); 4464 4465 if (ID == ".end_amd_kernel_code_t") 4466 break; 4467 4468 if (ParseAMDKernelCodeTValue(ID, Header)) 4469 return true; 4470 } 4471 4472 getTargetStreamer().EmitAMDKernelCodeT(Header); 4473 4474 return false; 4475 } 4476 4477 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4478 if (getLexer().isNot(AsmToken::Identifier)) 4479 return TokError("expected symbol name"); 4480 4481 StringRef KernelName = Parser.getTok().getString(); 4482 4483 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4484 ELF::STT_AMDGPU_HSA_KERNEL); 4485 Lex(); 4486 4487 KernelScope.initialize(getContext()); 4488 return false; 4489 } 4490 4491 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4492 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4493 return Error(getParser().getTok().getLoc(), 4494 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4495 "architectures"); 4496 } 4497 4498 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4499 4500 std::string ISAVersionStringFromSTI; 4501 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4502 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4503 4504 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4505 return Error(getParser().getTok().getLoc(), 4506 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4507 "arguments specified through the command line"); 4508 } 4509 4510 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4511 Lex(); 4512 4513 return false; 4514 } 4515 4516 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4517 const char *AssemblerDirectiveBegin; 4518 const char *AssemblerDirectiveEnd; 4519 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4520 isHsaAbiVersion3(&getSTI()) 4521 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4522 HSAMD::V3::AssemblerDirectiveEnd) 4523 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4524 HSAMD::AssemblerDirectiveEnd); 4525 4526 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4527 return Error(getParser().getTok().getLoc(), 4528 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4529 "not available on non-amdhsa OSes")).str()); 4530 } 4531 4532 std::string HSAMetadataString; 4533 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4534 HSAMetadataString)) 4535 return true; 4536 4537 if (isHsaAbiVersion3(&getSTI())) { 4538 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4539 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4540 } else { 4541 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4542 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4543 } 4544 4545 return false; 4546 } 4547 4548 /// Common code to parse out a block of text (typically YAML) between start and 4549 /// end directives. 
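/// Whitespace and statement separators are preserved in the collected string,
/// so that, for example, the YAML between .amd_amdgpu_hsa_metadata and
/// .end_amd_amdgpu_hsa_metadata can be handed to the metadata parser verbatim.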
4550 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4551 const char *AssemblerDirectiveEnd, 4552 std::string &CollectString) { 4553 4554 raw_string_ostream CollectStream(CollectString); 4555 4556 getLexer().setSkipSpace(false); 4557 4558 bool FoundEnd = false; 4559 while (!getLexer().is(AsmToken::Eof)) { 4560 while (getLexer().is(AsmToken::Space)) { 4561 CollectStream << getLexer().getTok().getString(); 4562 Lex(); 4563 } 4564 4565 if (getLexer().is(AsmToken::Identifier)) { 4566 StringRef ID = getLexer().getTok().getIdentifier(); 4567 if (ID == AssemblerDirectiveEnd) { 4568 Lex(); 4569 FoundEnd = true; 4570 break; 4571 } 4572 } 4573 4574 CollectStream << Parser.parseStringToEndOfStatement() 4575 << getContext().getAsmInfo()->getSeparatorString(); 4576 4577 Parser.eatToEndOfStatement(); 4578 } 4579 4580 getLexer().setSkipSpace(true); 4581 4582 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4583 return TokError(Twine("expected directive ") + 4584 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4585 } 4586 4587 CollectStream.flush(); 4588 return false; 4589 } 4590 4591 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4592 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4593 std::string String; 4594 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4595 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4596 return true; 4597 4598 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4599 if (!PALMetadata->setFromString(String)) 4600 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4601 return false; 4602 } 4603 4604 /// Parse the assembler directive for old linear-format PAL metadata. 4605 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4606 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4607 return Error(getParser().getTok().getLoc(), 4608 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4609 "not available on non-amdpal OSes")).str()); 4610 } 4611 4612 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4613 PALMetadata->setLegacy(); 4614 for (;;) { 4615 uint32_t Key, Value; 4616 if (ParseAsAbsoluteExpression(Key)) { 4617 return TokError(Twine("invalid value in ") + 4618 Twine(PALMD::AssemblerDirective)); 4619 } 4620 if (getLexer().isNot(AsmToken::Comma)) { 4621 return TokError(Twine("expected an even number of values in ") + 4622 Twine(PALMD::AssemblerDirective)); 4623 } 4624 Lex(); 4625 if (ParseAsAbsoluteExpression(Value)) { 4626 return TokError(Twine("invalid value in ") + 4627 Twine(PALMD::AssemblerDirective)); 4628 } 4629 PALMetadata->setRegister(Key, Value); 4630 if (getLexer().isNot(AsmToken::Comma)) 4631 break; 4632 Lex(); 4633 } 4634 return false; 4635 } 4636 4637 /// ParseDirectiveAMDGPULDS 4638 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4639 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4640 if (getParser().checkForValidSection()) 4641 return true; 4642 4643 StringRef Name; 4644 SMLoc NameLoc = getLexer().getLoc(); 4645 if (getParser().parseIdentifier(Name)) 4646 return TokError("expected identifier in directive"); 4647 4648 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4649 if (parseToken(AsmToken::Comma, "expected ','")) 4650 return true; 4651 4652 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4653 4654 int64_t Size; 4655 SMLoc SizeLoc = getLexer().getLoc(); 4656 if (getParser().parseAbsoluteExpression(Size)) 4657 return true; 4658 if (Size < 0) 4659 return 
Error(SizeLoc, "size must be non-negative"); 4660 if (Size > LocalMemorySize) 4661 return Error(SizeLoc, "size is too large"); 4662 4663 int64_t Alignment = 4; 4664 if (getLexer().is(AsmToken::Comma)) { 4665 Lex(); 4666 SMLoc AlignLoc = getLexer().getLoc(); 4667 if (getParser().parseAbsoluteExpression(Alignment)) 4668 return true; 4669 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4670 return Error(AlignLoc, "alignment must be a power of two"); 4671 4672 // Alignment larger than the size of LDS is possible in theory, as long 4673 // as the linker manages to place to symbol at address 0, but we do want 4674 // to make sure the alignment fits nicely into a 32-bit integer. 4675 if (Alignment >= 1u << 31) 4676 return Error(AlignLoc, "alignment is too large"); 4677 } 4678 4679 if (parseToken(AsmToken::EndOfStatement, 4680 "unexpected token in '.amdgpu_lds' directive")) 4681 return true; 4682 4683 Symbol->redefineIfPossible(); 4684 if (!Symbol->isUndefined()) 4685 return Error(NameLoc, "invalid symbol redefinition"); 4686 4687 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4688 return false; 4689 } 4690 4691 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4692 StringRef IDVal = DirectiveID.getString(); 4693 4694 if (isHsaAbiVersion3(&getSTI())) { 4695 if (IDVal == ".amdgcn_target") 4696 return ParseDirectiveAMDGCNTarget(); 4697 4698 if (IDVal == ".amdhsa_kernel") 4699 return ParseDirectiveAMDHSAKernel(); 4700 4701 // TODO: Restructure/combine with PAL metadata directive. 4702 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4703 return ParseDirectiveHSAMetadata(); 4704 } else { 4705 if (IDVal == ".hsa_code_object_version") 4706 return ParseDirectiveHSACodeObjectVersion(); 4707 4708 if (IDVal == ".hsa_code_object_isa") 4709 return ParseDirectiveHSACodeObjectISA(); 4710 4711 if (IDVal == ".amd_kernel_code_t") 4712 return ParseDirectiveAMDKernelCodeT(); 4713 4714 if (IDVal == ".amdgpu_hsa_kernel") 4715 return ParseDirectiveAMDGPUHsaKernel(); 4716 4717 if (IDVal == ".amd_amdgpu_isa") 4718 return ParseDirectiveISAVersion(); 4719 4720 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4721 return ParseDirectiveHSAMetadata(); 4722 } 4723 4724 if (IDVal == ".amdgpu_lds") 4725 return ParseDirectiveAMDGPULDS(); 4726 4727 if (IDVal == PALMD::AssemblerDirectiveBegin) 4728 return ParseDirectivePALMetadataBegin(); 4729 4730 if (IDVal == PALMD::AssemblerDirective) 4731 return ParseDirectivePALMetadata(); 4732 4733 return true; 4734 } 4735 4736 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4737 unsigned RegNo) const { 4738 4739 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4740 R.isValid(); ++R) { 4741 if (*R == RegNo) 4742 return isGFX9Plus(); 4743 } 4744 4745 // GFX10 has 2 more SGPRs 104 and 105. 
4746 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4747 R.isValid(); ++R) { 4748 if (*R == RegNo) 4749 return hasSGPR104_SGPR105(); 4750 } 4751 4752 switch (RegNo) { 4753 case AMDGPU::SRC_SHARED_BASE: 4754 case AMDGPU::SRC_SHARED_LIMIT: 4755 case AMDGPU::SRC_PRIVATE_BASE: 4756 case AMDGPU::SRC_PRIVATE_LIMIT: 4757 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4758 return !isCI() && !isSI() && !isVI(); 4759 case AMDGPU::TBA: 4760 case AMDGPU::TBA_LO: 4761 case AMDGPU::TBA_HI: 4762 case AMDGPU::TMA: 4763 case AMDGPU::TMA_LO: 4764 case AMDGPU::TMA_HI: 4765 return !isGFX9() && !isGFX10(); 4766 case AMDGPU::XNACK_MASK: 4767 case AMDGPU::XNACK_MASK_LO: 4768 case AMDGPU::XNACK_MASK_HI: 4769 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4770 case AMDGPU::SGPR_NULL: 4771 return isGFX10(); 4772 default: 4773 break; 4774 } 4775 4776 if (isCI()) 4777 return true; 4778 4779 if (isSI() || isGFX10()) { 4780 // No flat_scr on SI. 4781 // On GFX10 flat scratch is not a valid register operand and can only be 4782 // accessed with s_setreg/s_getreg. 4783 switch (RegNo) { 4784 case AMDGPU::FLAT_SCR: 4785 case AMDGPU::FLAT_SCR_LO: 4786 case AMDGPU::FLAT_SCR_HI: 4787 return false; 4788 default: 4789 return true; 4790 } 4791 } 4792 4793 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4794 // SI/CI have. 4795 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4796 R.isValid(); ++R) { 4797 if (*R == RegNo) 4798 return hasSGPR102_SGPR103(); 4799 } 4800 4801 return true; 4802 } 4803 4804 OperandMatchResultTy 4805 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4806 OperandMode Mode) { 4807 // Try to parse with a custom parser 4808 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4809 4810 // If we successfully parsed the operand or if there as an error parsing, 4811 // we are done. 4812 // 4813 // If we are parsing after we reach EndOfStatement then this means we 4814 // are appending default values to the Operands list. This is only done 4815 // by custom parser, so we shouldn't continue on to the generic parsing. 4816 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4817 getLexer().is(AsmToken::EndOfStatement)) 4818 return ResTy; 4819 4820 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4821 unsigned Prefix = Operands.size(); 4822 SMLoc LBraceLoc = getTok().getLoc(); 4823 Parser.Lex(); // eat the '[' 4824 4825 for (;;) { 4826 ResTy = parseReg(Operands); 4827 if (ResTy != MatchOperand_Success) 4828 return ResTy; 4829 4830 if (getLexer().is(AsmToken::RBrac)) 4831 break; 4832 4833 if (getLexer().isNot(AsmToken::Comma)) 4834 return MatchOperand_ParseFail; 4835 Parser.Lex(); 4836 } 4837 4838 if (Operands.size() - Prefix > 1) { 4839 Operands.insert(Operands.begin() + Prefix, 4840 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4841 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4842 getTok().getLoc())); 4843 } 4844 4845 Parser.Lex(); // eat the ']' 4846 return MatchOperand_Success; 4847 } 4848 4849 return parseRegOrImm(Operands); 4850 } 4851 4852 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4853 // Clear any forced encodings from the previous instruction. 
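// A suffix on the mnemonic selects a specific encoding and is stripped here,
// e.g. "v_add_f32_e64" yields the mnemonic "v_add_f32" with a forced 64-bit
// (VOP3) encoding; "_e32", "_dpp" and "_sdwa" are handled the same way below.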
4854 setForcedEncodingSize(0); 4855 setForcedDPP(false); 4856 setForcedSDWA(false); 4857 4858 if (Name.endswith("_e64")) { 4859 setForcedEncodingSize(64); 4860 return Name.substr(0, Name.size() - 4); 4861 } else if (Name.endswith("_e32")) { 4862 setForcedEncodingSize(32); 4863 return Name.substr(0, Name.size() - 4); 4864 } else if (Name.endswith("_dpp")) { 4865 setForcedDPP(true); 4866 return Name.substr(0, Name.size() - 4); 4867 } else if (Name.endswith("_sdwa")) { 4868 setForcedSDWA(true); 4869 return Name.substr(0, Name.size() - 5); 4870 } 4871 return Name; 4872 } 4873 4874 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4875 StringRef Name, 4876 SMLoc NameLoc, OperandVector &Operands) { 4877 // Add the instruction mnemonic 4878 Name = parseMnemonicSuffix(Name); 4879 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4880 4881 bool IsMIMG = Name.startswith("image_"); 4882 4883 while (!getLexer().is(AsmToken::EndOfStatement)) { 4884 OperandMode Mode = OperandMode_Default; 4885 if (IsMIMG && isGFX10() && Operands.size() == 2) 4886 Mode = OperandMode_NSA; 4887 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4888 4889 // Eat the comma or space if there is one. 4890 if (getLexer().is(AsmToken::Comma)) 4891 Parser.Lex(); 4892 4893 if (Res != MatchOperand_Success) { 4894 checkUnsupportedInstruction(Name, NameLoc); 4895 if (!Parser.hasPendingError()) { 4896 // FIXME: use real operand location rather than the current location. 4897 StringRef Msg = 4898 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4899 "not a valid operand."; 4900 Error(getLexer().getLoc(), Msg); 4901 } 4902 while (!getLexer().is(AsmToken::EndOfStatement)) { 4903 Parser.Lex(); 4904 } 4905 return true; 4906 } 4907 } 4908 4909 return false; 4910 } 4911 4912 //===----------------------------------------------------------------------===// 4913 // Utility functions 4914 //===----------------------------------------------------------------------===// 4915 4916 OperandMatchResultTy 4917 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4918 4919 if (!trySkipId(Prefix, AsmToken::Colon)) 4920 return MatchOperand_NoMatch; 4921 4922 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4923 } 4924 4925 OperandMatchResultTy 4926 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4927 AMDGPUOperand::ImmTy ImmTy, 4928 bool (*ConvertResult)(int64_t&)) { 4929 SMLoc S = getLoc(); 4930 int64_t Value = 0; 4931 4932 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4933 if (Res != MatchOperand_Success) 4934 return Res; 4935 4936 if (ConvertResult && !ConvertResult(Value)) { 4937 Error(S, "invalid " + StringRef(Prefix) + " value."); 4938 } 4939 4940 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4941 return MatchOperand_Success; 4942 } 4943 4944 OperandMatchResultTy 4945 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4946 OperandVector &Operands, 4947 AMDGPUOperand::ImmTy ImmTy, 4948 bool (*ConvertResult)(int64_t&)) { 4949 SMLoc S = getLoc(); 4950 if (!trySkipId(Prefix, AsmToken::Colon)) 4951 return MatchOperand_NoMatch; 4952 4953 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4954 return MatchOperand_ParseFail; 4955 4956 unsigned Val = 0; 4957 const unsigned MaxSize = 4; 4958 4959 // FIXME: How to verify the number of elements matches the number of src 4960 // operands? 
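// Worked example: "op_sel:[0,0,1,1]" sets bit I for each element that is 1,
// giving Val = (1 << 2) | (1 << 3) = 0b1100.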
4961 for (int I = 0; ; ++I) { 4962 int64_t Op; 4963 SMLoc Loc = getLoc(); 4964 if (!parseExpr(Op)) 4965 return MatchOperand_ParseFail; 4966 4967 if (Op != 0 && Op != 1) { 4968 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4969 return MatchOperand_ParseFail; 4970 } 4971 4972 Val |= (Op << I); 4973 4974 if (trySkipToken(AsmToken::RBrac)) 4975 break; 4976 4977 if (I + 1 == MaxSize) { 4978 Error(getLoc(), "expected a closing square bracket"); 4979 return MatchOperand_ParseFail; 4980 } 4981 4982 if (!skipToken(AsmToken::Comma, "expected a comma")) 4983 return MatchOperand_ParseFail; 4984 } 4985 4986 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4987 return MatchOperand_Success; 4988 } 4989 4990 OperandMatchResultTy 4991 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4992 AMDGPUOperand::ImmTy ImmTy) { 4993 int64_t Bit = 0; 4994 SMLoc S = Parser.getTok().getLoc(); 4995 4996 // We are at the end of the statement, and this is a default argument, so 4997 // use a default value. 4998 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4999 switch(getLexer().getKind()) { 5000 case AsmToken::Identifier: { 5001 StringRef Tok = Parser.getTok().getString(); 5002 if (Tok == Name) { 5003 if (Tok == "r128" && !hasMIMG_R128()) 5004 Error(S, "r128 modifier is not supported on this GPU"); 5005 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5006 Error(S, "a16 modifier is not supported on this GPU"); 5007 Bit = 1; 5008 Parser.Lex(); 5009 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5010 Bit = 0; 5011 Parser.Lex(); 5012 } else { 5013 return MatchOperand_NoMatch; 5014 } 5015 break; 5016 } 5017 default: 5018 return MatchOperand_NoMatch; 5019 } 5020 } 5021 5022 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 5023 return MatchOperand_ParseFail; 5024 5025 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5026 ImmTy = AMDGPUOperand::ImmTyR128A16; 5027 5028 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5029 return MatchOperand_Success; 5030 } 5031 5032 static void addOptionalImmOperand( 5033 MCInst& Inst, const OperandVector& Operands, 5034 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5035 AMDGPUOperand::ImmTy ImmT, 5036 int64_t Default = 0) { 5037 auto i = OptionalIdx.find(ImmT); 5038 if (i != OptionalIdx.end()) { 5039 unsigned Idx = i->second; 5040 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5041 } else { 5042 Inst.addOperand(MCOperand::createImm(Default)); 5043 } 5044 } 5045 5046 OperandMatchResultTy 5047 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5048 if (getLexer().isNot(AsmToken::Identifier)) { 5049 return MatchOperand_NoMatch; 5050 } 5051 StringRef Tok = Parser.getTok().getString(); 5052 if (Tok != Prefix) { 5053 return MatchOperand_NoMatch; 5054 } 5055 5056 Parser.Lex(); 5057 if (getLexer().isNot(AsmToken::Colon)) { 5058 return MatchOperand_ParseFail; 5059 } 5060 5061 Parser.Lex(); 5062 if (getLexer().isNot(AsmToken::Identifier)) { 5063 return MatchOperand_ParseFail; 5064 } 5065 5066 Value = Parser.getTok().getString(); 5067 return MatchOperand_Success; 5068 } 5069 5070 //===----------------------------------------------------------------------===// 5071 // MTBUF format 5072 //===----------------------------------------------------------------------===// 5073 5074 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5075 int64_t MaxVal, 5076 int64_t &Fmt) { 5077 int64_t Val; 5078 SMLoc Loc = getLoc(); 5079 5080 auto Res = parseIntWithPrefix(Pref, Val); 5081 if (Res 
== MatchOperand_ParseFail) 5082 return false; 5083 if (Res == MatchOperand_NoMatch) 5084 return true; 5085 5086 if (Val < 0 || Val > MaxVal) { 5087 Error(Loc, Twine("out of range ", StringRef(Pref))); 5088 return false; 5089 } 5090 5091 Fmt = Val; 5092 return true; 5093 } 5094 5095 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5096 // values to live in a joint format operand in the MCInst encoding. 5097 OperandMatchResultTy 5098 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5099 using namespace llvm::AMDGPU::MTBUFFormat; 5100 5101 int64_t Dfmt = DFMT_UNDEF; 5102 int64_t Nfmt = NFMT_UNDEF; 5103 5104 // dfmt and nfmt can appear in either order, and each is optional. 5105 for (int I = 0; I < 2; ++I) { 5106 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5107 return MatchOperand_ParseFail; 5108 5109 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5110 return MatchOperand_ParseFail; 5111 } 5112 // Skip optional comma between dfmt/nfmt 5113 // but guard against 2 commas following each other. 5114 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5115 !peekToken().is(AsmToken::Comma)) { 5116 trySkipToken(AsmToken::Comma); 5117 } 5118 } 5119 5120 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5121 return MatchOperand_NoMatch; 5122 5123 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5124 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5125 5126 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5127 return MatchOperand_Success; 5128 } 5129 5130 OperandMatchResultTy 5131 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5132 using namespace llvm::AMDGPU::MTBUFFormat; 5133 5134 int64_t Fmt = UFMT_UNDEF; 5135 5136 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5137 return MatchOperand_ParseFail; 5138 5139 if (Fmt == UFMT_UNDEF) 5140 return MatchOperand_NoMatch; 5141 5142 Format = Fmt; 5143 return MatchOperand_Success; 5144 } 5145 5146 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5147 int64_t &Nfmt, 5148 StringRef FormatStr, 5149 SMLoc Loc) { 5150 using namespace llvm::AMDGPU::MTBUFFormat; 5151 int64_t Format; 5152 5153 Format = getDfmt(FormatStr); 5154 if (Format != DFMT_UNDEF) { 5155 Dfmt = Format; 5156 return true; 5157 } 5158 5159 Format = getNfmt(FormatStr, getSTI()); 5160 if (Format != NFMT_UNDEF) { 5161 Nfmt = Format; 5162 return true; 5163 } 5164 5165 Error(Loc, "unsupported format"); 5166 return false; 5167 } 5168 5169 OperandMatchResultTy 5170 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5171 SMLoc FormatLoc, 5172 int64_t &Format) { 5173 using namespace llvm::AMDGPU::MTBUFFormat; 5174 5175 int64_t Dfmt = DFMT_UNDEF; 5176 int64_t Nfmt = NFMT_UNDEF; 5177 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5178 return MatchOperand_ParseFail; 5179 5180 if (trySkipToken(AsmToken::Comma)) { 5181 StringRef Str; 5182 SMLoc Loc = getLoc(); 5183 if (!parseId(Str, "expected a format string") || 5184 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5185 return MatchOperand_ParseFail; 5186 } 5187 if (Dfmt == DFMT_UNDEF) { 5188 Error(Loc, "duplicate numeric format"); 5189 return MatchOperand_ParseFail; 5190 } else if (Nfmt == NFMT_UNDEF) { 5191 Error(Loc, "duplicate data format"); 5192 return MatchOperand_ParseFail; 5193 } 5194 } 5195 5196 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5197 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5198 5199 if (isGFX10()) { 5200 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5201 if (Ufmt == UFMT_UNDEF) { 5202 Error(FormatLoc, "unsupported format"); 5203 return MatchOperand_ParseFail; 5204 } 5205 Format = Ufmt; 5206 } else { 5207 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5208 } 5209 5210 return MatchOperand_Success; 5211 } 5212 5213 OperandMatchResultTy 5214 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5215 SMLoc Loc, 5216 int64_t &Format) { 5217 using namespace llvm::AMDGPU::MTBUFFormat; 5218 5219 auto Id = getUnifiedFormat(FormatStr); 5220 if (Id == UFMT_UNDEF) 5221 return MatchOperand_NoMatch; 5222 5223 if (!isGFX10()) { 5224 Error(Loc, "unified format is not supported on this GPU"); 5225 return MatchOperand_ParseFail; 5226 } 5227 5228 Format = Id; 5229 return MatchOperand_Success; 5230 } 5231 5232 OperandMatchResultTy 5233 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5234 using namespace llvm::AMDGPU::MTBUFFormat; 5235 SMLoc Loc = getLoc(); 5236 5237 if (!parseExpr(Format)) 5238 return MatchOperand_ParseFail; 5239 if (!isValidFormatEncoding(Format, getSTI())) { 5240 Error(Loc, "out of range format"); 5241 return MatchOperand_ParseFail; 5242 } 5243 5244 return MatchOperand_Success; 5245 } 5246 5247 OperandMatchResultTy 5248 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5249 using namespace llvm::AMDGPU::MTBUFFormat; 5250 5251 if (!trySkipId("format", AsmToken::Colon)) 5252 return MatchOperand_NoMatch; 5253 5254 if (trySkipToken(AsmToken::LBrac)) { 5255 StringRef FormatStr; 5256 SMLoc Loc = getLoc(); 5257 if (!parseId(FormatStr, "expected a format string")) 5258 return MatchOperand_ParseFail; 5259 5260 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5261 if (Res == MatchOperand_NoMatch) 5262 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5263 if (Res != MatchOperand_Success) 5264 return Res; 5265 5266 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5267 return MatchOperand_ParseFail; 5268 5269 return MatchOperand_Success; 5270 } 5271 5272 return parseNumericFormat(Format); 5273 } 5274 5275 OperandMatchResultTy 5276 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5277 using namespace llvm::AMDGPU::MTBUFFormat; 5278 5279 int64_t Format = getDefaultFormatEncoding(getSTI()); 5280 OperandMatchResultTy Res; 5281 SMLoc Loc = getLoc(); 5282 5283 // Parse legacy format syntax. 5284 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5285 if (Res == MatchOperand_ParseFail) 5286 return Res; 5287 5288 bool FormatFound = (Res == MatchOperand_Success); 5289 5290 Operands.push_back( 5291 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5292 5293 if (FormatFound) 5294 trySkipToken(AsmToken::Comma); 5295 5296 if (isToken(AsmToken::EndOfStatement)) { 5297 // We are expecting an soffset operand, 5298 // but let matcher handle the error. 5299 return MatchOperand_Success; 5300 } 5301 5302 // Parse soffset. 
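// soffset is parsed as a plain register or immediate, e.g. 's4' or '0'. If no
// format was given before soffset, a trailing one such as
// format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] may still follow it.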
5303 Res = parseRegOrImm(Operands); 5304 if (Res != MatchOperand_Success) 5305 return Res; 5306 5307 trySkipToken(AsmToken::Comma); 5308 5309 if (!FormatFound) { 5310 Res = parseSymbolicOrNumericFormat(Format); 5311 if (Res == MatchOperand_ParseFail) 5312 return Res; 5313 if (Res == MatchOperand_Success) { 5314 auto Size = Operands.size(); 5315 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5316 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5317 Op.setImm(Format); 5318 } 5319 return MatchOperand_Success; 5320 } 5321 5322 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5323 Error(getLoc(), "duplicate format"); 5324 return MatchOperand_ParseFail; 5325 } 5326 return MatchOperand_Success; 5327 } 5328 5329 //===----------------------------------------------------------------------===// 5330 // ds 5331 //===----------------------------------------------------------------------===// 5332 5333 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5334 const OperandVector &Operands) { 5335 OptionalImmIndexMap OptionalIdx; 5336 5337 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5338 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5339 5340 // Add the register arguments 5341 if (Op.isReg()) { 5342 Op.addRegOperands(Inst, 1); 5343 continue; 5344 } 5345 5346 // Handle optional arguments 5347 OptionalIdx[Op.getImmTy()] = i; 5348 } 5349 5350 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5351 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5352 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5353 5354 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5355 } 5356 5357 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5358 bool IsGdsHardcoded) { 5359 OptionalImmIndexMap OptionalIdx; 5360 5361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5363 5364 // Add the register arguments 5365 if (Op.isReg()) { 5366 Op.addRegOperands(Inst, 1); 5367 continue; 5368 } 5369 5370 if (Op.isToken() && Op.getToken() == "gds") { 5371 IsGdsHardcoded = true; 5372 continue; 5373 } 5374 5375 // Handle optional arguments 5376 OptionalIdx[Op.getImmTy()] = i; 5377 } 5378 5379 AMDGPUOperand::ImmTy OffsetType = 5380 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5381 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5382 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5383 AMDGPUOperand::ImmTyOffset; 5384 5385 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5386 5387 if (!IsGdsHardcoded) { 5388 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5389 } 5390 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5391 } 5392 5393 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5394 OptionalImmIndexMap OptionalIdx; 5395 5396 unsigned OperandIdx[4]; 5397 unsigned EnMask = 0; 5398 int SrcIdx = 0; 5399 5400 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5401 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5402 5403 // Add the register arguments 5404 if (Op.isReg()) { 5405 assert(SrcIdx < 4); 5406 OperandIdx[SrcIdx] = Inst.size(); 5407 Op.addRegOperands(Inst, 1); 5408 ++SrcIdx; 5409 continue; 5410 } 5411 5412 if (Op.isOff()) { 5413 assert(SrcIdx < 4); 5414 OperandIdx[SrcIdx] = Inst.size(); 5415 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5416 ++SrcIdx; 5417 continue; 5418 } 5419 5420 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5421 Op.addImmOperands(Inst, 1); 5422 continue; 5423 } 5424 5425 if (Op.isToken() && Op.getToken() == "done") 5426 continue; 5427 5428 // Handle optional arguments 5429 OptionalIdx[Op.getImmTy()] = i; 5430 } 5431 5432 assert(SrcIdx == 4); 5433 5434 bool Compr = false; 5435 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5436 Compr = true; 5437 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5438 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5439 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5440 } 5441 5442 for (auto i = 0; i < SrcIdx; ++i) { 5443 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5444 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5445 } 5446 } 5447 5448 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5449 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5450 5451 Inst.addOperand(MCOperand::createImm(EnMask)); 5452 } 5453 5454 //===----------------------------------------------------------------------===// 5455 // s_waitcnt 5456 //===----------------------------------------------------------------------===// 5457 5458 static bool 5459 encodeCnt( 5460 const AMDGPU::IsaVersion ISA, 5461 int64_t &IntVal, 5462 int64_t CntVal, 5463 bool Saturate, 5464 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5465 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5466 { 5467 bool Failed = false; 5468 5469 IntVal = encode(ISA, IntVal, CntVal); 5470 if (CntVal != decode(ISA, IntVal)) { 5471 if (Saturate) { 5472 IntVal = encode(ISA, IntVal, -1); 5473 } else { 5474 Failed = true; 5475 } 5476 } 5477 return Failed; 5478 } 5479 5480 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5481 5482 SMLoc CntLoc = getLoc(); 5483 StringRef CntName = getTokenStr(); 5484 5485 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5486 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5487 return false; 5488 5489 int64_t CntVal; 5490 SMLoc ValLoc = getLoc(); 5491 if (!parseExpr(CntVal)) 5492 return false; 5493 5494 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5495 5496 bool Failed = true; 5497 bool Sat = CntName.endswith("_sat"); 5498 5499 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5500 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5501 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5502 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5503 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5504 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5505 } else { 5506 Error(CntLoc, "invalid counter name " + CntName); 5507 return false; 5508 } 5509 5510 if (Failed) { 5511 Error(ValLoc, "too large value for " + CntName); 5512 return false; 5513 } 5514 5515 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5516 return false; 5517 5518 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5519 if (isToken(AsmToken::EndOfStatement)) { 5520 Error(getLoc(), "expected a counter name"); 5521 return false; 5522 } 5523 } 5524 5525 return true; 5526 } 5527 5528 OperandMatchResultTy 5529 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5530 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5531 int64_t Waitcnt = getWaitcntBitMask(ISA); 5532 SMLoc S = getLoc(); 5533 5534 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5535 while (!isToken(AsmToken::EndOfStatement)) { 5536 if (!parseCnt(Waitcnt)) 5537 return MatchOperand_ParseFail; 5538 } 5539 } else { 5540 if (!parseExpr(Waitcnt)) 5541 return MatchOperand_ParseFail; 5542 } 5543 5544 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5545 return MatchOperand_Success; 5546 } 5547 5548 bool 5549 AMDGPUOperand::isSWaitCnt() const { 5550 return isImm(); 5551 } 5552 5553 //===----------------------------------------------------------------------===// 5554 // hwreg 5555 //===----------------------------------------------------------------------===// 5556 5557 bool 5558 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5559 int64_t &Offset, 5560 int64_t 
&Width) { 5561 using namespace llvm::AMDGPU::Hwreg; 5562 5563 // The register may be specified by name or using a numeric code 5564 if (isToken(AsmToken::Identifier) && 5565 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5566 HwReg.IsSymbolic = true; 5567 lex(); // skip message name 5568 } else if (!parseExpr(HwReg.Id)) { 5569 return false; 5570 } 5571 5572 if (trySkipToken(AsmToken::RParen)) 5573 return true; 5574 5575 // parse optional params 5576 return 5577 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5578 parseExpr(Offset) && 5579 skipToken(AsmToken::Comma, "expected a comma") && 5580 parseExpr(Width) && 5581 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5582 } 5583 5584 bool 5585 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5586 const int64_t Offset, 5587 const int64_t Width, 5588 const SMLoc Loc) { 5589 5590 using namespace llvm::AMDGPU::Hwreg; 5591 5592 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5593 Error(Loc, "specified hardware register is not supported on this GPU"); 5594 return false; 5595 } else if (!isValidHwreg(HwReg.Id)) { 5596 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5597 return false; 5598 } else if (!isValidHwregOffset(Offset)) { 5599 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5600 return false; 5601 } else if (!isValidHwregWidth(Width)) { 5602 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5603 return false; 5604 } 5605 return true; 5606 } 5607 5608 OperandMatchResultTy 5609 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5610 using namespace llvm::AMDGPU::Hwreg; 5611 5612 int64_t ImmVal = 0; 5613 SMLoc Loc = getLoc(); 5614 5615 if (trySkipId("hwreg", AsmToken::LParen)) { 5616 OperandInfoTy HwReg(ID_UNKNOWN_); 5617 int64_t Offset = OFFSET_DEFAULT_; 5618 int64_t Width = WIDTH_DEFAULT_; 5619 if (parseHwregBody(HwReg, Offset, Width) && 5620 validateHwreg(HwReg, Offset, Width, Loc)) { 5621 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5622 } else { 5623 return MatchOperand_ParseFail; 5624 } 5625 } else if (parseExpr(ImmVal)) { 5626 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5627 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5628 return MatchOperand_ParseFail; 5629 } 5630 } else { 5631 return MatchOperand_ParseFail; 5632 } 5633 5634 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5635 return MatchOperand_Success; 5636 } 5637 5638 bool AMDGPUOperand::isHwreg() const { 5639 return isImmTy(ImmTyHwreg); 5640 } 5641 5642 //===----------------------------------------------------------------------===// 5643 // sendmsg 5644 //===----------------------------------------------------------------------===// 5645 5646 bool 5647 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5648 OperandInfoTy &Op, 5649 OperandInfoTy &Stream) { 5650 using namespace llvm::AMDGPU::SendMsg; 5651 5652 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5653 Msg.IsSymbolic = true; 5654 lex(); // skip message name 5655 } else if (!parseExpr(Msg.Id)) { 5656 return false; 5657 } 5658 5659 if (trySkipToken(AsmToken::Comma)) { 5660 Op.IsDefined = true; 5661 if (isToken(AsmToken::Identifier) && 5662 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5663 lex(); // skip operation name 5664 } else if (!parseExpr(Op.Id)) { 5665 return false; 5666 } 5667 5668 if (trySkipToken(AsmToken::Comma)) { 5669 Stream.IsDefined = true; 5670 if (!parseExpr(Stream.Id)) 
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream,
                                 const SMLoc S) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    Error(S, Op.IsDefined ?
             "message does not support operations" :
             "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream, Loc)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan =
StringSwitch<int>(Chan) 5780 .Case(".x", 0) 5781 .Case(".y", 1) 5782 .Case(".z", 2) 5783 .Case(".w", 3) 5784 .Default(-1); 5785 if (AttrChan == -1) 5786 return MatchOperand_ParseFail; 5787 5788 Str = Str.drop_back(2).drop_front(4); 5789 5790 uint8_t Attr; 5791 if (Str.getAsInteger(10, Attr)) 5792 return MatchOperand_ParseFail; 5793 5794 SMLoc S = Parser.getTok().getLoc(); 5795 Parser.Lex(); 5796 if (Attr > 63) { 5797 Error(S, "out of bounds attr"); 5798 return MatchOperand_ParseFail; 5799 } 5800 5801 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5802 5803 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5804 AMDGPUOperand::ImmTyInterpAttr)); 5805 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5806 AMDGPUOperand::ImmTyAttrChan)); 5807 return MatchOperand_Success; 5808 } 5809 5810 //===----------------------------------------------------------------------===// 5811 // exp 5812 //===----------------------------------------------------------------------===// 5813 5814 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5815 uint8_t &Val) { 5816 if (Str == "null") { 5817 Val = 9; 5818 return MatchOperand_Success; 5819 } 5820 5821 if (Str.startswith("mrt")) { 5822 Str = Str.drop_front(3); 5823 if (Str == "z") { // == mrtz 5824 Val = 8; 5825 return MatchOperand_Success; 5826 } 5827 5828 if (Str.getAsInteger(10, Val)) 5829 return MatchOperand_ParseFail; 5830 5831 if (Val > 7) { 5832 Error(getLoc(), "invalid exp target"); 5833 return MatchOperand_ParseFail; 5834 } 5835 5836 return MatchOperand_Success; 5837 } 5838 5839 if (Str.startswith("pos")) { 5840 Str = Str.drop_front(3); 5841 if (Str.getAsInteger(10, Val)) 5842 return MatchOperand_ParseFail; 5843 5844 if (Val > 4 || (Val == 4 && !isGFX10())) { 5845 Error(getLoc(), "invalid exp target"); 5846 return MatchOperand_ParseFail; 5847 } 5848 5849 Val += 12; 5850 return MatchOperand_Success; 5851 } 5852 5853 if (isGFX10() && Str == "prim") { 5854 Val = 20; 5855 return MatchOperand_Success; 5856 } 5857 5858 if (Str.startswith("param")) { 5859 Str = Str.drop_front(5); 5860 if (Str.getAsInteger(10, Val)) 5861 return MatchOperand_ParseFail; 5862 5863 if (Val >= 32) { 5864 Error(getLoc(), "invalid exp target"); 5865 return MatchOperand_ParseFail; 5866 } 5867 5868 Val += 32; 5869 return MatchOperand_Success; 5870 } 5871 5872 if (Str.startswith("invalid_target_")) { 5873 Str = Str.drop_front(15); 5874 if (Str.getAsInteger(10, Val)) 5875 return MatchOperand_ParseFail; 5876 5877 Error(getLoc(), "invalid exp target"); 5878 return MatchOperand_ParseFail; 5879 } 5880 5881 return MatchOperand_NoMatch; 5882 } 5883 5884 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5885 uint8_t Val; 5886 StringRef Str = Parser.getTok().getString(); 5887 5888 auto Res = parseExpTgtImpl(Str, Val); 5889 if (Res != MatchOperand_Success) 5890 return Res; 5891 5892 SMLoc S = Parser.getTok().getLoc(); 5893 Parser.Lex(); 5894 5895 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5896 AMDGPUOperand::ImmTyExpTgt)); 5897 return MatchOperand_Success; 5898 } 5899 5900 //===----------------------------------------------------------------------===// 5901 // parser helpers 5902 //===----------------------------------------------------------------------===// 5903 5904 bool 5905 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5906 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5907 } 5908 5909 bool 5910 AMDGPUAsmParser::isId(const StringRef Id) const { 5911 return 
isId(getToken(), Id); 5912 } 5913 5914 bool 5915 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5916 return getTokenKind() == Kind; 5917 } 5918 5919 bool 5920 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5921 if (isId(Id)) { 5922 lex(); 5923 return true; 5924 } 5925 return false; 5926 } 5927 5928 bool 5929 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5930 if (isId(Id) && peekToken().is(Kind)) { 5931 lex(); 5932 lex(); 5933 return true; 5934 } 5935 return false; 5936 } 5937 5938 bool 5939 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5940 if (isToken(Kind)) { 5941 lex(); 5942 return true; 5943 } 5944 return false; 5945 } 5946 5947 bool 5948 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5949 const StringRef ErrMsg) { 5950 if (!trySkipToken(Kind)) { 5951 Error(getLoc(), ErrMsg); 5952 return false; 5953 } 5954 return true; 5955 } 5956 5957 bool 5958 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5959 return !getParser().parseAbsoluteExpression(Imm); 5960 } 5961 5962 bool 5963 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5964 SMLoc S = getLoc(); 5965 5966 const MCExpr *Expr; 5967 if (Parser.parseExpression(Expr)) 5968 return false; 5969 5970 int64_t IntVal; 5971 if (Expr->evaluateAsAbsolute(IntVal)) { 5972 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5973 } else { 5974 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5975 } 5976 return true; 5977 } 5978 5979 bool 5980 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5981 if (isToken(AsmToken::String)) { 5982 Val = getToken().getStringContents(); 5983 lex(); 5984 return true; 5985 } else { 5986 Error(getLoc(), ErrMsg); 5987 return false; 5988 } 5989 } 5990 5991 bool 5992 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5993 if (isToken(AsmToken::Identifier)) { 5994 Val = getTokenStr(); 5995 lex(); 5996 return true; 5997 } else { 5998 Error(getLoc(), ErrMsg); 5999 return false; 6000 } 6001 } 6002 6003 AsmToken 6004 AMDGPUAsmParser::getToken() const { 6005 return Parser.getTok(); 6006 } 6007 6008 AsmToken 6009 AMDGPUAsmParser::peekToken() { 6010 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 6011 } 6012 6013 void 6014 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6015 auto TokCount = getLexer().peekTokens(Tokens); 6016 6017 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6018 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6019 } 6020 6021 AsmToken::TokenKind 6022 AMDGPUAsmParser::getTokenKind() const { 6023 return getLexer().getKind(); 6024 } 6025 6026 SMLoc 6027 AMDGPUAsmParser::getLoc() const { 6028 return getToken().getLoc(); 6029 } 6030 6031 StringRef 6032 AMDGPUAsmParser::getTokenStr() const { 6033 return getToken().getString(); 6034 } 6035 6036 void 6037 AMDGPUAsmParser::lex() { 6038 Parser.Lex(); 6039 } 6040 6041 //===----------------------------------------------------------------------===// 6042 // swizzle 6043 //===----------------------------------------------------------------------===// 6044 6045 LLVM_READNONE 6046 static unsigned 6047 encodeBitmaskPerm(const unsigned AndMask, 6048 const unsigned OrMask, 6049 const unsigned XorMask) { 6050 using namespace llvm::AMDGPU::Swizzle; 6051 6052 return BITMASK_PERM_ENC | 6053 (AndMask << BITMASK_AND_SHIFT) | 6054 (OrMask << BITMASK_OR_SHIFT) | 6055 (XorMask << BITMASK_XOR_SHIFT); 6056 } 6057 6058 bool 6059 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6060 const unsigned MinVal, 6061 const unsigned MaxVal, 6062 const StringRef ErrMsg) { 6063 for (unsigned i = 0; i < OpNum; ++i) { 6064 if (!skipToken(AsmToken::Comma, "expected a comma")){ 6065 return false; 6066 } 6067 SMLoc ExprLoc = Parser.getTok().getLoc(); 6068 if (!parseExpr(Op[i])) { 6069 return false; 6070 } 6071 if (Op[i] < MinVal || Op[i] > MaxVal) { 6072 Error(ExprLoc, ErrMsg); 6073 return false; 6074 } 6075 } 6076 6077 return true; 6078 } 6079 6080 bool 6081 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6082 using namespace llvm::AMDGPU::Swizzle; 6083 6084 int64_t Lane[LANE_NUM]; 6085 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6086 "expected a 2-bit lane id")) { 6087 Imm = QUAD_PERM_ENC; 6088 for (unsigned I = 0; I < LANE_NUM; ++I) { 6089 Imm |= Lane[I] << (LANE_SHIFT * I); 6090 } 6091 return true; 6092 } 6093 return false; 6094 } 6095 6096 bool 6097 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6098 using namespace llvm::AMDGPU::Swizzle; 6099 6100 SMLoc S = Parser.getTok().getLoc(); 6101 int64_t GroupSize; 6102 int64_t LaneIdx; 6103 6104 if (!parseSwizzleOperands(1, &GroupSize, 6105 2, 32, 6106 "group size must be in the interval [2,32]")) { 6107 return false; 6108 } 6109 if (!isPowerOf2_64(GroupSize)) { 6110 Error(S, "group size must be a power of two"); 6111 return false; 6112 } 6113 if (parseSwizzleOperands(1, &LaneIdx, 6114 0, GroupSize - 1, 6115 "lane id must be in the interval [0,group size - 1]")) { 6116 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6117 return true; 6118 } 6119 return false; 6120 } 6121 6122 bool 6123 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6124 using namespace llvm::AMDGPU::Swizzle; 6125 6126 SMLoc S = Parser.getTok().getLoc(); 6127 int64_t GroupSize; 6128 6129 if (!parseSwizzleOperands(1, &GroupSize, 6130 2, 32, "group size must be in the interval [2,32]")) { 6131 return false; 6132 } 6133 if (!isPowerOf2_64(GroupSize)) { 6134 Error(S, "group size must be a power of two"); 6135 return false; 6136 } 6137 6138 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6139 return true; 6140 } 6141 6142 bool 6143 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6144 using namespace 
llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
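    // (For reference, illustrative syntax only: the forms accepted above look
    // roughly like "offset:0xabcd", "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" or
    // "offset:swizzle(BITMASK_PERM, "01pi0")"; any surrounding mnemonic such
    // as ds_swizzle_b32 is an assumption for the example.)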
6275 return parseOptionalOpr(Operands); 6276 } 6277 } 6278 6279 bool 6280 AMDGPUOperand::isSwizzle() const { 6281 return isImmTy(ImmTySwizzle); 6282 } 6283 6284 //===----------------------------------------------------------------------===// 6285 // VGPR Index Mode 6286 //===----------------------------------------------------------------------===// 6287 6288 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6289 6290 using namespace llvm::AMDGPU::VGPRIndexMode; 6291 6292 if (trySkipToken(AsmToken::RParen)) { 6293 return OFF; 6294 } 6295 6296 int64_t Imm = 0; 6297 6298 while (true) { 6299 unsigned Mode = 0; 6300 SMLoc S = Parser.getTok().getLoc(); 6301 6302 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6303 if (trySkipId(IdSymbolic[ModeId])) { 6304 Mode = 1 << ModeId; 6305 break; 6306 } 6307 } 6308 6309 if (Mode == 0) { 6310 Error(S, (Imm == 0)? 6311 "expected a VGPR index mode or a closing parenthesis" : 6312 "expected a VGPR index mode"); 6313 return UNDEF; 6314 } 6315 6316 if (Imm & Mode) { 6317 Error(S, "duplicate VGPR index mode"); 6318 return UNDEF; 6319 } 6320 Imm |= Mode; 6321 6322 if (trySkipToken(AsmToken::RParen)) 6323 break; 6324 if (!skipToken(AsmToken::Comma, 6325 "expected a comma or a closing parenthesis")) 6326 return UNDEF; 6327 } 6328 6329 return Imm; 6330 } 6331 6332 OperandMatchResultTy 6333 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6334 6335 using namespace llvm::AMDGPU::VGPRIndexMode; 6336 6337 int64_t Imm = 0; 6338 SMLoc S = Parser.getTok().getLoc(); 6339 6340 if (getLexer().getKind() == AsmToken::Identifier && 6341 Parser.getTok().getString() == "gpr_idx" && 6342 getLexer().peekTok().is(AsmToken::LParen)) { 6343 6344 Parser.Lex(); 6345 Parser.Lex(); 6346 6347 Imm = parseGPRIdxMacro(); 6348 if (Imm == UNDEF) 6349 return MatchOperand_ParseFail; 6350 6351 } else { 6352 if (getParser().parseAbsoluteExpression(Imm)) 6353 return MatchOperand_ParseFail; 6354 if (Imm < 0 || !isUInt<4>(Imm)) { 6355 Error(S, "invalid immediate: only 4-bit values are legal"); 6356 return MatchOperand_ParseFail; 6357 } 6358 } 6359 6360 Operands.push_back( 6361 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6362 return MatchOperand_Success; 6363 } 6364 6365 bool AMDGPUOperand::isGPRIdxMode() const { 6366 return isImmTy(ImmTyGprIdxMode); 6367 } 6368 6369 //===----------------------------------------------------------------------===// 6370 // sopp branch targets 6371 //===----------------------------------------------------------------------===// 6372 6373 OperandMatchResultTy 6374 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6375 6376 // Make sure we are not parsing something 6377 // that looks like a label or an expression but is not. 6378 // This will improve error messages. 6379 if (isRegister() || isModifier()) 6380 return MatchOperand_NoMatch; 6381 6382 if (!parseExpr(Operands)) 6383 return MatchOperand_ParseFail; 6384 6385 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6386 assert(Opr.isImm() || Opr.isExpr()); 6387 SMLoc Loc = Opr.getStartLoc(); 6388 6389 // Currently we do not support arbitrary expressions as branch targets. 6390 // Only labels and absolute expressions are accepted. 
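  // For example (illustrative operands): "s_branch target" and "s_branch 4"
  // are accepted, while a composite expression such as "s_branch target+4"
  // is diagnosed below.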
6391 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6392 Error(Loc, "expected an absolute expression or a label"); 6393 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6394 Error(Loc, "expected a 16-bit signed jump offset"); 6395 } 6396 6397 return MatchOperand_Success; 6398 } 6399 6400 //===----------------------------------------------------------------------===// 6401 // Boolean holding registers 6402 //===----------------------------------------------------------------------===// 6403 6404 OperandMatchResultTy 6405 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6406 return parseReg(Operands); 6407 } 6408 6409 //===----------------------------------------------------------------------===// 6410 // mubuf 6411 //===----------------------------------------------------------------------===// 6412 6413 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6414 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6415 } 6416 6417 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6418 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6419 } 6420 6421 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6422 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6423 } 6424 6425 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6426 const OperandVector &Operands, 6427 bool IsAtomic, 6428 bool IsAtomicReturn, 6429 bool IsLds) { 6430 bool IsLdsOpcode = IsLds; 6431 bool HasLdsModifier = false; 6432 OptionalImmIndexMap OptionalIdx; 6433 assert(IsAtomicReturn ? IsAtomic : true); 6434 unsigned FirstOperandIdx = 1; 6435 6436 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6437 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6438 6439 // Add the register arguments 6440 if (Op.isReg()) { 6441 Op.addRegOperands(Inst, 1); 6442 // Insert a tied src for atomic return dst. 6443 // This cannot be postponed as subsequent calls to 6444 // addImmOperands rely on correct number of MC operands. 6445 if (IsAtomicReturn && i == FirstOperandIdx) 6446 Op.addRegOperands(Inst, 1); 6447 continue; 6448 } 6449 6450 // Handle the case where soffset is an immediate 6451 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6452 Op.addImmOperands(Inst, 1); 6453 continue; 6454 } 6455 6456 HasLdsModifier |= Op.isLDS(); 6457 6458 // Handle tokens like 'offen' which are sometimes hard-coded into the 6459 // asm string. There are no MCInst operands for these. 6460 if (Op.isToken()) { 6461 continue; 6462 } 6463 assert(Op.isImm()); 6464 6465 // Handle optional arguments 6466 OptionalIdx[Op.getImmTy()] = i; 6467 } 6468 6469 // This is a workaround for an llvm quirk which may result in an 6470 // incorrect instruction selection. Lds and non-lds versions of 6471 // MUBUF instructions are identical except that lds versions 6472 // have mandatory 'lds' modifier. However this modifier follows 6473 // optional modifiers and llvm asm matcher regards this 'lds' 6474 // modifier as an optional one. As a result, an lds version 6475 // of opcode may be selected even if it has no 'lds' modifier. 6476 if (IsLdsOpcode && !HasLdsModifier) { 6477 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6478 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6479 Inst.setOpcode(NoLdsOpcode); 6480 IsLdsOpcode = false; 6481 } 6482 } 6483 6484 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6485 if (!IsAtomic) { // glc is hard-coded. 
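    // (Descriptive note: for buffer atomics glc is part of the hardcoded
    // operand list - it distinguishes the return form - so it is not added
    // here as an optional immediate. This only restates the check above.)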
6486 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6487 } 6488 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6489 6490 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6491 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6492 } 6493 6494 if (isGFX10()) 6495 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6496 } 6497 6498 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6499 OptionalImmIndexMap OptionalIdx; 6500 6501 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6502 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6503 6504 // Add the register arguments 6505 if (Op.isReg()) { 6506 Op.addRegOperands(Inst, 1); 6507 continue; 6508 } 6509 6510 // Handle the case where soffset is an immediate 6511 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6512 Op.addImmOperands(Inst, 1); 6513 continue; 6514 } 6515 6516 // Handle tokens like 'offen' which are sometimes hard-coded into the 6517 // asm string. There are no MCInst operands for these. 6518 if (Op.isToken()) { 6519 continue; 6520 } 6521 assert(Op.isImm()); 6522 6523 // Handle optional arguments 6524 OptionalIdx[Op.getImmTy()] = i; 6525 } 6526 6527 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6528 AMDGPUOperand::ImmTyOffset); 6529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6530 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6532 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6533 6534 if (isGFX10()) 6535 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6536 } 6537 6538 //===----------------------------------------------------------------------===// 6539 // mimg 6540 //===----------------------------------------------------------------------===// 6541 6542 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6543 bool IsAtomic) { 6544 unsigned I = 1; 6545 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6546 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6547 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6548 } 6549 6550 if (IsAtomic) { 6551 // Add src, same as dst 6552 assert(Desc.getNumDefs() == 1); 6553 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6554 } 6555 6556 OptionalImmIndexMap OptionalIdx; 6557 6558 for (unsigned E = Operands.size(); I != E; ++I) { 6559 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6560 6561 // Add the register arguments 6562 if (Op.isReg()) { 6563 Op.addRegOperands(Inst, 1); 6564 } else if (Op.isImmModifier()) { 6565 OptionalIdx[Op.getImmTy()] = I; 6566 } else if (!Op.isToken()) { 6567 llvm_unreachable("unexpected operand type"); 6568 } 6569 } 6570 6571 bool IsGFX10 = isGFX10(); 6572 6573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6574 if (IsGFX10) 6575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6576 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6577 if (IsGFX10) 6578 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6580 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6581 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6582 if (IsGFX10) 6583 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6584 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6586 if (!IsGFX10) 6587 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6588 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6589 } 6590 6591 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6592 cvtMIMG(Inst, Operands, true); 6593 } 6594 6595 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6596 const OperandVector &Operands) { 6597 for (unsigned I = 1; I < Operands.size(); ++I) { 6598 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6599 if (Operand.isReg()) 6600 Operand.addRegOperands(Inst, 1); 6601 } 6602 6603 Inst.addOperand(MCOperand::createImm(1)); // a16 6604 } 6605 6606 //===----------------------------------------------------------------------===// 6607 // smrd 6608 //===----------------------------------------------------------------------===// 6609 6610 bool AMDGPUOperand::isSMRDOffset8() const { 6611 return isImm() && isUInt<8>(getImm()); 6612 } 6613 6614 bool AMDGPUOperand::isSMEMOffset() const { 6615 return isImm(); // Offset range is checked later by validator. 6616 } 6617 6618 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6619 // 32-bit literals are only supported on CI and we only want to use them 6620 // when the offset is > 8-bits. 6621 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6622 } 6623 6624 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6625 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6626 } 6627 6628 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6629 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6630 } 6631 6632 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6633 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6634 } 6635 6636 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6637 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6638 } 6639 6640 //===----------------------------------------------------------------------===// 6641 // vop3 6642 //===----------------------------------------------------------------------===// 6643 6644 static bool ConvertOmodMul(int64_t &Mul) { 6645 if (Mul != 1 && Mul != 2 && Mul != 4) 6646 return false; 6647 6648 Mul >>= 1; 6649 return true; 6650 } 6651 6652 static bool ConvertOmodDiv(int64_t &Div) { 6653 if (Div == 1) { 6654 Div = 0; 6655 return true; 6656 } 6657 6658 if (Div == 2) { 6659 Div = 3; 6660 return true; 6661 } 6662 6663 return false; 6664 } 6665 6666 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6667 if (BoundCtrl == 0) { 6668 BoundCtrl = 1; 6669 return true; 6670 } 6671 6672 if (BoundCtrl == -1) { 6673 BoundCtrl = 0; 6674 return true; 6675 } 6676 6677 return false; 6678 } 6679 6680 // Note: the order in this table matches the order of operands in AsmString. 
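// Each entry below is {asm name, immediate operand type, is-a-bit-flag,
// optional value converter}; for example, "omod" is handled via
// parseOModOperand, which accepts mul:N or div:N and converts the value with
// ConvertOmodMul/ConvertOmodDiv. (Field meanings are inferred from how
// parseOptionalOpr uses the table.)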
6681 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6682 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6683 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6684 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6685 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6686 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6687 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6688 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6689 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6690 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6691 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6692 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6693 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6694 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6695 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6696 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6697 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6698 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6699 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6700 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6701 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6702 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6703 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6704 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6705 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6706 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6707 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6708 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6709 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6710 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6711 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6712 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6713 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6714 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6715 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6716 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6717 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6718 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6719 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6720 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6721 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6722 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6723 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6724 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6725 }; 6726 6727 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6728 6729 OperandMatchResultTy res = parseOptionalOpr(Operands); 6730 6731 // This is a hack to enable hardcoded mandatory operands which follow 6732 // optional operands. 6733 // 6734 // Current design assumes that all operands after the first optional operand 6735 // are also optional. However implementation of some instructions violates 6736 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6737 // 6738 // To alleviate this problem, we have to (implicitly) parse extra operands 6739 // to make sure autogenerated parser of custom operands never hit hardcoded 6740 // mandatory operands. 
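  // Illustrative case: the flat/global atomics mentioned above have a
  // hardcoded 'glc' that follows optional modifiers such as 'offset', so the
  // loop below keeps consuming up to MAX_OPR_LOOKAHEAD extra optional operands
  // (and the commas between them) instead of stopping after the first match.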
6741 6742 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6743 if (res != MatchOperand_Success || 6744 isToken(AsmToken::EndOfStatement)) 6745 break; 6746 6747 trySkipToken(AsmToken::Comma); 6748 res = parseOptionalOpr(Operands); 6749 } 6750 6751 return res; 6752 } 6753 6754 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6755 OperandMatchResultTy res; 6756 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6757 // try to parse any optional operand here 6758 if (Op.IsBit) { 6759 res = parseNamedBit(Op.Name, Operands, Op.Type); 6760 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6761 res = parseOModOperand(Operands); 6762 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6763 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6764 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6765 res = parseSDWASel(Operands, Op.Name, Op.Type); 6766 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6767 res = parseSDWADstUnused(Operands); 6768 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6769 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6770 Op.Type == AMDGPUOperand::ImmTyNegLo || 6771 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6772 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6773 Op.ConvertResult); 6774 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6775 res = parseDim(Operands); 6776 } else { 6777 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6778 } 6779 if (res != MatchOperand_NoMatch) { 6780 return res; 6781 } 6782 } 6783 return MatchOperand_NoMatch; 6784 } 6785 6786 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6787 StringRef Name = Parser.getTok().getString(); 6788 if (Name == "mul") { 6789 return parseIntWithPrefix("mul", Operands, 6790 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6791 } 6792 6793 if (Name == "div") { 6794 return parseIntWithPrefix("div", Operands, 6795 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6796 } 6797 6798 return MatchOperand_NoMatch; 6799 } 6800 6801 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6802 cvtVOP3P(Inst, Operands); 6803 6804 int Opc = Inst.getOpcode(); 6805 6806 int SrcNum; 6807 const int Ops[] = { AMDGPU::OpName::src0, 6808 AMDGPU::OpName::src1, 6809 AMDGPU::OpName::src2 }; 6810 for (SrcNum = 0; 6811 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6812 ++SrcNum); 6813 assert(SrcNum > 0); 6814 6815 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6816 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6817 6818 if ((OpSel & (1 << SrcNum)) != 0) { 6819 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6820 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6821 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6822 } 6823 } 6824 6825 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6826 // 1. This operand is input modifiers 6827 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6828 // 2. This is not last operand 6829 && Desc.NumOperands > (OpNum + 1) 6830 // 3. Next operand is register class 6831 && Desc.OpInfo[OpNum + 1].RegClass != -1 6832 // 4. 
Next register is not tied to any other operand 6833 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6834 } 6835 6836 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6837 { 6838 OptionalImmIndexMap OptionalIdx; 6839 unsigned Opc = Inst.getOpcode(); 6840 6841 unsigned I = 1; 6842 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6843 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6844 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6845 } 6846 6847 for (unsigned E = Operands.size(); I != E; ++I) { 6848 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6849 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6850 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6851 } else if (Op.isInterpSlot() || 6852 Op.isInterpAttr() || 6853 Op.isAttrChan()) { 6854 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6855 } else if (Op.isImmModifier()) { 6856 OptionalIdx[Op.getImmTy()] = I; 6857 } else { 6858 llvm_unreachable("unhandled operand type"); 6859 } 6860 } 6861 6862 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6863 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6864 } 6865 6866 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6867 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6868 } 6869 6870 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6872 } 6873 } 6874 6875 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6876 OptionalImmIndexMap &OptionalIdx) { 6877 unsigned Opc = Inst.getOpcode(); 6878 6879 unsigned I = 1; 6880 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6881 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6882 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6883 } 6884 6885 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6886 // This instruction has src modifiers 6887 for (unsigned E = Operands.size(); I != E; ++I) { 6888 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6889 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6890 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6891 } else if (Op.isImmModifier()) { 6892 OptionalIdx[Op.getImmTy()] = I; 6893 } else if (Op.isRegOrImm()) { 6894 Op.addRegOrImmOperands(Inst, 1); 6895 } else { 6896 llvm_unreachable("unhandled operand type"); 6897 } 6898 } 6899 } else { 6900 // No src modifiers 6901 for (unsigned E = Operands.size(); I != E; ++I) { 6902 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6903 if (Op.isMod()) { 6904 OptionalIdx[Op.getImmTy()] = I; 6905 } else { 6906 Op.addRegOrImmOperands(Inst, 1); 6907 } 6908 } 6909 } 6910 6911 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6912 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6913 } 6914 6915 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6916 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6917 } 6918 6919 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6920 // it has src2 register operand that is tied to dst operand 6921 // we don't allow modifiers for this operand in assembler so src2_modifiers 6922 // should be 0. 
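  // Concretely, for the opcodes listed below this splices an explicit
  // src2_modifiers immediate of 0 into the MCInst and then appends a copy of
  // the dst register as the tied src2 (a descriptive summary of the insertion
  // code that follows).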
6923 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6924 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6925 Opc == AMDGPU::V_MAC_F32_e64_vi || 6926 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 6927 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 6928 Opc == AMDGPU::V_MAC_F16_e64_vi || 6929 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6930 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6931 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 6932 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6933 auto it = Inst.begin(); 6934 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6935 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6936 ++it; 6937 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6938 } 6939 } 6940 6941 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6942 OptionalImmIndexMap OptionalIdx; 6943 cvtVOP3(Inst, Operands, OptionalIdx); 6944 } 6945 6946 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6947 const OperandVector &Operands) { 6948 OptionalImmIndexMap OptIdx; 6949 const int Opc = Inst.getOpcode(); 6950 const MCInstrDesc &Desc = MII.get(Opc); 6951 6952 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6953 6954 cvtVOP3(Inst, Operands, OptIdx); 6955 6956 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6957 assert(!IsPacked); 6958 Inst.addOperand(Inst.getOperand(0)); 6959 } 6960 6961 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6962 // instruction, and then figure out where to actually put the modifiers 6963 6964 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6965 6966 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6967 if (OpSelHiIdx != -1) { 6968 int DefaultVal = IsPacked ? 
-1 : 0; 6969 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6970 DefaultVal); 6971 } 6972 6973 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6974 if (NegLoIdx != -1) { 6975 assert(IsPacked); 6976 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6977 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6978 } 6979 6980 const int Ops[] = { AMDGPU::OpName::src0, 6981 AMDGPU::OpName::src1, 6982 AMDGPU::OpName::src2 }; 6983 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6984 AMDGPU::OpName::src1_modifiers, 6985 AMDGPU::OpName::src2_modifiers }; 6986 6987 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6988 6989 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6990 unsigned OpSelHi = 0; 6991 unsigned NegLo = 0; 6992 unsigned NegHi = 0; 6993 6994 if (OpSelHiIdx != -1) { 6995 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6996 } 6997 6998 if (NegLoIdx != -1) { 6999 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7000 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7001 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7002 } 7003 7004 for (int J = 0; J < 3; ++J) { 7005 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7006 if (OpIdx == -1) 7007 break; 7008 7009 uint32_t ModVal = 0; 7010 7011 if ((OpSel & (1 << J)) != 0) 7012 ModVal |= SISrcMods::OP_SEL_0; 7013 7014 if ((OpSelHi & (1 << J)) != 0) 7015 ModVal |= SISrcMods::OP_SEL_1; 7016 7017 if ((NegLo & (1 << J)) != 0) 7018 ModVal |= SISrcMods::NEG; 7019 7020 if ((NegHi & (1 << J)) != 0) 7021 ModVal |= SISrcMods::NEG_HI; 7022 7023 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7024 7025 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7026 } 7027 } 7028 7029 //===----------------------------------------------------------------------===// 7030 // dpp 7031 //===----------------------------------------------------------------------===// 7032 7033 bool AMDGPUOperand::isDPP8() const { 7034 return isImmTy(ImmTyDPP8); 7035 } 7036 7037 bool AMDGPUOperand::isDPPCtrl() const { 7038 using namespace AMDGPU::DPP; 7039 7040 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7041 if (result) { 7042 int64_t Imm = getImm(); 7043 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7044 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7045 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7046 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7047 (Imm == DppCtrl::WAVE_SHL1) || 7048 (Imm == DppCtrl::WAVE_ROL1) || 7049 (Imm == DppCtrl::WAVE_SHR1) || 7050 (Imm == DppCtrl::WAVE_ROR1) || 7051 (Imm == DppCtrl::ROW_MIRROR) || 7052 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7053 (Imm == DppCtrl::BCAST15) || 7054 (Imm == DppCtrl::BCAST31) || 7055 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7056 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7057 } 7058 return false; 7059 } 7060 7061 //===----------------------------------------------------------------------===// 7062 // mAI 7063 //===----------------------------------------------------------------------===// 7064 7065 bool AMDGPUOperand::isBLGP() const { 7066 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7067 } 7068 7069 bool AMDGPUOperand::isCBSZ() const { 7070 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7071 } 7072 7073 bool AMDGPUOperand::isABID() 
const { 7074 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7075 } 7076 7077 bool AMDGPUOperand::isS16Imm() const { 7078 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7079 } 7080 7081 bool AMDGPUOperand::isU16Imm() const { 7082 return isImm() && isUInt<16>(getImm()); 7083 } 7084 7085 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7086 if (!isGFX10()) 7087 return MatchOperand_NoMatch; 7088 7089 SMLoc S = Parser.getTok().getLoc(); 7090 7091 if (getLexer().isNot(AsmToken::Identifier)) 7092 return MatchOperand_NoMatch; 7093 if (getLexer().getTok().getString() != "dim") 7094 return MatchOperand_NoMatch; 7095 7096 Parser.Lex(); 7097 if (getLexer().isNot(AsmToken::Colon)) 7098 return MatchOperand_ParseFail; 7099 7100 Parser.Lex(); 7101 7102 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 7103 // integer. 7104 std::string Token; 7105 if (getLexer().is(AsmToken::Integer)) { 7106 SMLoc Loc = getLexer().getTok().getEndLoc(); 7107 Token = std::string(getLexer().getTok().getString()); 7108 Parser.Lex(); 7109 if (getLexer().getTok().getLoc() != Loc) 7110 return MatchOperand_ParseFail; 7111 } 7112 if (getLexer().isNot(AsmToken::Identifier)) 7113 return MatchOperand_ParseFail; 7114 Token += getLexer().getTok().getString(); 7115 7116 StringRef DimId = Token; 7117 if (DimId.startswith("SQ_RSRC_IMG_")) 7118 DimId = DimId.substr(12); 7119 7120 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7121 if (!DimInfo) 7122 return MatchOperand_ParseFail; 7123 7124 Parser.Lex(); 7125 7126 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 7127 AMDGPUOperand::ImmTyDim)); 7128 return MatchOperand_Success; 7129 } 7130 7131 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7132 SMLoc S = Parser.getTok().getLoc(); 7133 StringRef Prefix; 7134 7135 if (getLexer().getKind() == AsmToken::Identifier) { 7136 Prefix = Parser.getTok().getString(); 7137 } else { 7138 return MatchOperand_NoMatch; 7139 } 7140 7141 if (Prefix != "dpp8") 7142 return parseDPPCtrl(Operands); 7143 if (!isGFX10()) 7144 return MatchOperand_NoMatch; 7145 7146 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7147 7148 int64_t Sels[8]; 7149 7150 Parser.Lex(); 7151 if (getLexer().isNot(AsmToken::Colon)) 7152 return MatchOperand_ParseFail; 7153 7154 Parser.Lex(); 7155 if (getLexer().isNot(AsmToken::LBrac)) 7156 return MatchOperand_ParseFail; 7157 7158 Parser.Lex(); 7159 if (getParser().parseAbsoluteExpression(Sels[0])) 7160 return MatchOperand_ParseFail; 7161 if (0 > Sels[0] || 7 < Sels[0]) 7162 return MatchOperand_ParseFail; 7163 7164 for (size_t i = 1; i < 8; ++i) { 7165 if (getLexer().isNot(AsmToken::Comma)) 7166 return MatchOperand_ParseFail; 7167 7168 Parser.Lex(); 7169 if (getParser().parseAbsoluteExpression(Sels[i])) 7170 return MatchOperand_ParseFail; 7171 if (0 > Sels[i] || 7 < Sels[i]) 7172 return MatchOperand_ParseFail; 7173 } 7174 7175 if (getLexer().isNot(AsmToken::RBrac)) 7176 return MatchOperand_ParseFail; 7177 Parser.Lex(); 7178 7179 unsigned DPP8 = 0; 7180 for (size_t i = 0; i < 8; ++i) 7181 DPP8 |= (Sels[i] << (i * 3)); 7182 7183 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7184 return MatchOperand_Success; 7185 } 7186 7187 OperandMatchResultTy 7188 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7189 using namespace AMDGPU::DPP; 7190 7191 SMLoc S = Parser.getTok().getLoc(); 7192 StringRef Prefix; 7193 int64_t Int; 7194 7195 if 
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
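// Factories for the default values of optional DPP and endpgm operands:
// row_mask and bank_mask default to 0xf (all rows/banks enabled), while
// bound_ctrl, fi and the endpgm immediate default to 0.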
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
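// Convert parsed DPP operands into an MCInst: defs first, then sources (with
// FP input modifiers), then the trailing immediates. For DPP8 that is the
// packed lane-select immediate followed by fi; for classic DPP it is dpp_ctrl
// followed by row_mask, bank_mask, bound_ctrl and, where the opcode has one,
// fi.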
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

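  // Build the SDWA MCInst: copy the defs, optionally skip a vcc dst/src
  // operand for VOP2b and VOPC forms, add the sources with their modifiers,
  // then append the optional sdwa immediates (clamp, omod, dst_sel,
  // dst_unused, src0_sel, src1_sel) expected by the instruction type.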
  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we are given an immediate operand but
  // expect the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }