1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
                 SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
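    // For example (illustrative): in "ds_write_b32 v1, v2 gds" the trailing
    // "gds" may initially have been parsed as a symbol-ref expression rather
    // than a plain token; exposing the symbol name as the token text still
    // lets it match the 'gds' token operand.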
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool 
isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 
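    // Illustrative example (an assumption; see the TODO above): an instruction
    // such as "s_mov_b64 s[0:1], 0x12345" is accepted by this predicate, and
    // only a single 32-bit literal dword is encoded for the 64-bit operand.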
426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcB16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool 
isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMEMOffset() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 void setImm(int64_t Val) { 693 assert(isImm()); 694 Imm.Val = Val; 695 } 696 697 ImmTy getImmTy() const { 698 assert(isImm()); 699 return Imm.Type; 700 } 701 702 unsigned getReg() const override { 703 assert(isRegKind()); 704 return Reg.RegNo; 705 } 706 707 SMLoc getStartLoc() const override { 708 return StartLoc; 709 } 710 711 SMLoc getEndLoc() const override { 712 return EndLoc; 713 } 714 715 SMRange getLocRange() const { 716 return SMRange(StartLoc, EndLoc); 717 } 718 719 Modifiers getModifiers() const { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 return isRegKind() ? 
Reg.Mods : Imm.Mods; 722 } 723 724 void setModifiers(Modifiers Mods) { 725 assert(isRegKind() || isImmTy(ImmTyNone)); 726 if (isRegKind()) 727 Reg.Mods = Mods; 728 else 729 Imm.Mods = Mods; 730 } 731 732 bool hasModifiers() const { 733 return getModifiers().hasModifiers(); 734 } 735 736 bool hasFPModifiers() const { 737 return getModifiers().hasFPModifiers(); 738 } 739 740 bool hasIntModifiers() const { 741 return getModifiers().hasIntModifiers(); 742 } 743 744 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 745 746 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 747 748 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 749 750 template <unsigned Bitwidth> 751 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 752 753 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 754 addKImmFPOperands<16>(Inst, N); 755 } 756 757 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 758 addKImmFPOperands<32>(Inst, N); 759 } 760 761 void addRegOperands(MCInst &Inst, unsigned N) const; 762 763 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 764 addRegOperands(Inst, N); 765 } 766 767 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 768 if (isRegKind()) 769 addRegOperands(Inst, N); 770 else if (isExpr()) 771 Inst.addOperand(MCOperand::createExpr(Expr)); 772 else 773 addImmOperands(Inst, N); 774 } 775 776 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 777 Modifiers Mods = getModifiers(); 778 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 779 if (isRegKind()) { 780 addRegOperands(Inst, N); 781 } else { 782 addImmOperands(Inst, N, false); 783 } 784 } 785 786 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasIntModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 792 assert(!hasFPModifiers()); 793 addRegOrImmWithInputModsOperands(Inst, N); 794 } 795 796 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 797 Modifiers Mods = getModifiers(); 798 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 799 assert(isRegKind()); 800 addRegOperands(Inst, N); 801 } 802 803 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasIntModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 809 assert(!hasFPModifiers()); 810 addRegWithInputModsOperands(Inst, N); 811 } 812 813 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 814 if (isImm()) 815 addImmOperands(Inst, N); 816 else { 817 assert(isExpr()); 818 Inst.addOperand(MCOperand::createExpr(Expr)); 819 } 820 } 821 822 static void printImmTy(raw_ostream& OS, ImmTy Type) { 823 switch (Type) { 824 case ImmTyNone: OS << "None"; break; 825 case ImmTyGDS: OS << "GDS"; break; 826 case ImmTyLDS: OS << "LDS"; break; 827 case ImmTyOffen: OS << "Offen"; break; 828 case ImmTyIdxen: OS << "Idxen"; break; 829 case ImmTyAddr64: OS << "Addr64"; break; 830 case ImmTyOffset: OS << "Offset"; break; 831 case ImmTyInstOffset: OS << "InstOffset"; break; 832 case ImmTyOffset0: OS << "Offset0"; break; 833 case ImmTyOffset1: OS << "Offset1"; break; 834 case ImmTyDLC: OS << "DLC"; break; 835 case ImmTyGLC: OS << "GLC"; break; 836 case ImmTySLC: OS << "SLC"; break; 837 case ImmTySWZ: OS << "SWZ"; break; 838 case ImmTyTFE: OS << "TFE"; break; 839 case 
ImmTyD16: OS << "D16"; break; 840 case ImmTyFORMAT: OS << "FORMAT"; break; 841 case ImmTyClampSI: OS << "ClampSI"; break; 842 case ImmTyOModSI: OS << "OModSI"; break; 843 case ImmTyDPP8: OS << "DPP8"; break; 844 case ImmTyDppCtrl: OS << "DppCtrl"; break; 845 case ImmTyDppRowMask: OS << "DppRowMask"; break; 846 case ImmTyDppBankMask: OS << "DppBankMask"; break; 847 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 848 case ImmTyDppFi: OS << "FI"; break; 849 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 850 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 851 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 852 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 853 case ImmTyDMask: OS << "DMask"; break; 854 case ImmTyDim: OS << "Dim"; break; 855 case ImmTyUNorm: OS << "UNorm"; break; 856 case ImmTyDA: OS << "DA"; break; 857 case ImmTyR128A16: OS << "R128A16"; break; 858 case ImmTyA16: OS << "A16"; break; 859 case ImmTyLWE: OS << "LWE"; break; 860 case ImmTyOff: OS << "Off"; break; 861 case ImmTyExpTgt: OS << "ExpTgt"; break; 862 case ImmTyExpCompr: OS << "ExpCompr"; break; 863 case ImmTyExpVM: OS << "ExpVM"; break; 864 case ImmTyHwreg: OS << "Hwreg"; break; 865 case ImmTySendMsg: OS << "SendMsg"; break; 866 case ImmTyInterpSlot: OS << "InterpSlot"; break; 867 case ImmTyInterpAttr: OS << "InterpAttr"; break; 868 case ImmTyAttrChan: OS << "AttrChan"; break; 869 case ImmTyOpSel: OS << "OpSel"; break; 870 case ImmTyOpSelHi: OS << "OpSelHi"; break; 871 case ImmTyNegLo: OS << "NegLo"; break; 872 case ImmTyNegHi: OS << "NegHi"; break; 873 case ImmTySwizzle: OS << "Swizzle"; break; 874 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 875 case ImmTyHigh: OS << "High"; break; 876 case ImmTyBLGP: OS << "BLGP"; break; 877 case ImmTyCBSZ: OS << "CBSZ"; break; 878 case ImmTyABID: OS << "ABID"; break; 879 case ImmTyEndpgm: OS << "Endpgm"; break; 880 } 881 } 882 883 void print(raw_ostream &OS) const override { 884 switch (Kind) { 885 case Register: 886 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 887 break; 888 case Immediate: 889 OS << '<' << getImm(); 890 if (getImmTy() != ImmTyNone) { 891 OS << " type: "; printImmTy(OS, getImmTy()); 892 } 893 OS << " mods: " << Imm.Mods << '>'; 894 break; 895 case Token: 896 OS << '\'' << getToken() << '\''; 897 break; 898 case Expression: 899 OS << "<expr " << *Expr << '>'; 900 break; 901 } 902 } 903 904 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 905 int64_t Val, SMLoc Loc, 906 ImmTy Type = ImmTyNone, 907 bool IsFPImm = false) { 908 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 909 Op->Imm.Val = Val; 910 Op->Imm.IsFPImm = IsFPImm; 911 Op->Imm.Type = Type; 912 Op->Imm.Mods = Modifiers(); 913 Op->StartLoc = Loc; 914 Op->EndLoc = Loc; 915 return Op; 916 } 917 918 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 919 StringRef Str, SMLoc Loc, 920 bool HasExplicitEncodingSize = true) { 921 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 922 Res->Tok.Data = Str.data(); 923 Res->Tok.Length = Str.size(); 924 Res->StartLoc = Loc; 925 Res->EndLoc = Loc; 926 return Res; 927 } 928 929 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 930 unsigned RegNo, SMLoc S, 931 SMLoc E) { 932 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 933 Op->Reg.RegNo = RegNo; 934 Op->Reg.Mods = Modifiers(); 935 Op->StartLoc = S; 936 Op->EndLoc = E; 937 return Op; 938 } 939 940 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 941 
const class MCExpr *Expr, SMLoc S) { 942 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 943 Op->Expr = Expr; 944 Op->StartLoc = S; 945 Op->EndLoc = S; 946 return Op; 947 } 948 }; 949 950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 951 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 952 return OS; 953 } 954 955 //===----------------------------------------------------------------------===// 956 // AsmParser 957 //===----------------------------------------------------------------------===// 958 959 // Holds info related to the current kernel, e.g. count of SGPRs used. 960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 961 // .amdgpu_hsa_kernel or at EOF. 962 class KernelScopeInfo { 963 int SgprIndexUnusedMin = -1; 964 int VgprIndexUnusedMin = -1; 965 MCContext *Ctx = nullptr; 966 967 void usesSgprAt(int i) { 968 if (i >= SgprIndexUnusedMin) { 969 SgprIndexUnusedMin = ++i; 970 if (Ctx) { 971 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 972 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 973 } 974 } 975 } 976 977 void usesVgprAt(int i) { 978 if (i >= VgprIndexUnusedMin) { 979 VgprIndexUnusedMin = ++i; 980 if (Ctx) { 981 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 982 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 983 } 984 } 985 } 986 987 public: 988 KernelScopeInfo() = default; 989 990 void initialize(MCContext &Context) { 991 Ctx = &Context; 992 usesSgprAt(SgprIndexUnusedMin = -1); 993 usesVgprAt(VgprIndexUnusedMin = -1); 994 } 995 996 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 997 switch (RegKind) { 998 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 999 case IS_AGPR: // fall through 1000 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1001 default: break; 1002 } 1003 } 1004 }; 1005 1006 class AMDGPUAsmParser : public MCTargetAsmParser { 1007 MCAsmParser &Parser; 1008 1009 // Number of extra operands parsed after the first optional operand. 1010 // This may be necessary to skip hardcoded mandatory operands. 1011 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1012 1013 unsigned ForcedEncodingSize = 0; 1014 bool ForcedDPP = false; 1015 bool ForcedSDWA = false; 1016 KernelScopeInfo KernelScope; 1017 1018 /// @name Auto-generated Match Functions 1019 /// { 1020 1021 #define GET_ASSEMBLER_HEADER 1022 #include "AMDGPUGenAsmMatcher.inc" 1023 1024 /// } 1025 1026 private: 1027 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1028 bool OutOfRangeError(SMRange Range); 1029 /// Calculate VGPR/SGPR blocks required for given target, reserved 1030 /// registers, and user-specified NextFreeXGPR values. 1031 /// 1032 /// \param Features [in] Target features, used for bug corrections. 1033 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1034 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1035 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1036 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1037 /// descriptor field, if valid. 1038 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1039 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1040 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1041 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1042 /// \param VGPRBlocks [out] Result VGPR block count. 1043 /// \param SGPRBlocks [out] Result SGPR block count. 1044 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1045 bool FlatScrUsed, bool XNACKUsed, 1046 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1047 SMRange VGPRRange, unsigned NextFreeSGPR, 1048 SMRange SGPRRange, unsigned &VGPRBlocks, 1049 unsigned &SGPRBlocks); 1050 bool ParseDirectiveAMDGCNTarget(); 1051 bool ParseDirectiveAMDHSAKernel(); 1052 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1053 bool ParseDirectiveHSACodeObjectVersion(); 1054 bool ParseDirectiveHSACodeObjectISA(); 1055 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1056 bool ParseDirectiveAMDKernelCodeT(); 1057 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1058 bool ParseDirectiveAMDGPUHsaKernel(); 1059 1060 bool ParseDirectiveISAVersion(); 1061 bool ParseDirectiveHSAMetadata(); 1062 bool ParseDirectivePALMetadataBegin(); 1063 bool ParseDirectivePALMetadata(); 1064 bool ParseDirectiveAMDGPULDS(); 1065 1066 /// Common code to parse out a block of text (typically YAML) between start and 1067 /// end directives. 1068 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1069 const char *AssemblerDirectiveEnd, 1070 std::string &CollectString); 1071 1072 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1073 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1074 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1075 unsigned &RegNum, unsigned &RegWidth, 1076 bool RestoreOnFailure = false); 1077 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1078 unsigned &RegNum, unsigned &RegWidth, 1079 SmallVectorImpl<AsmToken> &Tokens); 1080 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1081 unsigned &RegWidth, 1082 SmallVectorImpl<AsmToken> &Tokens); 1083 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1084 unsigned &RegWidth, 1085 SmallVectorImpl<AsmToken> &Tokens); 1086 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1087 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1088 bool ParseRegRange(unsigned& Num, unsigned& Width); 1089 unsigned getRegularReg(RegisterKind RegKind, 1090 unsigned RegNum, 1091 unsigned RegWidth, 1092 SMLoc Loc); 1093 1094 bool isRegister(); 1095 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1096 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1097 void initializeGprCountSymbol(RegisterKind RegKind); 1098 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1099 unsigned RegWidth); 1100 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1101 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1102 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1103 bool IsGdsHardcoded); 1104 1105 public: 1106 enum AMDGPUMatchResultTy { 1107 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1108 }; 1109 enum OperandMode { 1110 OperandMode_Default, 1111 OperandMode_NSA, 1112 }; 1113 1114 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1115 1116 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1117 const MCInstrInfo &MII, 1118 const MCTargetOptions &Options) 1119 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1120 MCAsmParserExtension::Initialize(Parser); 1121 1122 if (getFeatureBits().none()) { 1123 // Set default features. 
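      // With an empty feature set (e.g. no -mcpu/-mattr supplied), fall back
      // to the original southern-islands (gfx6) feature as a baseline.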
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
1229 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1230 } 1231 1232 const MCInstrInfo *getMII() const { 1233 return &MII; 1234 } 1235 1236 const FeatureBitset &getFeatureBits() const { 1237 return getSTI().getFeatureBits(); 1238 } 1239 1240 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1241 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1242 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1243 1244 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1245 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1246 bool isForcedDPP() const { return ForcedDPP; } 1247 bool isForcedSDWA() const { return ForcedSDWA; } 1248 ArrayRef<unsigned> getMatchedVariants() const; 1249 1250 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1251 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1252 bool RestoreOnFailure); 1253 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1254 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1255 SMLoc &EndLoc) override; 1256 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1257 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1258 unsigned Kind) override; 1259 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1260 OperandVector &Operands, MCStreamer &Out, 1261 uint64_t &ErrorInfo, 1262 bool MatchingInlineAsm) override; 1263 bool ParseDirective(AsmToken DirectiveID) override; 1264 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1265 OperandMode Mode = OperandMode_Default); 1266 StringRef parseMnemonicSuffix(StringRef Name); 1267 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1268 SMLoc NameLoc, OperandVector &Operands) override; 1269 //bool ProcessInstruction(MCInst &Inst); 1270 1271 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1272 1273 OperandMatchResultTy 1274 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1275 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1276 bool (*ConvertResult)(int64_t &) = nullptr); 1277 1278 OperandMatchResultTy 1279 parseOperandArrayWithPrefix(const char *Prefix, 1280 OperandVector &Operands, 1281 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1282 bool (*ConvertResult)(int64_t&) = nullptr); 1283 1284 OperandMatchResultTy 1285 parseNamedBit(const char *Name, OperandVector &Operands, 1286 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1287 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1288 StringRef &Value); 1289 1290 bool isModifier(); 1291 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1292 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1293 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1294 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1295 bool parseSP3NegModifier(); 1296 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1297 OperandMatchResultTy parseReg(OperandVector &Operands); 1298 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1299 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1300 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1301 
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1302 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1303 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1304 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1305 OperandMatchResultTy parseUfmt(int64_t &Format); 1306 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1307 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1308 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1309 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1310 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1311 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1312 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1313 1314 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1315 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1316 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1317 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1318 1319 bool parseCnt(int64_t &IntVal); 1320 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1321 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1322 1323 private: 1324 struct OperandInfoTy { 1325 int64_t Id; 1326 bool IsSymbolic = false; 1327 bool IsDefined = false; 1328 1329 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1330 }; 1331 1332 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1333 bool validateSendMsg(const OperandInfoTy &Msg, 1334 const OperandInfoTy &Op, 1335 const OperandInfoTy &Stream, 1336 const SMLoc Loc); 1337 1338 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1339 bool validateHwreg(const OperandInfoTy &HwReg, 1340 const int64_t Offset, 1341 const int64_t Width, 1342 const SMLoc Loc); 1343 1344 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1345 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1346 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1347 1348 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1349 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1350 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1351 bool validateSOPLiteral(const MCInst &Inst) const; 1352 bool validateConstantBusLimitations(const MCInst &Inst); 1353 bool validateEarlyClobberLimitations(const MCInst &Inst); 1354 bool validateIntClampSupported(const MCInst &Inst); 1355 bool validateMIMGAtomicDMask(const MCInst &Inst); 1356 bool validateMIMGGatherDMask(const MCInst &Inst); 1357 bool validateMovrels(const MCInst &Inst); 1358 bool validateMIMGDataSize(const MCInst &Inst); 1359 bool validateMIMGAddrSize(const MCInst &Inst); 1360 bool validateMIMGD16(const MCInst &Inst); 1361 bool validateMIMGDim(const MCInst &Inst); 1362 bool validateLdsDirect(const MCInst &Inst); 1363 bool validateOpSel(const MCInst &Inst); 1364 bool validateVccOperand(unsigned Reg) const; 1365 bool validateVOP3Literal(const MCInst &Inst) const; 1366 bool validateMAIAccWrite(const MCInst &Inst); 1367 unsigned getConstantBusLimit(unsigned Opcode) const; 1368 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1369 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1370 
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1371 1372 bool isId(const StringRef Id) const; 1373 bool isId(const AsmToken &Token, const StringRef Id) const; 1374 bool isToken(const AsmToken::TokenKind Kind) const; 1375 bool trySkipId(const StringRef Id); 1376 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1377 bool trySkipToken(const AsmToken::TokenKind Kind); 1378 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1379 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1380 bool parseId(StringRef &Val, const StringRef ErrMsg); 1381 1382 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1383 AsmToken::TokenKind getTokenKind() const; 1384 bool parseExpr(int64_t &Imm); 1385 bool parseExpr(OperandVector &Operands); 1386 StringRef getTokenStr() const; 1387 AsmToken peekToken(); 1388 AsmToken getToken() const; 1389 SMLoc getLoc() const; 1390 void lex(); 1391 1392 public: 1393 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1394 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1395 1396 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1397 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1398 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1399 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1400 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1401 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1402 1403 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1404 const unsigned MinVal, 1405 const unsigned MaxVal, 1406 const StringRef ErrMsg); 1407 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1408 bool parseSwizzleOffset(int64_t &Imm); 1409 bool parseSwizzleMacro(int64_t &Imm); 1410 bool parseSwizzleQuadPerm(int64_t &Imm); 1411 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1412 bool parseSwizzleBroadcast(int64_t &Imm); 1413 bool parseSwizzleSwap(int64_t &Imm); 1414 bool parseSwizzleReverse(int64_t &Imm); 1415 1416 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1417 int64_t parseGPRIdxMacro(); 1418 1419 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1420 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1421 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1422 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1423 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1424 1425 AMDGPUOperand::Ptr defaultDLC() const; 1426 AMDGPUOperand::Ptr defaultGLC() const; 1427 AMDGPUOperand::Ptr defaultSLC() const; 1428 1429 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1430 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1431 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1432 AMDGPUOperand::Ptr defaultFlatOffset() const; 1433 1434 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1435 1436 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1437 OptionalImmIndexMap &OptionalIdx); 1438 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1439 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1440 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1441 1442 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1443 1444 void cvtMIMG(MCInst &Inst, 
               const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
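// For example (illustrative):
//   getFltSemantics(4)         == &APFloat::IEEEsingle()
//   getFltSemantics(MVT::i16)  == &APFloat::IEEEhalf()  // i16 has the same width as f16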
1489 static const fltSemantics *getFltSemantics(unsigned Size) { 1490 switch (Size) { 1491 case 4: 1492 return &APFloat::IEEEsingle(); 1493 case 8: 1494 return &APFloat::IEEEdouble(); 1495 case 2: 1496 return &APFloat::IEEEhalf(); 1497 default: 1498 llvm_unreachable("unsupported fp type"); 1499 } 1500 } 1501 1502 static const fltSemantics *getFltSemantics(MVT VT) { 1503 return getFltSemantics(VT.getSizeInBits() / 8); 1504 } 1505 1506 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1507 switch (OperandType) { 1508 case AMDGPU::OPERAND_REG_IMM_INT32: 1509 case AMDGPU::OPERAND_REG_IMM_FP32: 1510 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1511 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1512 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1513 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1514 return &APFloat::IEEEsingle(); 1515 case AMDGPU::OPERAND_REG_IMM_INT64: 1516 case AMDGPU::OPERAND_REG_IMM_FP64: 1517 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1518 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1519 return &APFloat::IEEEdouble(); 1520 case AMDGPU::OPERAND_REG_IMM_INT16: 1521 case AMDGPU::OPERAND_REG_IMM_FP16: 1522 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1523 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1524 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1525 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1526 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1527 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1528 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1529 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1530 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1531 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1532 return &APFloat::IEEEhalf(); 1533 default: 1534 llvm_unreachable("unsupported fp type"); 1535 } 1536 } 1537 1538 //===----------------------------------------------------------------------===// 1539 // Operand 1540 //===----------------------------------------------------------------------===// 1541 1542 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1543 bool Lost; 1544 1545 // Convert literal to single precision 1546 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1547 APFloat::rmNearestTiesToEven, 1548 &Lost); 1549 // We allow precision lost but not overflow or underflow 1550 if (Status != APFloat::opOK && 1551 Lost && 1552 ((Status & APFloat::opOverflow) != 0 || 1553 (Status & APFloat::opUnderflow) != 0)) { 1554 return false; 1555 } 1556 1557 return true; 1558 } 1559 1560 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1561 return isUIntN(Size, Val) || isIntN(Size, Val); 1562 } 1563 1564 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1565 if (VT.getScalarType() == MVT::i16) { 1566 // FP immediate values are broken. 1567 return isInlinableIntLiteral(Val); 1568 } 1569 1570 // f16/v2f16 operands work correctly for all values. 1571 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1572 } 1573 1574 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1575 1576 // This is a hack to enable named inline values like 1577 // shared_base with both 32-bit and 64-bit operands. 1578 // Note that these values are defined as 1579 // 32-bit operands only. 1580 if (isInlineValue()) { 1581 return true; 1582 } 1583 1584 if (!isImmTy(ImmTyNone)) { 1585 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1586 return false; 1587 } 1588 // TODO: We should avoid using host float here. It would be better to 1589 // check the float bit values which is what a few other places do. 
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // these literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1679 1680 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1681 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1682 } 1683 1684 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1685 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1686 } 1687 1688 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1689 if (AsmParser->isVI()) 1690 return isVReg32(); 1691 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1692 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1693 else 1694 return false; 1695 } 1696 1697 bool AMDGPUOperand::isSDWAFP16Operand() const { 1698 return isSDWAOperand(MVT::f16); 1699 } 1700 1701 bool AMDGPUOperand::isSDWAFP32Operand() const { 1702 return isSDWAOperand(MVT::f32); 1703 } 1704 1705 bool AMDGPUOperand::isSDWAInt16Operand() const { 1706 return isSDWAOperand(MVT::i16); 1707 } 1708 1709 bool AMDGPUOperand::isSDWAInt32Operand() const { 1710 return isSDWAOperand(MVT::i32); 1711 } 1712 1713 bool AMDGPUOperand::isBoolReg() const { 1714 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1715 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1716 } 1717 1718 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1719 { 1720 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1721 assert(Size == 2 || Size == 4 || Size == 8); 1722 1723 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1724 1725 if (Imm.Mods.Abs) { 1726 Val &= ~FpSignMask; 1727 } 1728 if (Imm.Mods.Neg) { 1729 Val ^= FpSignMask; 1730 } 1731 1732 return Val; 1733 } 1734 1735 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1736 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1737 Inst.getNumOperands())) { 1738 addLiteralImmOperand(Inst, Imm.Val, 1739 ApplyModifiers & 1740 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1741 } else { 1742 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1743 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1744 } 1745 } 1746 1747 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1748 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1749 auto OpNum = Inst.getNumOperands(); 1750 // Check that this operand accepts literals 1751 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1752 1753 if (ApplyModifiers) { 1754 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1755 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum);
1756     Val = applyInputFPModifiers(Val, Size);
1757   }
1758
1759   APInt Literal(64, Val);
1760   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1761
1762   if (Imm.IsFPImm) { // We got fp literal token
1763     switch (OpTy) {
1764     case AMDGPU::OPERAND_REG_IMM_INT64:
1765     case AMDGPU::OPERAND_REG_IMM_FP64:
1766     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1767     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1768       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1769                                        AsmParser->hasInv2PiInlineImm())) {
1770         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1771         return;
1772       }
1773
1774       // Non-inlineable
1775       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1776         // For fp operands we check if low 32 bits are zeros
1777         if (Literal.getLoBits(32) != 0) {
1778           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1779           "Can't encode literal as exact 64-bit floating-point operand. "
1780           "Low 32-bits will be set to zero");
1781         }
1782
1783         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1784         return;
1785       }
1786
1787       // We don't allow fp literals in 64-bit integer instructions. It is
1788       // unclear how we should encode them. This case should be checked earlier
1789       // in predicate methods (isLiteralImm())
1790       llvm_unreachable("fp literal in 64-bit integer instruction.");
1791
1792     case AMDGPU::OPERAND_REG_IMM_INT32:
1793     case AMDGPU::OPERAND_REG_IMM_FP32:
1794     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1795     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1796     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1797     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1798     case AMDGPU::OPERAND_REG_IMM_INT16:
1799     case AMDGPU::OPERAND_REG_IMM_FP16:
1800     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1801     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1802     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1803     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1804     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1805     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1806     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1807     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1808     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1810       bool lost;
1811       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1812       // Convert the literal to the fp type expected by this operand
1813       FPLiteral.convert(*getOpFltSemantics(OpTy),
1814                         APFloat::rmNearestTiesToEven, &lost);
1815       // We allow precision loss but not overflow or underflow. This should be
1816       // checked earlier in isLiteralImm()
1817
1818       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1819       Inst.addOperand(MCOperand::createImm(ImmVal));
1820       return;
1821     }
1822     default:
1823       llvm_unreachable("invalid operand size");
1824     }
1825
1826     return;
1827   }
1828
1829   // We got int literal token.
1830   // Only sign extend inline immediates.
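  // For example, with a 32-bit operand an int literal such as -16 or 64 is an
  // inline constant and is emitted unchanged, whereas 0x12345678 is not
  // inlinable, so only its low 32 bits are emitted as a literal (see the
  // truncation below).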
1831 switch (OpTy) { 1832 case AMDGPU::OPERAND_REG_IMM_INT32: 1833 case AMDGPU::OPERAND_REG_IMM_FP32: 1834 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1835 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1836 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1837 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1838 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1839 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1840 if (isSafeTruncation(Val, 32) && 1841 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1842 AsmParser->hasInv2PiInlineImm())) { 1843 Inst.addOperand(MCOperand::createImm(Val)); 1844 return; 1845 } 1846 1847 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1848 return; 1849 1850 case AMDGPU::OPERAND_REG_IMM_INT64: 1851 case AMDGPU::OPERAND_REG_IMM_FP64: 1852 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1854 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1855 Inst.addOperand(MCOperand::createImm(Val)); 1856 return; 1857 } 1858 1859 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1860 return; 1861 1862 case AMDGPU::OPERAND_REG_IMM_INT16: 1863 case AMDGPU::OPERAND_REG_IMM_FP16: 1864 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1865 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1866 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1868 if (isSafeTruncation(Val, 16) && 1869 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1870 AsmParser->hasInv2PiInlineImm())) { 1871 Inst.addOperand(MCOperand::createImm(Val)); 1872 return; 1873 } 1874 1875 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1876 return; 1877 1878 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1879 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1882 assert(isSafeTruncation(Val, 16)); 1883 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1884 AsmParser->hasInv2PiInlineImm())); 1885 1886 Inst.addOperand(MCOperand::createImm(Val)); 1887 return; 1888 } 1889 default: 1890 llvm_unreachable("invalid operand size"); 1891 } 1892 } 1893 1894 template <unsigned Bitwidth> 1895 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1896 APInt Literal(64, Imm.Val); 1897 1898 if (!Imm.IsFPImm) { 1899 // We got int literal token. 
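    // Only the low Bitwidth bits of the integer literal are encoded for a KImm
    // operand, e.g. a 16-bit KImm keeps just the low 16 bits of the value.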
1900 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1901 return; 1902 } 1903 1904 bool Lost; 1905 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1906 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1907 APFloat::rmNearestTiesToEven, &Lost); 1908 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1909 } 1910 1911 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1912 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1913 } 1914 1915 static bool isInlineValue(unsigned Reg) { 1916 switch (Reg) { 1917 case AMDGPU::SRC_SHARED_BASE: 1918 case AMDGPU::SRC_SHARED_LIMIT: 1919 case AMDGPU::SRC_PRIVATE_BASE: 1920 case AMDGPU::SRC_PRIVATE_LIMIT: 1921 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1922 return true; 1923 case AMDGPU::SRC_VCCZ: 1924 case AMDGPU::SRC_EXECZ: 1925 case AMDGPU::SRC_SCC: 1926 return true; 1927 case AMDGPU::SGPR_NULL: 1928 return true; 1929 default: 1930 return false; 1931 } 1932 } 1933 1934 bool AMDGPUOperand::isInlineValue() const { 1935 return isRegKind() && ::isInlineValue(getReg()); 1936 } 1937 1938 //===----------------------------------------------------------------------===// 1939 // AsmParser 1940 //===----------------------------------------------------------------------===// 1941 1942 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1943 if (Is == IS_VGPR) { 1944 switch (RegWidth) { 1945 default: return -1; 1946 case 1: return AMDGPU::VGPR_32RegClassID; 1947 case 2: return AMDGPU::VReg_64RegClassID; 1948 case 3: return AMDGPU::VReg_96RegClassID; 1949 case 4: return AMDGPU::VReg_128RegClassID; 1950 case 5: return AMDGPU::VReg_160RegClassID; 1951 case 6: return AMDGPU::VReg_192RegClassID; 1952 case 8: return AMDGPU::VReg_256RegClassID; 1953 case 16: return AMDGPU::VReg_512RegClassID; 1954 case 32: return AMDGPU::VReg_1024RegClassID; 1955 } 1956 } else if (Is == IS_TTMP) { 1957 switch (RegWidth) { 1958 default: return -1; 1959 case 1: return AMDGPU::TTMP_32RegClassID; 1960 case 2: return AMDGPU::TTMP_64RegClassID; 1961 case 4: return AMDGPU::TTMP_128RegClassID; 1962 case 8: return AMDGPU::TTMP_256RegClassID; 1963 case 16: return AMDGPU::TTMP_512RegClassID; 1964 } 1965 } else if (Is == IS_SGPR) { 1966 switch (RegWidth) { 1967 default: return -1; 1968 case 1: return AMDGPU::SGPR_32RegClassID; 1969 case 2: return AMDGPU::SGPR_64RegClassID; 1970 case 3: return AMDGPU::SGPR_96RegClassID; 1971 case 4: return AMDGPU::SGPR_128RegClassID; 1972 case 5: return AMDGPU::SGPR_160RegClassID; 1973 case 6: return AMDGPU::SGPR_192RegClassID; 1974 case 8: return AMDGPU::SGPR_256RegClassID; 1975 case 16: return AMDGPU::SGPR_512RegClassID; 1976 } 1977 } else if (Is == IS_AGPR) { 1978 switch (RegWidth) { 1979 default: return -1; 1980 case 1: return AMDGPU::AGPR_32RegClassID; 1981 case 2: return AMDGPU::AReg_64RegClassID; 1982 case 3: return AMDGPU::AReg_96RegClassID; 1983 case 4: return AMDGPU::AReg_128RegClassID; 1984 case 5: return AMDGPU::AReg_160RegClassID; 1985 case 6: return AMDGPU::AReg_192RegClassID; 1986 case 8: return AMDGPU::AReg_256RegClassID; 1987 case 16: return AMDGPU::AReg_512RegClassID; 1988 case 32: return AMDGPU::AReg_1024RegClassID; 1989 } 1990 } 1991 return -1; 1992 } 1993 1994 static unsigned getSpecialRegForName(StringRef RegName) { 1995 return StringSwitch<unsigned>(RegName) 1996 .Case("exec", AMDGPU::EXEC) 1997 .Case("vcc", AMDGPU::VCC) 1998 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1999 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2000 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2001 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2002 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2003 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2004 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2005 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2006 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2007 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2008 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2009 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2010 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2011 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2012 .Case("m0", AMDGPU::M0) 2013 .Case("vccz", AMDGPU::SRC_VCCZ) 2014 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2015 .Case("execz", AMDGPU::SRC_EXECZ) 2016 .Case("src_execz", AMDGPU::SRC_EXECZ) 2017 .Case("scc", AMDGPU::SRC_SCC) 2018 .Case("src_scc", AMDGPU::SRC_SCC) 2019 .Case("tba", AMDGPU::TBA) 2020 .Case("tma", AMDGPU::TMA) 2021 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2022 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2023 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2024 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2025 .Case("vcc_lo", AMDGPU::VCC_LO) 2026 .Case("vcc_hi", AMDGPU::VCC_HI) 2027 .Case("exec_lo", AMDGPU::EXEC_LO) 2028 .Case("exec_hi", AMDGPU::EXEC_HI) 2029 .Case("tma_lo", AMDGPU::TMA_LO) 2030 .Case("tma_hi", AMDGPU::TMA_HI) 2031 .Case("tba_lo", AMDGPU::TBA_LO) 2032 .Case("tba_hi", AMDGPU::TBA_HI) 2033 .Case("pc", AMDGPU::PC_REG) 2034 .Case("null", AMDGPU::SGPR_NULL) 2035 .Default(AMDGPU::NoRegister); 2036 } 2037 2038 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2039 SMLoc &EndLoc, bool RestoreOnFailure) { 2040 auto R = parseRegister(); 2041 if (!R) return true; 2042 assert(R->isReg()); 2043 RegNo = R->getReg(); 2044 StartLoc = R->getStartLoc(); 2045 EndLoc = R->getEndLoc(); 2046 return false; 2047 } 2048 2049 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2050 SMLoc &EndLoc) { 2051 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2052 } 2053 2054 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2055 SMLoc &StartLoc, 2056 SMLoc &EndLoc) { 2057 bool Result = 2058 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2059 bool PendingErrors = getParser().hasPendingError(); 2060 getParser().clearPendingErrors(); 2061 if (PendingErrors) 2062 return MatchOperand_ParseFail; 2063 if (Result) 2064 return MatchOperand_NoMatch; 2065 return MatchOperand_Success; 2066 } 2067 2068 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2069 RegisterKind RegKind, unsigned Reg1, 2070 SMLoc Loc) { 2071 switch (RegKind) { 2072 case IS_SPECIAL: 2073 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2074 Reg = AMDGPU::EXEC; 2075 RegWidth = 2; 2076 return true; 2077 } 2078 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2079 Reg = AMDGPU::FLAT_SCR; 2080 RegWidth = 2; 2081 return true; 2082 } 2083 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2084 Reg = AMDGPU::XNACK_MASK; 2085 RegWidth = 2; 2086 return true; 2087 } 2088 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2089 Reg = AMDGPU::VCC; 2090 RegWidth = 2; 2091 return true; 2092 } 2093 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2094 Reg = AMDGPU::TBA; 2095 RegWidth = 2; 2096 return true; 2097 } 2098 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2099 Reg = AMDGPU::TMA; 2100 
RegWidth = 2; 2101 return true; 2102 } 2103 Error(Loc, "register does not fit in the list"); 2104 return false; 2105 case IS_VGPR: 2106 case IS_SGPR: 2107 case IS_AGPR: 2108 case IS_TTMP: 2109 if (Reg1 != Reg + RegWidth) { 2110 Error(Loc, "registers in a list must have consecutive indices"); 2111 return false; 2112 } 2113 RegWidth++; 2114 return true; 2115 default: 2116 llvm_unreachable("unexpected register kind"); 2117 } 2118 } 2119 2120 struct RegInfo { 2121 StringLiteral Name; 2122 RegisterKind Kind; 2123 }; 2124 2125 static constexpr RegInfo RegularRegisters[] = { 2126 {{"v"}, IS_VGPR}, 2127 {{"s"}, IS_SGPR}, 2128 {{"ttmp"}, IS_TTMP}, 2129 {{"acc"}, IS_AGPR}, 2130 {{"a"}, IS_AGPR}, 2131 }; 2132 2133 static bool isRegularReg(RegisterKind Kind) { 2134 return Kind == IS_VGPR || 2135 Kind == IS_SGPR || 2136 Kind == IS_TTMP || 2137 Kind == IS_AGPR; 2138 } 2139 2140 static const RegInfo* getRegularRegInfo(StringRef Str) { 2141 for (const RegInfo &Reg : RegularRegisters) 2142 if (Str.startswith(Reg.Name)) 2143 return &Reg; 2144 return nullptr; 2145 } 2146 2147 static bool getRegNum(StringRef Str, unsigned& Num) { 2148 return !Str.getAsInteger(10, Num); 2149 } 2150 2151 bool 2152 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2153 const AsmToken &NextToken) const { 2154 2155 // A list of consecutive registers: [s0,s1,s2,s3] 2156 if (Token.is(AsmToken::LBrac)) 2157 return true; 2158 2159 if (!Token.is(AsmToken::Identifier)) 2160 return false; 2161 2162 // A single register like s0 or a range of registers like s[0:1] 2163 2164 StringRef Str = Token.getString(); 2165 const RegInfo *Reg = getRegularRegInfo(Str); 2166 if (Reg) { 2167 StringRef RegName = Reg->Name; 2168 StringRef RegSuffix = Str.substr(RegName.size()); 2169 if (!RegSuffix.empty()) { 2170 unsigned Num; 2171 // A single register with an index: rXX 2172 if (getRegNum(RegSuffix, Num)) 2173 return true; 2174 } else { 2175 // A range of registers: r[XX:YY]. 2176 if (NextToken.is(AsmToken::LBrac)) 2177 return true; 2178 } 2179 } 2180 2181 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2182 } 2183 2184 bool 2185 AMDGPUAsmParser::isRegister() 2186 { 2187 return isRegister(getToken(), peekToken()); 2188 } 2189 2190 unsigned 2191 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2192 unsigned RegNum, 2193 unsigned RegWidth, 2194 SMLoc Loc) { 2195 2196 assert(isRegularReg(RegKind)); 2197 2198 unsigned AlignSize = 1; 2199 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2200 // SGPR and TTMP registers must be aligned. 2201 // Max required alignment is 4 dwords. 
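    // For example, a 2-register tuple such as s[2:3] must start at an even
    // index, while tuples of 4 or more registers (e.g. s[4:7] or ttmp[8:11])
    // must start at an index that is a multiple of 4.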
2202 AlignSize = std::min(RegWidth, 4u); 2203 } 2204 2205 if (RegNum % AlignSize != 0) { 2206 Error(Loc, "invalid register alignment"); 2207 return AMDGPU::NoRegister; 2208 } 2209 2210 unsigned RegIdx = RegNum / AlignSize; 2211 int RCID = getRegClass(RegKind, RegWidth); 2212 if (RCID == -1) { 2213 Error(Loc, "invalid or unsupported register size"); 2214 return AMDGPU::NoRegister; 2215 } 2216 2217 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2218 const MCRegisterClass RC = TRI->getRegClass(RCID); 2219 if (RegIdx >= RC.getNumRegs()) { 2220 Error(Loc, "register index is out of range"); 2221 return AMDGPU::NoRegister; 2222 } 2223 2224 return RC.getRegister(RegIdx); 2225 } 2226 2227 bool 2228 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2229 int64_t RegLo, RegHi; 2230 if (!skipToken(AsmToken::LBrac, "missing register index")) 2231 return false; 2232 2233 SMLoc FirstIdxLoc = getLoc(); 2234 SMLoc SecondIdxLoc; 2235 2236 if (!parseExpr(RegLo)) 2237 return false; 2238 2239 if (trySkipToken(AsmToken::Colon)) { 2240 SecondIdxLoc = getLoc(); 2241 if (!parseExpr(RegHi)) 2242 return false; 2243 } else { 2244 RegHi = RegLo; 2245 } 2246 2247 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2248 return false; 2249 2250 if (!isUInt<32>(RegLo)) { 2251 Error(FirstIdxLoc, "invalid register index"); 2252 return false; 2253 } 2254 2255 if (!isUInt<32>(RegHi)) { 2256 Error(SecondIdxLoc, "invalid register index"); 2257 return false; 2258 } 2259 2260 if (RegLo > RegHi) { 2261 Error(FirstIdxLoc, "first register index should not exceed second index"); 2262 return false; 2263 } 2264 2265 Num = static_cast<unsigned>(RegLo); 2266 Width = (RegHi - RegLo) + 1; 2267 return true; 2268 } 2269 2270 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2271 unsigned &RegNum, unsigned &RegWidth, 2272 SmallVectorImpl<AsmToken> &Tokens) { 2273 assert(isToken(AsmToken::Identifier)); 2274 unsigned Reg = getSpecialRegForName(getTokenStr()); 2275 if (Reg) { 2276 RegNum = 0; 2277 RegWidth = 1; 2278 RegKind = IS_SPECIAL; 2279 Tokens.push_back(getToken()); 2280 lex(); // skip register name 2281 } 2282 return Reg; 2283 } 2284 2285 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2286 unsigned &RegNum, unsigned &RegWidth, 2287 SmallVectorImpl<AsmToken> &Tokens) { 2288 assert(isToken(AsmToken::Identifier)); 2289 StringRef RegName = getTokenStr(); 2290 auto Loc = getLoc(); 2291 2292 const RegInfo *RI = getRegularRegInfo(RegName); 2293 if (!RI) { 2294 Error(Loc, "invalid register name"); 2295 return AMDGPU::NoRegister; 2296 } 2297 2298 Tokens.push_back(getToken()); 2299 lex(); // skip register name 2300 2301 RegKind = RI->Kind; 2302 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2303 if (!RegSuffix.empty()) { 2304 // Single 32-bit register: vXX. 2305 if (!getRegNum(RegSuffix, RegNum)) { 2306 Error(Loc, "invalid register index"); 2307 return AMDGPU::NoRegister; 2308 } 2309 RegWidth = 1; 2310 } else { 2311 // Range of registers: v[XX:YY]. ":YY" is optional. 
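    // For example, v[8:11] gives RegNum = 8 and RegWidth = 4, while v[5]
    // (no ":YY" part) is parsed the same way as v5, i.e. RegWidth = 1.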
2312 if (!ParseRegRange(RegNum, RegWidth)) 2313 return AMDGPU::NoRegister; 2314 } 2315 2316 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2317 } 2318 2319 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2320 unsigned &RegWidth, 2321 SmallVectorImpl<AsmToken> &Tokens) { 2322 unsigned Reg = AMDGPU::NoRegister; 2323 auto ListLoc = getLoc(); 2324 2325 if (!skipToken(AsmToken::LBrac, 2326 "expected a register or a list of registers")) { 2327 return AMDGPU::NoRegister; 2328 } 2329 2330 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2331 2332 auto Loc = getLoc(); 2333 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2334 return AMDGPU::NoRegister; 2335 if (RegWidth != 1) { 2336 Error(Loc, "expected a single 32-bit register"); 2337 return AMDGPU::NoRegister; 2338 } 2339 2340 for (; trySkipToken(AsmToken::Comma); ) { 2341 RegisterKind NextRegKind; 2342 unsigned NextReg, NextRegNum, NextRegWidth; 2343 Loc = getLoc(); 2344 2345 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2346 NextRegNum, NextRegWidth, 2347 Tokens)) { 2348 return AMDGPU::NoRegister; 2349 } 2350 if (NextRegWidth != 1) { 2351 Error(Loc, "expected a single 32-bit register"); 2352 return AMDGPU::NoRegister; 2353 } 2354 if (NextRegKind != RegKind) { 2355 Error(Loc, "registers in a list must be of the same kind"); 2356 return AMDGPU::NoRegister; 2357 } 2358 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2359 return AMDGPU::NoRegister; 2360 } 2361 2362 if (!skipToken(AsmToken::RBrac, 2363 "expected a comma or a closing square bracket")) { 2364 return AMDGPU::NoRegister; 2365 } 2366 2367 if (isRegularReg(RegKind)) 2368 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2369 2370 return Reg; 2371 } 2372 2373 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2374 unsigned &RegNum, unsigned &RegWidth, 2375 SmallVectorImpl<AsmToken> &Tokens) { 2376 auto Loc = getLoc(); 2377 Reg = AMDGPU::NoRegister; 2378 2379 if (isToken(AsmToken::Identifier)) { 2380 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2381 if (Reg == AMDGPU::NoRegister) 2382 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2383 } else { 2384 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2385 } 2386 2387 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2388 if (Reg == AMDGPU::NoRegister) { 2389 assert(Parser.hasPendingError()); 2390 return false; 2391 } 2392 2393 if (!subtargetHasRegister(*TRI, Reg)) { 2394 if (Reg == AMDGPU::SGPR_NULL) { 2395 Error(Loc, "'null' operand is not supported on this GPU"); 2396 } else { 2397 Error(Loc, "register not available on this GPU"); 2398 } 2399 return false; 2400 } 2401 2402 return true; 2403 } 2404 2405 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2406 unsigned &RegNum, unsigned &RegWidth, 2407 bool RestoreOnFailure /*=false*/) { 2408 Reg = AMDGPU::NoRegister; 2409 2410 SmallVector<AsmToken, 1> Tokens; 2411 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2412 if (RestoreOnFailure) { 2413 while (!Tokens.empty()) { 2414 getLexer().UnLex(Tokens.pop_back_val()); 2415 } 2416 } 2417 return true; 2418 } 2419 return false; 2420 } 2421 2422 Optional<StringRef> 2423 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2424 switch (RegKind) { 2425 case IS_VGPR: 2426 return StringRef(".amdgcn.next_free_vgpr"); 2427 case IS_SGPR: 2428 return StringRef(".amdgcn.next_free_sgpr"); 2429 default: 2430 return None; 2431 } 2432 } 2433 2434 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2435 auto SymbolName = getGprCountSymbolName(RegKind); 2436 assert(SymbolName && "initializing invalid register kind"); 2437 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2438 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2439 } 2440 2441 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2442 unsigned DwordRegIndex, 2443 unsigned RegWidth) { 2444 // Symbols are only defined for GCN targets 2445 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2446 return true; 2447 2448 auto SymbolName = getGprCountSymbolName(RegKind); 2449 if (!SymbolName) 2450 return true; 2451 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2452 2453 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2454 int64_t OldCount; 2455 2456 if (!Sym->isVariable()) 2457 return !Error(getParser().getTok().getLoc(), 2458 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2459 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2460 return !Error( 2461 getParser().getTok().getLoc(), 2462 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2463 2464 if (OldCount <= NewMax) 2465 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2466 2467 return true; 2468 } 2469 2470 std::unique_ptr<AMDGPUOperand> 2471 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2472 const auto &Tok = Parser.getTok(); 2473 SMLoc StartLoc = Tok.getLoc(); 2474 SMLoc EndLoc = Tok.getEndLoc(); 2475 RegisterKind RegKind; 2476 unsigned Reg, RegNum, RegWidth; 2477 2478 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2479 return nullptr; 2480 } 2481 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2482 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2483 return nullptr; 2484 } else 2485 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2486 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2487 } 2488 2489 OperandMatchResultTy 2490 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2491 // TODO: add syntactic sugar for 1/(2*PI) 2492 2493 assert(!isRegister()); 2494 assert(!isModifier()); 2495 2496 const auto& Tok = getToken(); 2497 const auto& NextTok = peekToken(); 2498 bool IsReal = Tok.is(AsmToken::Real); 2499 SMLoc S = getLoc(); 2500 bool Negate = false; 2501 2502 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2503 lex(); 2504 IsReal = true; 2505 Negate = true; 2506 } 2507 2508 if (IsReal) { 2509 // Floating-point expressions are not supported. 2510 // Can only allow floating-point literals with an 2511 // optional sign. 
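    // For example, literals like "1.0" or "-0.5" are accepted here, but an
    // expression such as "2.0 + 1.0" is not parsed as an fp literal.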
2512 2513 StringRef Num = getTokenStr(); 2514 lex(); 2515 2516 APFloat RealVal(APFloat::IEEEdouble()); 2517 auto roundMode = APFloat::rmNearestTiesToEven; 2518 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2519 return MatchOperand_ParseFail; 2520 } 2521 if (Negate) 2522 RealVal.changeSign(); 2523 2524 Operands.push_back( 2525 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2526 AMDGPUOperand::ImmTyNone, true)); 2527 2528 return MatchOperand_Success; 2529 2530 } else { 2531 int64_t IntVal; 2532 const MCExpr *Expr; 2533 SMLoc S = getLoc(); 2534 2535 if (HasSP3AbsModifier) { 2536 // This is a workaround for handling expressions 2537 // as arguments of SP3 'abs' modifier, for example: 2538 // |1.0| 2539 // |-1| 2540 // |1+x| 2541 // This syntax is not compatible with syntax of standard 2542 // MC expressions (due to the trailing '|'). 2543 SMLoc EndLoc; 2544 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2545 return MatchOperand_ParseFail; 2546 } else { 2547 if (Parser.parseExpression(Expr)) 2548 return MatchOperand_ParseFail; 2549 } 2550 2551 if (Expr->evaluateAsAbsolute(IntVal)) { 2552 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2553 } else { 2554 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2555 } 2556 2557 return MatchOperand_Success; 2558 } 2559 2560 return MatchOperand_NoMatch; 2561 } 2562 2563 OperandMatchResultTy 2564 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2565 if (!isRegister()) 2566 return MatchOperand_NoMatch; 2567 2568 if (auto R = parseRegister()) { 2569 assert(R->isReg()); 2570 Operands.push_back(std::move(R)); 2571 return MatchOperand_Success; 2572 } 2573 return MatchOperand_ParseFail; 2574 } 2575 2576 OperandMatchResultTy 2577 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2578 auto res = parseReg(Operands); 2579 if (res != MatchOperand_NoMatch) { 2580 return res; 2581 } else if (isModifier()) { 2582 return MatchOperand_NoMatch; 2583 } else { 2584 return parseImm(Operands, HasSP3AbsMod); 2585 } 2586 } 2587 2588 bool 2589 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2590 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2591 const auto &str = Token.getString(); 2592 return str == "abs" || str == "neg" || str == "sext"; 2593 } 2594 return false; 2595 } 2596 2597 bool 2598 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2599 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2600 } 2601 2602 bool 2603 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2604 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2605 } 2606 2607 bool 2608 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2609 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2610 } 2611 2612 // Check if this is an operand modifier or an opcode modifier 2613 // which may look like an expression but it is not. We should 2614 // avoid parsing these modifiers as expressions. Currently 2615 // recognized sequences are: 2616 // |...| 2617 // abs(...) 2618 // neg(...) 2619 // sext(...) 2620 // -reg 2621 // -|...| 2622 // -abs(...) 2623 // name:... 2624 // Note that simple opcode modifiers like 'gds' may be parsed as 2625 // expressions; this is a special case. See getExpressionAsToken. 
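// For example, this check accepts "abs(v0)", "|v1|", "-v2" and "row_shl:1",
// but not "-5", which is parsed as an ordinary (negated) integer expression.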
2626 //
2627 bool
2628 AMDGPUAsmParser::isModifier() {
2629
2630   AsmToken Tok = getToken();
2631   AsmToken NextToken[2];
2632   peekTokens(NextToken);
2633
2634   return isOperandModifier(Tok, NextToken[0]) ||
2635          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2636          isOpcodeModifierWithVal(Tok, NextToken[0]);
2637 }
2638
2639 // Check if the current token is an SP3 'neg' modifier.
2640 // Currently this modifier is allowed in the following contexts:
2641 //
2642 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2643 // 2. Before an 'abs' modifier: -abs(...)
2644 // 3. Before an SP3 'abs' modifier: -|...|
2645 //
2646 // In all other cases "-" is handled as a part
2647 // of an expression that follows the sign.
2648 //
2649 // Note: When "-" is followed by an integer literal,
2650 // this is interpreted as integer negation rather
2651 // than a floating-point NEG modifier applied to that literal.
2652 // Besides being counter-intuitive, such use of the floating-point
2653 // NEG modifier would have resulted in a different meaning
2654 // of integer literals used with VOP1/2/C and VOP3,
2655 // for example:
2656 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2657 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2658 // Negative fp literals with a preceding "-" are
2659 // handled likewise for uniformity.
2660 //
2661 bool
2662 AMDGPUAsmParser::parseSP3NegModifier() {
2663
2664   AsmToken NextToken[2];
2665   peekTokens(NextToken);
2666
2667   if (isToken(AsmToken::Minus) &&
2668       (isRegister(NextToken[0], NextToken[1]) ||
2669        NextToken[0].is(AsmToken::Pipe) ||
2670        isId(NextToken[0], "abs"))) {
2671     lex();
2672     return true;
2673   }
2674
2675   return false;
2676 }
2677
2678 OperandMatchResultTy
2679 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2680                                               bool AllowImm) {
2681   bool Neg, SP3Neg;
2682   bool Abs, SP3Abs;
2683   SMLoc Loc;
2684
2685   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2686   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2687     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2688     return MatchOperand_ParseFail;
2689   }
2690
2691   SP3Neg = parseSP3NegModifier();
2692
2693   Loc = getLoc();
2694   Neg = trySkipId("neg");
2695   if (Neg && SP3Neg) {
2696     Error(Loc, "expected register or immediate");
2697     return MatchOperand_ParseFail;
2698   }
2699   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2700     return MatchOperand_ParseFail;
2701
2702   Abs = trySkipId("abs");
2703   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2704     return MatchOperand_ParseFail;
2705
2706   Loc = getLoc();
2707   SP3Abs = trySkipToken(AsmToken::Pipe);
2708   if (Abs && SP3Abs) {
2709     Error(Loc, "expected register or immediate");
2710     return MatchOperand_ParseFail;
2711   }
2712
2713   OperandMatchResultTy Res;
2714   if (AllowImm) {
2715     Res = parseRegOrImm(Operands, SP3Abs);
2716   } else {
2717     Res = parseReg(Operands);
2718   }
2719   if (Res != MatchOperand_Success) {
2720     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2721 } 2722 2723 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2724 return MatchOperand_ParseFail; 2725 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2726 return MatchOperand_ParseFail; 2727 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2728 return MatchOperand_ParseFail; 2729 2730 AMDGPUOperand::Modifiers Mods; 2731 Mods.Abs = Abs || SP3Abs; 2732 Mods.Neg = Neg || SP3Neg; 2733 2734 if (Mods.hasFPModifiers()) { 2735 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2736 if (Op.isExpr()) { 2737 Error(Op.getStartLoc(), "expected an absolute expression"); 2738 return MatchOperand_ParseFail; 2739 } 2740 Op.setModifiers(Mods); 2741 } 2742 return MatchOperand_Success; 2743 } 2744 2745 OperandMatchResultTy 2746 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2747 bool AllowImm) { 2748 bool Sext = trySkipId("sext"); 2749 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2750 return MatchOperand_ParseFail; 2751 2752 OperandMatchResultTy Res; 2753 if (AllowImm) { 2754 Res = parseRegOrImm(Operands); 2755 } else { 2756 Res = parseReg(Operands); 2757 } 2758 if (Res != MatchOperand_Success) { 2759 return Sext? MatchOperand_ParseFail : Res; 2760 } 2761 2762 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2763 return MatchOperand_ParseFail; 2764 2765 AMDGPUOperand::Modifiers Mods; 2766 Mods.Sext = Sext; 2767 2768 if (Mods.hasIntModifiers()) { 2769 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2770 if (Op.isExpr()) { 2771 Error(Op.getStartLoc(), "expected an absolute expression"); 2772 return MatchOperand_ParseFail; 2773 } 2774 Op.setModifiers(Mods); 2775 } 2776 2777 return MatchOperand_Success; 2778 } 2779 2780 OperandMatchResultTy 2781 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2782 return parseRegOrImmWithFPInputMods(Operands, false); 2783 } 2784 2785 OperandMatchResultTy 2786 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2787 return parseRegOrImmWithIntInputMods(Operands, false); 2788 } 2789 2790 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2791 auto Loc = getLoc(); 2792 if (trySkipId("off")) { 2793 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2794 AMDGPUOperand::ImmTyOff, false)); 2795 return MatchOperand_Success; 2796 } 2797 2798 if (!isRegister()) 2799 return MatchOperand_NoMatch; 2800 2801 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2802 if (Reg) { 2803 Operands.push_back(std::move(Reg)); 2804 return MatchOperand_Success; 2805 } 2806 2807 return MatchOperand_ParseFail; 2808 2809 } 2810 2811 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2812 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2813 2814 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2815 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2816 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2817 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2818 return Match_InvalidOperand; 2819 2820 if ((TSFlags & SIInstrFlags::VOP3) && 2821 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2822 getForcedEncodingSize() != 64) 2823 return Match_PreferE32; 2824 2825 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2826 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2827 // v_mac_f32/16 allow only dst_sel == DWORD; 2828 auto OpNum = 2829 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2830     const auto &Op = Inst.getOperand(OpNum);
2831     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2832       return Match_InvalidOperand;
2833     }
2834   }
2835
2836   return Match_Success;
2837 }
2838
2839 // Which asm variants we should check
2840 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2841   if (getForcedEncodingSize() == 32) {
2842     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2843     return makeArrayRef(Variants);
2844   }
2845
2846   if (isForcedVOP3()) {
2847     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2848     return makeArrayRef(Variants);
2849   }
2850
2851   if (isForcedSDWA()) {
2852     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2853                                         AMDGPUAsmVariants::SDWA9};
2854     return makeArrayRef(Variants);
2855   }
2856
2857   if (isForcedDPP()) {
2858     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2859     return makeArrayRef(Variants);
2860   }
2861
2862   static const unsigned Variants[] = {
2863     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2864     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2865   };
2866
2867   return makeArrayRef(Variants);
2868 }
2869
2870 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2871   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2872   const unsigned Num = Desc.getNumImplicitUses();
2873   for (unsigned i = 0; i < Num; ++i) {
2874     unsigned Reg = Desc.ImplicitUses[i];
2875     switch (Reg) {
2876     case AMDGPU::FLAT_SCR:
2877     case AMDGPU::VCC:
2878     case AMDGPU::VCC_LO:
2879     case AMDGPU::VCC_HI:
2880     case AMDGPU::M0:
2881       return Reg;
2882     default:
2883       break;
2884     }
2885   }
2886   return AMDGPU::NoRegister;
2887 }
2888
2889 // NB: This code is correct only when used to check constant
2890 // bus limitations because GFX7 supports no f16 inline constants.
2891 // Note that there are no cases when a GFX7 opcode violates
2892 // constant bus limitations due to the use of an f16 constant.
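// For example, for a 32-bit operand the small integers -16..64 and a handful
// of fp values (0.5, 1.0, 2.0, 4.0 and their negations, plus 1/(2*pi) where
// supported) are inline constants; any other immediate is a literal and
// occupies a slot on the constant bus.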
2893 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2894 unsigned OpIdx) const { 2895 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2896 2897 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2898 return false; 2899 } 2900 2901 const MCOperand &MO = Inst.getOperand(OpIdx); 2902 2903 int64_t Val = MO.getImm(); 2904 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2905 2906 switch (OpSize) { // expected operand size 2907 case 8: 2908 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2909 case 4: 2910 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2911 case 2: { 2912 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2913 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2914 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2915 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2916 return AMDGPU::isInlinableIntLiteral(Val); 2917 2918 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2919 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2920 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2921 return AMDGPU::isInlinableIntLiteralV216(Val); 2922 2923 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2924 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2925 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2926 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2927 2928 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2929 } 2930 default: 2931 llvm_unreachable("invalid operand size"); 2932 } 2933 } 2934 2935 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2936 if (!isGFX10()) 2937 return 1; 2938 2939 switch (Opcode) { 2940 // 64-bit shift instructions can use only one scalar value input 2941 case AMDGPU::V_LSHLREV_B64: 2942 case AMDGPU::V_LSHLREV_B64_gfx10: 2943 case AMDGPU::V_LSHL_B64: 2944 case AMDGPU::V_LSHRREV_B64: 2945 case AMDGPU::V_LSHRREV_B64_gfx10: 2946 case AMDGPU::V_LSHR_B64: 2947 case AMDGPU::V_ASHRREV_I64: 2948 case AMDGPU::V_ASHRREV_I64_gfx10: 2949 case AMDGPU::V_ASHR_I64: 2950 return 1; 2951 default: 2952 return 2; 2953 } 2954 } 2955 2956 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2957 const MCOperand &MO = Inst.getOperand(OpIdx); 2958 if (MO.isImm()) { 2959 return !isInlineConstant(Inst, OpIdx); 2960 } else if (MO.isReg()) { 2961 auto Reg = MO.getReg(); 2962 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2963 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2964 } else { 2965 return true; 2966 } 2967 } 2968 2969 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2970 const unsigned Opcode = Inst.getOpcode(); 2971 const MCInstrDesc &Desc = MII.get(Opcode); 2972 unsigned ConstantBusUseCount = 0; 2973 unsigned NumLiterals = 0; 2974 unsigned LiteralSize; 2975 2976 if (Desc.TSFlags & 2977 (SIInstrFlags::VOPC | 2978 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2979 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2980 SIInstrFlags::SDWA)) { 2981 // Check special imm operands (used by madmk, etc) 2982 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2983 ++ConstantBusUseCount; 2984 } 2985 2986 SmallDenseSet<unsigned> SGPRsUsed; 2987 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2988 if (SGPRUsed != AMDGPU::NoRegister) { 2989 SGPRsUsed.insert(SGPRUsed); 2990 ++ConstantBusUseCount; 2991 } 2992 2993 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2994 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, 
AMDGPU::OpName::src1);
2995     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2996
2997     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2998
2999     for (int OpIdx : OpIndices) {
3000       if (OpIdx == -1) break;
3001
3002       const MCOperand &MO = Inst.getOperand(OpIdx);
3003       if (usesConstantBus(Inst, OpIdx)) {
3004         if (MO.isReg()) {
3005           const unsigned Reg = mc2PseudoReg(MO.getReg());
3006           // Pairs of registers with a partial intersection like these
3007           //   s0, s[0:1]
3008           //   flat_scratch_lo, flat_scratch
3009           //   flat_scratch_lo, flat_scratch_hi
3010           // are theoretically valid but they are disabled anyway.
3011           // Note that this code mimics SIInstrInfo::verifyInstruction
3012           if (!SGPRsUsed.count(Reg)) {
3013             SGPRsUsed.insert(Reg);
3014             ++ConstantBusUseCount;
3015           }
3016         } else { // Expression or a literal
3017
3018           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3019             continue; // special operand like VINTERP attr_chan
3020
3021           // An instruction may use only one literal.
3022           // This has been validated in a previous step.
3023           // See validateVOP3Literal.
3024           // This literal may be used as more than one operand.
3025           // If all these operands are of the same size,
3026           // this literal counts as one scalar value.
3027           // Otherwise it counts as 2 scalar values.
3028           // See "GFX10 Shader Programming", section 3.6.2.3.
3029
3030           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3031           if (Size < 4) Size = 4;
3032
3033           if (NumLiterals == 0) {
3034             NumLiterals = 1;
3035             LiteralSize = Size;
3036           } else if (LiteralSize != Size) {
3037             NumLiterals = 2;
3038           }
3039         }
3040       }
3041     }
3042   }
3043   ConstantBusUseCount += NumLiterals;
3044
3045   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
3046 }
3047
3048 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
3049   const unsigned Opcode = Inst.getOpcode();
3050   const MCInstrDesc &Desc = MII.get(Opcode);
3051
3052   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3053   if (DstIdx == -1 ||
3054       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3055     return true;
3056   }
3057
3058   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3059
3060   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3061   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3062   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3063
3064   assert(DstIdx != -1);
3065   const MCOperand &Dst = Inst.getOperand(DstIdx);
3066   assert(Dst.isReg());
3067   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3068
3069   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3070
3071   for (int SrcIdx : SrcIndices) {
3072     if (SrcIdx == -1) break;
3073     const MCOperand &Src = Inst.getOperand(SrcIdx);
3074     if (Src.isReg()) {
3075       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3076       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3077         return false;
3078       }
3079     }
3080   }
3081
3082   return true;
3083 }
3084
3085 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3086
3087   const unsigned Opc = Inst.getOpcode();
3088   const MCInstrDesc &Desc = MII.get(Opc);
3089
3090   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3091     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3092     assert(ClampIdx != -1);
3093     return Inst.getOperand(ClampIdx).getImm() == 0;
3094   }
3095
3096   return true;
3097 }
3098
3099 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst
&Inst) { 3100 3101 const unsigned Opc = Inst.getOpcode(); 3102 const MCInstrDesc &Desc = MII.get(Opc); 3103 3104 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3105 return true; 3106 3107 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3108 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3109 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3110 3111 assert(VDataIdx != -1); 3112 assert(DMaskIdx != -1); 3113 assert(TFEIdx != -1); 3114 3115 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3116 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3117 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3118 if (DMask == 0) 3119 DMask = 1; 3120 3121 unsigned DataSize = 3122 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3123 if (hasPackedD16()) { 3124 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3125 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3126 DataSize = (DataSize + 1) / 2; 3127 } 3128 3129 return (VDataSize / 4) == DataSize + TFESize; 3130 } 3131 3132 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3133 const unsigned Opc = Inst.getOpcode(); 3134 const MCInstrDesc &Desc = MII.get(Opc); 3135 3136 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3137 return true; 3138 3139 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3140 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3141 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3142 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3143 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3144 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3145 3146 assert(VAddr0Idx != -1); 3147 assert(SrsrcIdx != -1); 3148 assert(DimIdx != -1); 3149 assert(SrsrcIdx > VAddr0Idx); 3150 3151 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3152 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3153 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3154 unsigned VAddrSize = 3155 IsNSA ? SrsrcIdx - VAddr0Idx 3156 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3157 3158 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3159 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3160 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3161 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3162 if (!IsNSA) { 3163 if (AddrSize > 8) 3164 AddrSize = 16; 3165 else if (AddrSize > 4) 3166 AddrSize = 8; 3167 } 3168 3169 return VAddrSize == AddrSize; 3170 } 3171 3172 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3173 3174 const unsigned Opc = Inst.getOpcode(); 3175 const MCInstrDesc &Desc = MII.get(Opc); 3176 3177 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3178 return true; 3179 if (!Desc.mayLoad() || !Desc.mayStore()) 3180 return true; // Not atomic 3181 3182 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3183 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3184 3185 // This is an incomplete check because image_atomic_cmpswap 3186 // may only use 0x3 and 0xf while other atomic operations 3187 // may use 0x1 and 0x3. However these limitations are 3188 // verified when we check that dmask matches dst size. 
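  // For example, a 32-bit image_atomic_add uses dmask 0x1 (0x3 for the 64-bit
  // form), while image_atomic_cmpswap writes a swap/compare pair and therefore
  // uses 0x3 (or 0xf for the 64-bit form).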
3189 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3190 } 3191 3192 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3193 3194 const unsigned Opc = Inst.getOpcode(); 3195 const MCInstrDesc &Desc = MII.get(Opc); 3196 3197 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3198 return true; 3199 3200 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3201 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3202 3203 // GATHER4 instructions use dmask in a different fashion compared to 3204 // other MIMG instructions. The only useful DMASK values are 3205 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3206 // (red,red,red,red) etc.) The ISA document doesn't mention 3207 // this. 3208 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3209 } 3210 3211 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3212 { 3213 switch (Opcode) { 3214 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3215 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3216 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3217 return true; 3218 default: 3219 return false; 3220 } 3221 } 3222 3223 // movrels* opcodes should only allow VGPRS as src0. 3224 // This is specified in .td description for vop1/vop3, 3225 // but sdwa is handled differently. See isSDWAOperand. 3226 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3227 3228 const unsigned Opc = Inst.getOpcode(); 3229 const MCInstrDesc &Desc = MII.get(Opc); 3230 3231 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3232 return true; 3233 3234 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3235 assert(Src0Idx != -1); 3236 3237 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3238 if (!Src0.isReg()) 3239 return false; 3240 3241 auto Reg = Src0.getReg(); 3242 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3243 return !isSGPR(mc2PseudoReg(Reg), TRI); 3244 } 3245 3246 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3247 3248 const unsigned Opc = Inst.getOpcode(); 3249 3250 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3251 return true; 3252 3253 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3254 assert(Src0Idx != -1); 3255 3256 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3257 if (!Src0.isReg()) 3258 return true; 3259 3260 auto Reg = Src0.getReg(); 3261 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3262 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3263 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3264 return false; 3265 } 3266 3267 return true; 3268 } 3269 3270 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3271 3272 const unsigned Opc = Inst.getOpcode(); 3273 const MCInstrDesc &Desc = MII.get(Opc); 3274 3275 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3276 return true; 3277 3278 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3279 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3280 if (isCI() || isSI()) 3281 return false; 3282 } 3283 3284 return true; 3285 } 3286 3287 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3288 const unsigned Opc = Inst.getOpcode(); 3289 const MCInstrDesc &Desc = MII.get(Opc); 3290 3291 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3292 return true; 3293 3294 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3295 if (DimIdx < 0) 3296 return true; 3297 3298 long Imm = Inst.getOperand(DimIdx).getImm(); 3299 if (Imm < 0 || Imm >= 8) 3300 return false; 3301 3302 return 
true; 3303 } 3304 3305 static bool IsRevOpcode(const unsigned Opcode) 3306 { 3307 switch (Opcode) { 3308 case AMDGPU::V_SUBREV_F32_e32: 3309 case AMDGPU::V_SUBREV_F32_e64: 3310 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3311 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3312 case AMDGPU::V_SUBREV_F32_e32_vi: 3313 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3314 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3315 case AMDGPU::V_SUBREV_F32_e64_vi: 3316 3317 case AMDGPU::V_SUBREV_CO_U32_e32: 3318 case AMDGPU::V_SUBREV_CO_U32_e64: 3319 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3320 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3321 3322 case AMDGPU::V_SUBBREV_U32_e32: 3323 case AMDGPU::V_SUBBREV_U32_e64: 3324 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3325 case AMDGPU::V_SUBBREV_U32_e32_vi: 3326 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3327 case AMDGPU::V_SUBBREV_U32_e64_vi: 3328 3329 case AMDGPU::V_SUBREV_U32_e32: 3330 case AMDGPU::V_SUBREV_U32_e64: 3331 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3332 case AMDGPU::V_SUBREV_U32_e32_vi: 3333 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3334 case AMDGPU::V_SUBREV_U32_e64_vi: 3335 3336 case AMDGPU::V_SUBREV_F16_e32: 3337 case AMDGPU::V_SUBREV_F16_e64: 3338 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3339 case AMDGPU::V_SUBREV_F16_e32_vi: 3340 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3341 case AMDGPU::V_SUBREV_F16_e64_vi: 3342 3343 case AMDGPU::V_SUBREV_U16_e32: 3344 case AMDGPU::V_SUBREV_U16_e64: 3345 case AMDGPU::V_SUBREV_U16_e32_vi: 3346 case AMDGPU::V_SUBREV_U16_e64_vi: 3347 3348 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3349 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3350 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3351 3352 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3353 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3354 3355 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3356 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3357 3358 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3359 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3360 3361 case AMDGPU::V_LSHRREV_B32_e32: 3362 case AMDGPU::V_LSHRREV_B32_e64: 3363 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3364 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3365 case AMDGPU::V_LSHRREV_B32_e32_vi: 3366 case AMDGPU::V_LSHRREV_B32_e64_vi: 3367 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3368 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3369 3370 case AMDGPU::V_ASHRREV_I32_e32: 3371 case AMDGPU::V_ASHRREV_I32_e64: 3372 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3373 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3374 case AMDGPU::V_ASHRREV_I32_e32_vi: 3375 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3376 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3377 case AMDGPU::V_ASHRREV_I32_e64_vi: 3378 3379 case AMDGPU::V_LSHLREV_B32_e32: 3380 case AMDGPU::V_LSHLREV_B32_e64: 3381 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3382 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3383 case AMDGPU::V_LSHLREV_B32_e32_vi: 3384 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3385 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3386 case AMDGPU::V_LSHLREV_B32_e64_vi: 3387 3388 case AMDGPU::V_LSHLREV_B16_e32: 3389 case AMDGPU::V_LSHLREV_B16_e64: 3390 case AMDGPU::V_LSHLREV_B16_e32_vi: 3391 case AMDGPU::V_LSHLREV_B16_e64_vi: 3392 case AMDGPU::V_LSHLREV_B16_gfx10: 3393 3394 case AMDGPU::V_LSHRREV_B16_e32: 3395 case AMDGPU::V_LSHRREV_B16_e64: 3396 case AMDGPU::V_LSHRREV_B16_e32_vi: 3397 case AMDGPU::V_LSHRREV_B16_e64_vi: 3398 case AMDGPU::V_LSHRREV_B16_gfx10: 3399 3400 case AMDGPU::V_ASHRREV_I16_e32: 3401 case AMDGPU::V_ASHRREV_I16_e64: 3402 case AMDGPU::V_ASHRREV_I16_e32_vi: 3403 case AMDGPU::V_ASHRREV_I16_e64_vi: 3404 case AMDGPU::V_ASHRREV_I16_gfx10: 3405 3406 case 
AMDGPU::V_LSHLREV_B64: 3407 case AMDGPU::V_LSHLREV_B64_gfx10: 3408 case AMDGPU::V_LSHLREV_B64_vi: 3409 3410 case AMDGPU::V_LSHRREV_B64: 3411 case AMDGPU::V_LSHRREV_B64_gfx10: 3412 case AMDGPU::V_LSHRREV_B64_vi: 3413 3414 case AMDGPU::V_ASHRREV_I64: 3415 case AMDGPU::V_ASHRREV_I64_gfx10: 3416 case AMDGPU::V_ASHRREV_I64_vi: 3417 3418 case AMDGPU::V_PK_LSHLREV_B16: 3419 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3420 case AMDGPU::V_PK_LSHLREV_B16_vi: 3421 3422 case AMDGPU::V_PK_LSHRREV_B16: 3423 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3424 case AMDGPU::V_PK_LSHRREV_B16_vi: 3425 case AMDGPU::V_PK_ASHRREV_I16: 3426 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3427 case AMDGPU::V_PK_ASHRREV_I16_vi: 3428 return true; 3429 default: 3430 return false; 3431 } 3432 } 3433 3434 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3435 3436 using namespace SIInstrFlags; 3437 const unsigned Opcode = Inst.getOpcode(); 3438 const MCInstrDesc &Desc = MII.get(Opcode); 3439 3440 // lds_direct register is defined so that it can be used 3441 // with 9-bit operands only. Ignore encodings which do not accept these. 3442 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3443 return true; 3444 3445 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3446 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3447 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3448 3449 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3450 3451 // lds_direct cannot be specified as either src1 or src2. 3452 for (int SrcIdx : SrcIndices) { 3453 if (SrcIdx == -1) break; 3454 const MCOperand &Src = Inst.getOperand(SrcIdx); 3455 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3456 return false; 3457 } 3458 } 3459 3460 if (Src0Idx == -1) 3461 return true; 3462 3463 const MCOperand &Src = Inst.getOperand(Src0Idx); 3464 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3465 return true; 3466 3467 // lds_direct is specified as src0. Check additional limitations. 3468 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3469 } 3470 3471 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3472 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3473 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3474 if (Op.isFlatOffset()) 3475 return Op.getStartLoc(); 3476 } 3477 return getLoc(); 3478 } 3479 3480 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3481 const OperandVector &Operands) { 3482 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3483 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3484 return true; 3485 3486 auto Opcode = Inst.getOpcode(); 3487 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3488 assert(OpNum != -1); 3489 3490 const auto &Op = Inst.getOperand(OpNum); 3491 if (!hasFlatOffsets() && Op.getImm() != 0) { 3492 Error(getFlatOffsetLoc(Operands), 3493 "flat offset modifier is not supported on this GPU"); 3494 return false; 3495 } 3496 3497 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3498 // For FLAT segment the offset must be positive; 3499 // MSB is ignored and forced to zero. 3500 unsigned OffsetSize = isGFX9() ? 13 : 12; 3501 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3502 if (!isIntN(OffsetSize, Op.getImm())) { 3503 Error(getFlatOffsetLoc(Operands), 3504 isGFX9() ? 
"expected a 13-bit signed offset" : 3505 "expected a 12-bit signed offset"); 3506 return false; 3507 } 3508 } else { 3509 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3510 Error(getFlatOffsetLoc(Operands), 3511 isGFX9() ? "expected a 12-bit unsigned offset" : 3512 "expected an 11-bit unsigned offset"); 3513 return false; 3514 } 3515 } 3516 3517 return true; 3518 } 3519 3520 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3521 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3522 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3523 if (Op.isSMEMOffset()) 3524 return Op.getStartLoc(); 3525 } 3526 return getLoc(); 3527 } 3528 3529 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3530 const OperandVector &Operands) { 3531 if (isCI() || isSI()) 3532 return true; 3533 3534 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3535 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3536 return true; 3537 3538 auto Opcode = Inst.getOpcode(); 3539 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3540 if (OpNum == -1) 3541 return true; 3542 3543 const auto &Op = Inst.getOperand(OpNum); 3544 if (!Op.isImm()) 3545 return true; 3546 3547 uint64_t Offset = Op.getImm(); 3548 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3549 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3550 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3551 return true; 3552 3553 Error(getSMEMOffsetLoc(Operands), 3554 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3555 "expected a 21-bit signed offset"); 3556 3557 return false; 3558 } 3559 3560 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3561 unsigned Opcode = Inst.getOpcode(); 3562 const MCInstrDesc &Desc = MII.get(Opcode); 3563 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3564 return true; 3565 3566 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3567 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3568 3569 const int OpIndices[] = { Src0Idx, Src1Idx }; 3570 3571 unsigned NumExprs = 0; 3572 unsigned NumLiterals = 0; 3573 uint32_t LiteralValue; 3574 3575 for (int OpIdx : OpIndices) { 3576 if (OpIdx == -1) break; 3577 3578 const MCOperand &MO = Inst.getOperand(OpIdx); 3579 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3580 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3581 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3582 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3583 if (NumLiterals == 0 || LiteralValue != Value) { 3584 LiteralValue = Value; 3585 ++NumLiterals; 3586 } 3587 } else if (MO.isExpr()) { 3588 ++NumExprs; 3589 } 3590 } 3591 } 3592 3593 return NumLiterals + NumExprs <= 1; 3594 } 3595 3596 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3597 const unsigned Opc = Inst.getOpcode(); 3598 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3599 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3600 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3601 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3602 3603 if (OpSel & ~3) 3604 return false; 3605 } 3606 return true; 3607 } 3608 3609 // Check if VCC register matches wavefront size 3610 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3611 auto FB = getFeatureBits(); 3612 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3613 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3614 } 3615 3616 // 
VOP3 literal is only allowed in GFX10+ and only one can be used 3617 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3618 unsigned Opcode = Inst.getOpcode(); 3619 const MCInstrDesc &Desc = MII.get(Opcode); 3620 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3621 return true; 3622 3623 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3624 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3625 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3626 3627 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3628 3629 unsigned NumExprs = 0; 3630 unsigned NumLiterals = 0; 3631 uint32_t LiteralValue; 3632 3633 for (int OpIdx : OpIndices) { 3634 if (OpIdx == -1) break; 3635 3636 const MCOperand &MO = Inst.getOperand(OpIdx); 3637 if (!MO.isImm() && !MO.isExpr()) 3638 continue; 3639 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3640 continue; 3641 3642 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3643 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3644 return false; 3645 3646 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3647 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3648 if (NumLiterals == 0 || LiteralValue != Value) { 3649 LiteralValue = Value; 3650 ++NumLiterals; 3651 } 3652 } else if (MO.isExpr()) { 3653 ++NumExprs; 3654 } 3655 } 3656 NumLiterals += NumExprs; 3657 3658 return !NumLiterals || 3659 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3660 } 3661 3662 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3663 const SMLoc &IDLoc, 3664 const OperandVector &Operands) { 3665 if (!validateLdsDirect(Inst)) { 3666 Error(IDLoc, 3667 "invalid use of lds_direct"); 3668 return false; 3669 } 3670 if (!validateSOPLiteral(Inst)) { 3671 Error(IDLoc, 3672 "only one literal operand is allowed"); 3673 return false; 3674 } 3675 if (!validateVOP3Literal(Inst)) { 3676 Error(IDLoc, 3677 "invalid literal operand"); 3678 return false; 3679 } 3680 if (!validateConstantBusLimitations(Inst)) { 3681 Error(IDLoc, 3682 "invalid operand (violates constant bus restrictions)"); 3683 return false; 3684 } 3685 if (!validateEarlyClobberLimitations(Inst)) { 3686 Error(IDLoc, 3687 "destination must be different than all sources"); 3688 return false; 3689 } 3690 if (!validateIntClampSupported(Inst)) { 3691 Error(IDLoc, 3692 "integer clamping is not supported on this GPU"); 3693 return false; 3694 } 3695 if (!validateOpSel(Inst)) { 3696 Error(IDLoc, 3697 "invalid op_sel operand"); 3698 return false; 3699 } 3700 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
3701 if (!validateMIMGD16(Inst)) { 3702 Error(IDLoc, 3703 "d16 modifier is not supported on this GPU"); 3704 return false; 3705 } 3706 if (!validateMIMGDim(Inst)) { 3707 Error(IDLoc, "dim modifier is required on this GPU"); 3708 return false; 3709 } 3710 if (!validateMIMGDataSize(Inst)) { 3711 Error(IDLoc, 3712 "image data size does not match dmask and tfe"); 3713 return false; 3714 } 3715 if (!validateMIMGAddrSize(Inst)) { 3716 Error(IDLoc, 3717 "image address size does not match dim and a16"); 3718 return false; 3719 } 3720 if (!validateMIMGAtomicDMask(Inst)) { 3721 Error(IDLoc, 3722 "invalid atomic image dmask"); 3723 return false; 3724 } 3725 if (!validateMIMGGatherDMask(Inst)) { 3726 Error(IDLoc, 3727 "invalid image_gather dmask: only one bit must be set"); 3728 return false; 3729 } 3730 if (!validateMovrels(Inst)) { 3731 Error(IDLoc, "source operand must be a VGPR"); 3732 return false; 3733 } 3734 if (!validateFlatOffset(Inst, Operands)) { 3735 return false; 3736 } 3737 if (!validateSMEMOffset(Inst, Operands)) { 3738 return false; 3739 } 3740 if (!validateMAIAccWrite(Inst)) { 3741 return false; 3742 } 3743 3744 return true; 3745 } 3746 3747 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3748 const FeatureBitset &FBS, 3749 unsigned VariantID = 0); 3750 3751 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3752 OperandVector &Operands, 3753 MCStreamer &Out, 3754 uint64_t &ErrorInfo, 3755 bool MatchingInlineAsm) { 3756 MCInst Inst; 3757 unsigned Result = Match_Success; 3758 for (auto Variant : getMatchedVariants()) { 3759 uint64_t EI; 3760 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3761 Variant); 3762 // We order match statuses from least to most specific. We use most specific 3763 // status as resulting 3764 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3765 if ((R == Match_Success) || 3766 (R == Match_PreferE32) || 3767 (R == Match_MissingFeature && Result != Match_PreferE32) || 3768 (R == Match_InvalidOperand && Result != Match_MissingFeature 3769 && Result != Match_PreferE32) || 3770 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3771 && Result != Match_MissingFeature 3772 && Result != Match_PreferE32)) { 3773 Result = R; 3774 ErrorInfo = EI; 3775 } 3776 if (R == Match_Success) 3777 break; 3778 } 3779 3780 switch (Result) { 3781 default: break; 3782 case Match_Success: 3783 if (!validateInstruction(Inst, IDLoc, Operands)) { 3784 return true; 3785 } 3786 Inst.setLoc(IDLoc); 3787 Out.emitInstruction(Inst, getSTI()); 3788 return false; 3789 3790 case Match_MissingFeature: 3791 return Error(IDLoc, "instruction not supported on this GPU"); 3792 3793 case Match_MnemonicFail: { 3794 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3795 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3796 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3797 return Error(IDLoc, "invalid instruction" + Suggestion, 3798 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3799 } 3800 3801 case Match_InvalidOperand: { 3802 SMLoc ErrorLoc = IDLoc; 3803 if (ErrorInfo != ~0ULL) { 3804 if (ErrorInfo >= Operands.size()) { 3805 return Error(IDLoc, "too few operands for instruction"); 3806 } 3807 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3808 if (ErrorLoc == SMLoc()) 3809 ErrorLoc = IDLoc; 3810 } 3811 return Error(ErrorLoc, "invalid operand for instruction"); 3812 } 3813 3814 case Match_PreferE32: 3815 return Error(IDLoc, "internal error: instruction 
without _e64 suffix " 3816 "should be encoded as e32"); 3817 } 3818 llvm_unreachable("Implement any new match types added!"); 3819 } 3820 3821 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3822 int64_t Tmp = -1; 3823 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3824 return true; 3825 } 3826 if (getParser().parseAbsoluteExpression(Tmp)) { 3827 return true; 3828 } 3829 Ret = static_cast<uint32_t>(Tmp); 3830 return false; 3831 } 3832 3833 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3834 uint32_t &Minor) { 3835 if (ParseAsAbsoluteExpression(Major)) 3836 return TokError("invalid major version"); 3837 3838 if (getLexer().isNot(AsmToken::Comma)) 3839 return TokError("minor version number required, comma expected"); 3840 Lex(); 3841 3842 if (ParseAsAbsoluteExpression(Minor)) 3843 return TokError("invalid minor version"); 3844 3845 return false; 3846 } 3847 3848 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3849 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3850 return TokError("directive only supported for amdgcn architecture"); 3851 3852 std::string Target; 3853 3854 SMLoc TargetStart = getTok().getLoc(); 3855 if (getParser().parseEscapedString(Target)) 3856 return true; 3857 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3858 3859 std::string ExpectedTarget; 3860 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3861 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3862 3863 if (Target != ExpectedTargetOS.str()) 3864 return getParser().Error(TargetRange.Start, "target must match options", 3865 TargetRange); 3866 3867 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3868 return false; 3869 } 3870 3871 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3872 return getParser().Error(Range.Start, "value out of range", Range); 3873 } 3874 3875 bool AMDGPUAsmParser::calculateGPRBlocks( 3876 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3877 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3878 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3879 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3880 // TODO(scott.linder): These calculations are duplicated from 3881 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
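  // In outline (a rough sketch of the logic below, not a spec): on GFX10+ the
  // granulated SGPR count is not meaningful, so NumSGPRs is forced to zero;
  // on older targets the extra SGPRs implied by VCC / flat scratch / XNACK use
  // are added to the declared count first. Both counts are then converted to
  // the zero-based, granule-rounded "block" encodings expected by
  // COMPUTE_PGM_RSRC1 via the IsaInfo helpers.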
3882 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3883 3884 unsigned NumVGPRs = NextFreeVGPR; 3885 unsigned NumSGPRs = NextFreeSGPR; 3886 3887 if (Version.Major >= 10) 3888 NumSGPRs = 0; 3889 else { 3890 unsigned MaxAddressableNumSGPRs = 3891 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3892 3893 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3894 NumSGPRs > MaxAddressableNumSGPRs) 3895 return OutOfRangeError(SGPRRange); 3896 3897 NumSGPRs += 3898 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3899 3900 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3901 NumSGPRs > MaxAddressableNumSGPRs) 3902 return OutOfRangeError(SGPRRange); 3903 3904 if (Features.test(FeatureSGPRInitBug)) 3905 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3906 } 3907 3908 VGPRBlocks = 3909 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3910 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3911 3912 return false; 3913 } 3914 3915 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3916 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3917 return TokError("directive only supported for amdgcn architecture"); 3918 3919 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3920 return TokError("directive only supported for amdhsa OS"); 3921 3922 StringRef KernelName; 3923 if (getParser().parseIdentifier(KernelName)) 3924 return true; 3925 3926 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3927 3928 StringSet<> Seen; 3929 3930 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3931 3932 SMRange VGPRRange; 3933 uint64_t NextFreeVGPR = 0; 3934 SMRange SGPRRange; 3935 uint64_t NextFreeSGPR = 0; 3936 unsigned UserSGPRCount = 0; 3937 bool ReserveVCC = true; 3938 bool ReserveFlatScr = true; 3939 bool ReserveXNACK = hasXNACK(); 3940 Optional<bool> EnableWavefrontSize32; 3941 3942 while (true) { 3943 while (getLexer().is(AsmToken::EndOfStatement)) 3944 Lex(); 3945 3946 if (getLexer().isNot(AsmToken::Identifier)) 3947 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3948 3949 StringRef ID = getTok().getIdentifier(); 3950 SMRange IDRange = getTok().getLocRange(); 3951 Lex(); 3952 3953 if (ID == ".end_amdhsa_kernel") 3954 break; 3955 3956 if (Seen.find(ID) != Seen.end()) 3957 return TokError(".amdhsa_ directives cannot be repeated"); 3958 Seen.insert(ID); 3959 3960 SMLoc ValStart = getTok().getLoc(); 3961 int64_t IVal; 3962 if (getParser().parseAbsoluteExpression(IVal)) 3963 return true; 3964 SMLoc ValEnd = getTok().getLoc(); 3965 SMRange ValRange = SMRange(ValStart, ValEnd); 3966 3967 if (IVal < 0) 3968 return OutOfRangeError(ValRange); 3969 3970 uint64_t Val = IVal; 3971 3972 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3973 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3974 return OutOfRangeError(RANGE); \ 3975 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3976 3977 if (ID == ".amdhsa_group_segment_fixed_size") { 3978 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3979 return OutOfRangeError(ValRange); 3980 KD.group_segment_fixed_size = Val; 3981 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3982 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3983 return OutOfRangeError(ValRange); 3984 KD.private_segment_fixed_size = Val; 3985 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3986 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3987 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3988 Val, ValRange); 
3989 if (Val) 3990 UserSGPRCount += 4; 3991 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3992 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3993 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3994 ValRange); 3995 if (Val) 3996 UserSGPRCount += 2; 3997 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3998 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3999 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4000 ValRange); 4001 if (Val) 4002 UserSGPRCount += 2; 4003 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4004 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4005 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4006 Val, ValRange); 4007 if (Val) 4008 UserSGPRCount += 2; 4009 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4010 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4011 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4012 ValRange); 4013 if (Val) 4014 UserSGPRCount += 2; 4015 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4016 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4017 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4018 ValRange); 4019 if (Val) 4020 UserSGPRCount += 2; 4021 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4022 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4023 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4024 Val, ValRange); 4025 if (Val) 4026 UserSGPRCount += 1; 4027 } else if (ID == ".amdhsa_wavefront_size32") { 4028 if (IVersion.Major < 10) 4029 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4030 IDRange); 4031 EnableWavefrontSize32 = Val; 4032 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4033 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4034 Val, ValRange); 4035 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4036 PARSE_BITS_ENTRY( 4037 KD.compute_pgm_rsrc2, 4038 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4039 ValRange); 4040 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4041 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4042 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4043 ValRange); 4044 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4045 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4046 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4047 ValRange); 4048 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4049 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4050 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4051 ValRange); 4052 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4053 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4054 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4055 ValRange); 4056 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4057 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4058 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4059 ValRange); 4060 } else if (ID == ".amdhsa_next_free_vgpr") { 4061 VGPRRange = ValRange; 4062 NextFreeVGPR = Val; 4063 } else if (ID == ".amdhsa_next_free_sgpr") { 4064 SGPRRange = ValRange; 4065 NextFreeSGPR = Val; 4066 } else if (ID == ".amdhsa_reserve_vcc") { 4067 if (!isUInt<1>(Val)) 4068 return OutOfRangeError(ValRange); 4069 ReserveVCC = Val; 4070 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4071 if (IVersion.Major < 7) 4072 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4073 IDRange); 4074 if (!isUInt<1>(Val)) 4075 return OutOfRangeError(ValRange); 4076 ReserveFlatScr = Val; 4077 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4078 if (IVersion.Major < 8) 4079 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4080 IDRange); 4081 if (!isUInt<1>(Val)) 4082 return OutOfRangeError(ValRange); 4083 ReserveXNACK = Val; 4084 } else if (ID == ".amdhsa_float_round_mode_32") { 4085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4086 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4087 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4089 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4090 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4091 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4092 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4093 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4094 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4095 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4096 ValRange); 4097 } else if (ID == ".amdhsa_dx10_clamp") { 4098 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4099 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4100 } else if (ID == ".amdhsa_ieee_mode") { 4101 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4102 Val, ValRange); 4103 } else if (ID == ".amdhsa_fp16_overflow") { 4104 if (IVersion.Major < 9) 4105 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4106 IDRange); 4107 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4108 ValRange); 4109 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4110 if (IVersion.Major < 10) 4111 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4112 IDRange); 4113 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4114 ValRange); 4115 } else if (ID == ".amdhsa_memory_ordered") { 4116 if (IVersion.Major < 10) 4117 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4118 IDRange); 4119 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4120 ValRange); 4121 } else if (ID == ".amdhsa_forward_progress") { 4122 if (IVersion.Major < 10) 4123 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4124 IDRange); 4125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4126 ValRange); 4127 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4128 PARSE_BITS_ENTRY( 4129 KD.compute_pgm_rsrc2, 4130 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4131 ValRange); 4132 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4133 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4134 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4135 Val, ValRange); 4136 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4137 PARSE_BITS_ENTRY( 4138 KD.compute_pgm_rsrc2, 4139 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4140 ValRange); 4141 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4143 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4144 Val, ValRange); 4145 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4147 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4148 Val, ValRange); 4149 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4151 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4152 Val, ValRange); 4153 } else if (ID == ".amdhsa_exception_int_div_zero") { 4154 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4155 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4156 Val, ValRange); 4157 } else { 4158 return getParser().Error(IDRange.Start, 
4159 "unknown .amdhsa_kernel directive", IDRange); 4160 } 4161 4162 #undef PARSE_BITS_ENTRY 4163 } 4164 4165 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4166 return TokError(".amdhsa_next_free_vgpr directive is required"); 4167 4168 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4169 return TokError(".amdhsa_next_free_sgpr directive is required"); 4170 4171 unsigned VGPRBlocks; 4172 unsigned SGPRBlocks; 4173 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4174 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4175 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4176 SGPRBlocks)) 4177 return true; 4178 4179 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4180 VGPRBlocks)) 4181 return OutOfRangeError(VGPRRange); 4182 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4183 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4184 4185 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4186 SGPRBlocks)) 4187 return OutOfRangeError(SGPRRange); 4188 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4189 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4190 SGPRBlocks); 4191 4192 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4193 return TokError("too many user SGPRs enabled"); 4194 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4195 UserSGPRCount); 4196 4197 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4198 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4199 ReserveFlatScr, ReserveXNACK); 4200 return false; 4201 } 4202 4203 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4204 uint32_t Major; 4205 uint32_t Minor; 4206 4207 if (ParseDirectiveMajorMinor(Major, Minor)) 4208 return true; 4209 4210 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4211 return false; 4212 } 4213 4214 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4215 uint32_t Major; 4216 uint32_t Minor; 4217 uint32_t Stepping; 4218 StringRef VendorName; 4219 StringRef ArchName; 4220 4221 // If this directive has no arguments, then use the ISA version for the 4222 // targeted GPU. 
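  // The explicit form of the directive (values below are illustrative) is:
  //   .hsa_code_object_isa <major>,<minor>,<stepping>,"<vendor>","<arch>"
  // e.g. .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"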
4223 if (getLexer().is(AsmToken::EndOfStatement)) { 4224 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4225 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4226 ISA.Stepping, 4227 "AMD", "AMDGPU"); 4228 return false; 4229 } 4230 4231 if (ParseDirectiveMajorMinor(Major, Minor)) 4232 return true; 4233 4234 if (getLexer().isNot(AsmToken::Comma)) 4235 return TokError("stepping version number required, comma expected"); 4236 Lex(); 4237 4238 if (ParseAsAbsoluteExpression(Stepping)) 4239 return TokError("invalid stepping version"); 4240 4241 if (getLexer().isNot(AsmToken::Comma)) 4242 return TokError("vendor name required, comma expected"); 4243 Lex(); 4244 4245 if (getLexer().isNot(AsmToken::String)) 4246 return TokError("invalid vendor name"); 4247 4248 VendorName = getLexer().getTok().getStringContents(); 4249 Lex(); 4250 4251 if (getLexer().isNot(AsmToken::Comma)) 4252 return TokError("arch name required, comma expected"); 4253 Lex(); 4254 4255 if (getLexer().isNot(AsmToken::String)) 4256 return TokError("invalid arch name"); 4257 4258 ArchName = getLexer().getTok().getStringContents(); 4259 Lex(); 4260 4261 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4262 VendorName, ArchName); 4263 return false; 4264 } 4265 4266 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4267 amd_kernel_code_t &Header) { 4268 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4269 // assembly for backwards compatibility. 4270 if (ID == "max_scratch_backing_memory_byte_size") { 4271 Parser.eatToEndOfStatement(); 4272 return false; 4273 } 4274 4275 SmallString<40> ErrStr; 4276 raw_svector_ostream Err(ErrStr); 4277 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4278 return TokError(Err.str()); 4279 } 4280 Lex(); 4281 4282 if (ID == "enable_wavefront_size32") { 4283 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4284 if (!isGFX10()) 4285 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4286 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4287 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4288 } else { 4289 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4290 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4291 } 4292 } 4293 4294 if (ID == "wavefront_size") { 4295 if (Header.wavefront_size == 5) { 4296 if (!isGFX10()) 4297 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4298 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4299 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4300 } else if (Header.wavefront_size == 6) { 4301 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4302 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4303 } 4304 } 4305 4306 if (ID == "enable_wgp_mode") { 4307 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4308 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4309 } 4310 4311 if (ID == "enable_mem_ordered") { 4312 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4313 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4314 } 4315 4316 if (ID == "enable_fwd_progress") { 4317 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4318 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4319 } 4320 4321 return false; 4322 } 4323 4324 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4325 amd_kernel_code_t Header; 4326 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4327 4328 while (true) { 4329 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4330 // will set the current token to EndOfStatement. 4331 while(getLexer().is(AsmToken::EndOfStatement)) 4332 Lex(); 4333 4334 if (getLexer().isNot(AsmToken::Identifier)) 4335 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4336 4337 StringRef ID = getLexer().getTok().getIdentifier(); 4338 Lex(); 4339 4340 if (ID == ".end_amd_kernel_code_t") 4341 break; 4342 4343 if (ParseAMDKernelCodeTValue(ID, Header)) 4344 return true; 4345 } 4346 4347 getTargetStreamer().EmitAMDKernelCodeT(Header); 4348 4349 return false; 4350 } 4351 4352 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4353 if (getLexer().isNot(AsmToken::Identifier)) 4354 return TokError("expected symbol name"); 4355 4356 StringRef KernelName = Parser.getTok().getString(); 4357 4358 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4359 ELF::STT_AMDGPU_HSA_KERNEL); 4360 Lex(); 4361 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4362 KernelScope.initialize(getContext()); 4363 return false; 4364 } 4365 4366 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4367 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4368 return Error(getParser().getTok().getLoc(), 4369 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4370 "architectures"); 4371 } 4372 4373 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4374 4375 std::string ISAVersionStringFromSTI; 4376 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4377 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4378 4379 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4380 return Error(getParser().getTok().getLoc(), 4381 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4382 "arguments specified through the command line"); 4383 } 4384 4385 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4386 Lex(); 4387 4388 return false; 4389 } 4390 4391 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4392 const char *AssemblerDirectiveBegin; 4393 const char *AssemblerDirectiveEnd; 4394 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4395 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4396 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4397 HSAMD::V3::AssemblerDirectiveEnd) 4398 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4399 HSAMD::AssemblerDirectiveEnd); 4400 4401 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4402 return Error(getParser().getTok().getLoc(), 4403 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4404 "not available on non-amdhsa OSes")).str()); 4405 } 4406 4407 std::string HSAMetadataString; 4408 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4409 HSAMetadataString)) 4410 return true; 4411 4412 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4413 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4414 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4415 } else { 4416 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4417 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4418 } 4419 4420 return false; 4421 } 4422 4423 /// Common code to parse out a block of text (typically YAML) between start and 4424 /// end directives. 
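/// Used by both the HSA metadata directives and the MsgPack PAL metadata
/// directive below.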
4425 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4426 const char *AssemblerDirectiveEnd, 4427 std::string &CollectString) { 4428 4429 raw_string_ostream CollectStream(CollectString); 4430 4431 getLexer().setSkipSpace(false); 4432 4433 bool FoundEnd = false; 4434 while (!getLexer().is(AsmToken::Eof)) { 4435 while (getLexer().is(AsmToken::Space)) { 4436 CollectStream << getLexer().getTok().getString(); 4437 Lex(); 4438 } 4439 4440 if (getLexer().is(AsmToken::Identifier)) { 4441 StringRef ID = getLexer().getTok().getIdentifier(); 4442 if (ID == AssemblerDirectiveEnd) { 4443 Lex(); 4444 FoundEnd = true; 4445 break; 4446 } 4447 } 4448 4449 CollectStream << Parser.parseStringToEndOfStatement() 4450 << getContext().getAsmInfo()->getSeparatorString(); 4451 4452 Parser.eatToEndOfStatement(); 4453 } 4454 4455 getLexer().setSkipSpace(true); 4456 4457 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4458 return TokError(Twine("expected directive ") + 4459 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4460 } 4461 4462 CollectStream.flush(); 4463 return false; 4464 } 4465 4466 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4467 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4468 std::string String; 4469 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4470 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4471 return true; 4472 4473 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4474 if (!PALMetadata->setFromString(String)) 4475 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4476 return false; 4477 } 4478 4479 /// Parse the assembler directive for old linear-format PAL metadata. 4480 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4481 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4482 return Error(getParser().getTok().getLoc(), 4483 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4484 "not available on non-amdpal OSes")).str()); 4485 } 4486 4487 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4488 PALMetadata->setLegacy(); 4489 for (;;) { 4490 uint32_t Key, Value; 4491 if (ParseAsAbsoluteExpression(Key)) { 4492 return TokError(Twine("invalid value in ") + 4493 Twine(PALMD::AssemblerDirective)); 4494 } 4495 if (getLexer().isNot(AsmToken::Comma)) { 4496 return TokError(Twine("expected an even number of values in ") + 4497 Twine(PALMD::AssemblerDirective)); 4498 } 4499 Lex(); 4500 if (ParseAsAbsoluteExpression(Value)) { 4501 return TokError(Twine("invalid value in ") + 4502 Twine(PALMD::AssemblerDirective)); 4503 } 4504 PALMetadata->setRegister(Key, Value); 4505 if (getLexer().isNot(AsmToken::Comma)) 4506 break; 4507 Lex(); 4508 } 4509 return false; 4510 } 4511 4512 /// ParseDirectiveAMDGPULDS 4513 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4514 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4515 if (getParser().checkForValidSection()) 4516 return true; 4517 4518 StringRef Name; 4519 SMLoc NameLoc = getLexer().getLoc(); 4520 if (getParser().parseIdentifier(Name)) 4521 return TokError("expected identifier in directive"); 4522 4523 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4524 if (parseToken(AsmToken::Comma, "expected ','")) 4525 return true; 4526 4527 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4528 4529 int64_t Size; 4530 SMLoc SizeLoc = getLexer().getLoc(); 4531 if (getParser().parseAbsoluteExpression(Size)) 4532 return true; 4533 if (Size < 0) 4534 return 
Error(SizeLoc, "size must be non-negative"); 4535 if (Size > LocalMemorySize) 4536 return Error(SizeLoc, "size is too large"); 4537 4538 int64_t Alignment = 4; 4539 if (getLexer().is(AsmToken::Comma)) { 4540 Lex(); 4541 SMLoc AlignLoc = getLexer().getLoc(); 4542 if (getParser().parseAbsoluteExpression(Alignment)) 4543 return true; 4544 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4545 return Error(AlignLoc, "alignment must be a power of two"); 4546 4547 // Alignment larger than the size of LDS is possible in theory, as long 4548 // as the linker manages to place the symbol at address 0, but we do want 4549 // to make sure the alignment fits nicely into a 32-bit integer. 4550 if (Alignment >= 1u << 31) 4551 return Error(AlignLoc, "alignment is too large"); 4552 } 4553 4554 if (parseToken(AsmToken::EndOfStatement, 4555 "unexpected token in '.amdgpu_lds' directive")) 4556 return true; 4557 4558 Symbol->redefineIfPossible(); 4559 if (!Symbol->isUndefined()) 4560 return Error(NameLoc, "invalid symbol redefinition"); 4561 4562 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4563 return false; 4564 } 4565 4566 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4567 StringRef IDVal = DirectiveID.getString(); 4568 4569 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4570 if (IDVal == ".amdgcn_target") 4571 return ParseDirectiveAMDGCNTarget(); 4572 4573 if (IDVal == ".amdhsa_kernel") 4574 return ParseDirectiveAMDHSAKernel(); 4575 4576 // TODO: Restructure/combine with PAL metadata directive. 4577 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4578 return ParseDirectiveHSAMetadata(); 4579 } else { 4580 if (IDVal == ".hsa_code_object_version") 4581 return ParseDirectiveHSACodeObjectVersion(); 4582 4583 if (IDVal == ".hsa_code_object_isa") 4584 return ParseDirectiveHSACodeObjectISA(); 4585 4586 if (IDVal == ".amd_kernel_code_t") 4587 return ParseDirectiveAMDKernelCodeT(); 4588 4589 if (IDVal == ".amdgpu_hsa_kernel") 4590 return ParseDirectiveAMDGPUHsaKernel(); 4591 4592 if (IDVal == ".amd_amdgpu_isa") 4593 return ParseDirectiveISAVersion(); 4594 4595 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4596 return ParseDirectiveHSAMetadata(); 4597 } 4598 4599 if (IDVal == ".amdgpu_lds") 4600 return ParseDirectiveAMDGPULDS(); 4601 4602 if (IDVal == PALMD::AssemblerDirectiveBegin) 4603 return ParseDirectivePALMetadataBegin(); 4604 4605 if (IDVal == PALMD::AssemblerDirective) 4606 return ParseDirectivePALMetadata(); 4607 4608 return true; 4609 } 4610 4611 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4612 unsigned RegNo) const { 4613 4614 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4615 R.isValid(); ++R) { 4616 if (*R == RegNo) 4617 return isGFX9() || isGFX10(); 4618 } 4619 4620 // GFX10 has 2 more SGPRs 104 and 105.
4621 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4622 R.isValid(); ++R) { 4623 if (*R == RegNo) 4624 return hasSGPR104_SGPR105(); 4625 } 4626 4627 switch (RegNo) { 4628 case AMDGPU::SRC_SHARED_BASE: 4629 case AMDGPU::SRC_SHARED_LIMIT: 4630 case AMDGPU::SRC_PRIVATE_BASE: 4631 case AMDGPU::SRC_PRIVATE_LIMIT: 4632 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4633 return !isCI() && !isSI() && !isVI(); 4634 case AMDGPU::TBA: 4635 case AMDGPU::TBA_LO: 4636 case AMDGPU::TBA_HI: 4637 case AMDGPU::TMA: 4638 case AMDGPU::TMA_LO: 4639 case AMDGPU::TMA_HI: 4640 return !isGFX9() && !isGFX10(); 4641 case AMDGPU::XNACK_MASK: 4642 case AMDGPU::XNACK_MASK_LO: 4643 case AMDGPU::XNACK_MASK_HI: 4644 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4645 case AMDGPU::SGPR_NULL: 4646 return isGFX10(); 4647 default: 4648 break; 4649 } 4650 4651 if (isCI()) 4652 return true; 4653 4654 if (isSI() || isGFX10()) { 4655 // No flat_scr on SI. 4656 // On GFX10 flat scratch is not a valid register operand and can only be 4657 // accessed with s_setreg/s_getreg. 4658 switch (RegNo) { 4659 case AMDGPU::FLAT_SCR: 4660 case AMDGPU::FLAT_SCR_LO: 4661 case AMDGPU::FLAT_SCR_HI: 4662 return false; 4663 default: 4664 return true; 4665 } 4666 } 4667 4668 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4669 // SI/CI have. 4670 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4671 R.isValid(); ++R) { 4672 if (*R == RegNo) 4673 return hasSGPR102_SGPR103(); 4674 } 4675 4676 return true; 4677 } 4678 4679 OperandMatchResultTy 4680 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4681 OperandMode Mode) { 4682 // Try to parse with a custom parser 4683 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4684 4685 // If we successfully parsed the operand or if there was an error parsing, 4686 // we are done. 4687 // 4688 // If we are parsing after we reach EndOfStatement then this means we 4689 // are appending default values to the Operands list. This is only done 4690 // by a custom parser, so we shouldn't continue on to the generic parsing. 4691 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4692 getLexer().is(AsmToken::EndOfStatement)) 4693 return ResTy; 4694 4695 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4696 unsigned Prefix = Operands.size(); 4697 SMLoc LBraceLoc = getTok().getLoc(); 4698 Parser.Lex(); // eat the '[' 4699 4700 for (;;) { 4701 ResTy = parseReg(Operands); 4702 if (ResTy != MatchOperand_Success) 4703 return ResTy; 4704 4705 if (getLexer().is(AsmToken::RBrac)) 4706 break; 4707 4708 if (getLexer().isNot(AsmToken::Comma)) 4709 return MatchOperand_ParseFail; 4710 Parser.Lex(); 4711 } 4712 4713 if (Operands.size() - Prefix > 1) { 4714 Operands.insert(Operands.begin() + Prefix, 4715 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4716 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4717 getTok().getLoc())); 4718 } 4719 4720 Parser.Lex(); // eat the ']' 4721 return MatchOperand_Success; 4722 } 4723 4724 return parseRegOrImm(Operands); 4725 } 4726 4727 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4728 // Clear any forced encodings from the previous instruction.
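  // Recognized suffixes are _e32, _e64, _dpp and _sdwa. For example, an input
  // mnemonic such as "v_add_f32_e64" (illustrative) is matched as "v_add_f32"
  // with a forced 64-bit encoding.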
4729 setForcedEncodingSize(0); 4730 setForcedDPP(false); 4731 setForcedSDWA(false); 4732 4733 if (Name.endswith("_e64")) { 4734 setForcedEncodingSize(64); 4735 return Name.substr(0, Name.size() - 4); 4736 } else if (Name.endswith("_e32")) { 4737 setForcedEncodingSize(32); 4738 return Name.substr(0, Name.size() - 4); 4739 } else if (Name.endswith("_dpp")) { 4740 setForcedDPP(true); 4741 return Name.substr(0, Name.size() - 4); 4742 } else if (Name.endswith("_sdwa")) { 4743 setForcedSDWA(true); 4744 return Name.substr(0, Name.size() - 5); 4745 } 4746 return Name; 4747 } 4748 4749 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4750 StringRef Name, 4751 SMLoc NameLoc, OperandVector &Operands) { 4752 // Add the instruction mnemonic 4753 Name = parseMnemonicSuffix(Name); 4754 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4755 4756 bool IsMIMG = Name.startswith("image_"); 4757 4758 while (!getLexer().is(AsmToken::EndOfStatement)) { 4759 OperandMode Mode = OperandMode_Default; 4760 if (IsMIMG && isGFX10() && Operands.size() == 2) 4761 Mode = OperandMode_NSA; 4762 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4763 4764 // Eat the comma or space if there is one. 4765 if (getLexer().is(AsmToken::Comma)) 4766 Parser.Lex(); 4767 4768 if (Res != MatchOperand_Success) { 4769 if (!Parser.hasPendingError()) { 4770 // FIXME: use real operand location rather than the current location. 4771 StringRef Msg = 4772 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4773 "not a valid operand."; 4774 Error(getLexer().getLoc(), Msg); 4775 } 4776 while (!getLexer().is(AsmToken::EndOfStatement)) { 4777 Parser.Lex(); 4778 } 4779 return true; 4780 } 4781 } 4782 4783 return false; 4784 } 4785 4786 //===----------------------------------------------------------------------===// 4787 // Utility functions 4788 //===----------------------------------------------------------------------===// 4789 4790 OperandMatchResultTy 4791 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4792 4793 if (!trySkipId(Prefix, AsmToken::Colon)) 4794 return MatchOperand_NoMatch; 4795 4796 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4797 } 4798 4799 OperandMatchResultTy 4800 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4801 AMDGPUOperand::ImmTy ImmTy, 4802 bool (*ConvertResult)(int64_t&)) { 4803 SMLoc S = getLoc(); 4804 int64_t Value = 0; 4805 4806 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4807 if (Res != MatchOperand_Success) 4808 return Res; 4809 4810 if (ConvertResult && !ConvertResult(Value)) { 4811 Error(S, "invalid " + StringRef(Prefix) + " value."); 4812 } 4813 4814 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4815 return MatchOperand_Success; 4816 } 4817 4818 OperandMatchResultTy 4819 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4820 OperandVector &Operands, 4821 AMDGPUOperand::ImmTy ImmTy, 4822 bool (*ConvertResult)(int64_t&)) { 4823 SMLoc S = getLoc(); 4824 if (!trySkipId(Prefix, AsmToken::Colon)) 4825 return MatchOperand_NoMatch; 4826 4827 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4828 return MatchOperand_ParseFail; 4829 4830 unsigned Val = 0; 4831 const unsigned MaxSize = 4; 4832 4833 // FIXME: How to verify the number of elements matches the number of src 4834 // operands? 
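  // The accepted syntax is <Prefix>:[v,...] with at most 4 comma-separated
  // values, each 0 or 1, e.g. (illustrative) op_sel:[0,1]. The values are
  // packed into a bitmask, first element in the least significant bit.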
4835 for (int I = 0; ; ++I) { 4836 int64_t Op; 4837 SMLoc Loc = getLoc(); 4838 if (!parseExpr(Op)) 4839 return MatchOperand_ParseFail; 4840 4841 if (Op != 0 && Op != 1) { 4842 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4843 return MatchOperand_ParseFail; 4844 } 4845 4846 Val |= (Op << I); 4847 4848 if (trySkipToken(AsmToken::RBrac)) 4849 break; 4850 4851 if (I + 1 == MaxSize) { 4852 Error(getLoc(), "expected a closing square bracket"); 4853 return MatchOperand_ParseFail; 4854 } 4855 4856 if (!skipToken(AsmToken::Comma, "expected a comma")) 4857 return MatchOperand_ParseFail; 4858 } 4859 4860 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4861 return MatchOperand_Success; 4862 } 4863 4864 OperandMatchResultTy 4865 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4866 AMDGPUOperand::ImmTy ImmTy) { 4867 int64_t Bit = 0; 4868 SMLoc S = Parser.getTok().getLoc(); 4869 4870 // We are at the end of the statement, and this is a default argument, so 4871 // use a default value. 4872 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4873 switch(getLexer().getKind()) { 4874 case AsmToken::Identifier: { 4875 StringRef Tok = Parser.getTok().getString(); 4876 if (Tok == Name) { 4877 if (Tok == "r128" && !hasMIMG_R128()) 4878 Error(S, "r128 modifier is not supported on this GPU"); 4879 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4880 Error(S, "a16 modifier is not supported on this GPU"); 4881 Bit = 1; 4882 Parser.Lex(); 4883 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4884 Bit = 0; 4885 Parser.Lex(); 4886 } else { 4887 return MatchOperand_NoMatch; 4888 } 4889 break; 4890 } 4891 default: 4892 return MatchOperand_NoMatch; 4893 } 4894 } 4895 4896 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4897 return MatchOperand_ParseFail; 4898 4899 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4900 ImmTy = AMDGPUOperand::ImmTyR128A16; 4901 4902 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4903 return MatchOperand_Success; 4904 } 4905 4906 static void addOptionalImmOperand( 4907 MCInst& Inst, const OperandVector& Operands, 4908 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4909 AMDGPUOperand::ImmTy ImmT, 4910 int64_t Default = 0) { 4911 auto i = OptionalIdx.find(ImmT); 4912 if (i != OptionalIdx.end()) { 4913 unsigned Idx = i->second; 4914 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4915 } else { 4916 Inst.addOperand(MCOperand::createImm(Default)); 4917 } 4918 } 4919 4920 OperandMatchResultTy 4921 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4922 if (getLexer().isNot(AsmToken::Identifier)) { 4923 return MatchOperand_NoMatch; 4924 } 4925 StringRef Tok = Parser.getTok().getString(); 4926 if (Tok != Prefix) { 4927 return MatchOperand_NoMatch; 4928 } 4929 4930 Parser.Lex(); 4931 if (getLexer().isNot(AsmToken::Colon)) { 4932 return MatchOperand_ParseFail; 4933 } 4934 4935 Parser.Lex(); 4936 if (getLexer().isNot(AsmToken::Identifier)) { 4937 return MatchOperand_ParseFail; 4938 } 4939 4940 Value = Parser.getTok().getString(); 4941 return MatchOperand_Success; 4942 } 4943 4944 //===----------------------------------------------------------------------===// 4945 // MTBUF format 4946 //===----------------------------------------------------------------------===// 4947 4948 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 4949 int64_t MaxVal, 4950 int64_t &Fmt) { 4951 int64_t Val; 4952 SMLoc Loc = getLoc(); 4953 4954 auto Res = parseIntWithPrefix(Pref, Val); 4955 if (Res 
== MatchOperand_ParseFail) 4956 return false; 4957 if (Res == MatchOperand_NoMatch) 4958 return true; 4959 4960 if (Val < 0 || Val > MaxVal) { 4961 Error(Loc, Twine("out of range ", StringRef(Pref))); 4962 return false; 4963 } 4964 4965 Fmt = Val; 4966 return true; 4967 } 4968 4969 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4970 // values to live in a joint format operand in the MCInst encoding. 4971 OperandMatchResultTy 4972 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 4973 using namespace llvm::AMDGPU::MTBUFFormat; 4974 4975 int64_t Dfmt = DFMT_UNDEF; 4976 int64_t Nfmt = NFMT_UNDEF; 4977 4978 // dfmt and nfmt can appear in either order, and each is optional. 4979 for (int I = 0; I < 2; ++I) { 4980 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 4981 return MatchOperand_ParseFail; 4982 4983 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 4984 return MatchOperand_ParseFail; 4985 } 4986 // Skip optional comma between dfmt/nfmt 4987 // but guard against 2 commas following each other. 4988 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 4989 !peekToken().is(AsmToken::Comma)) { 4990 trySkipToken(AsmToken::Comma); 4991 } 4992 } 4993 4994 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 4995 return MatchOperand_NoMatch; 4996 4997 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 4998 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 4999 5000 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5001 return MatchOperand_Success; 5002 } 5003 5004 OperandMatchResultTy 5005 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5006 using namespace llvm::AMDGPU::MTBUFFormat; 5007 5008 int64_t Fmt = UFMT_UNDEF; 5009 5010 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5011 return MatchOperand_ParseFail; 5012 5013 if (Fmt == UFMT_UNDEF) 5014 return MatchOperand_NoMatch; 5015 5016 Format = Fmt; 5017 return MatchOperand_Success; 5018 } 5019 5020 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5021 int64_t &Nfmt, 5022 StringRef FormatStr, 5023 SMLoc Loc) { 5024 using namespace llvm::AMDGPU::MTBUFFormat; 5025 int64_t Format; 5026 5027 Format = getDfmt(FormatStr); 5028 if (Format != DFMT_UNDEF) { 5029 Dfmt = Format; 5030 return true; 5031 } 5032 5033 Format = getNfmt(FormatStr, getSTI()); 5034 if (Format != NFMT_UNDEF) { 5035 Nfmt = Format; 5036 return true; 5037 } 5038 5039 Error(Loc, "unsupported format"); 5040 return false; 5041 } 5042 5043 OperandMatchResultTy 5044 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5045 SMLoc FormatLoc, 5046 int64_t &Format) { 5047 using namespace llvm::AMDGPU::MTBUFFormat; 5048 5049 int64_t Dfmt = DFMT_UNDEF; 5050 int64_t Nfmt = NFMT_UNDEF; 5051 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5052 return MatchOperand_ParseFail; 5053 5054 if (trySkipToken(AsmToken::Comma)) { 5055 StringRef Str; 5056 SMLoc Loc = getLoc(); 5057 if (!parseId(Str, "expected a format string") || 5058 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5059 return MatchOperand_ParseFail; 5060 } 5061 if (Dfmt == DFMT_UNDEF) { 5062 Error(Loc, "duplicate numeric format"); 5063 return MatchOperand_ParseFail; 5064 } else if (Nfmt == NFMT_UNDEF) { 5065 Error(Loc, "duplicate data format"); 5066 return MatchOperand_ParseFail; 5067 } 5068 } 5069 5070 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5071 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5072 5073 if (isGFX10()) { 5074 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5075 if (Ufmt == UFMT_UNDEF) { 5076 Error(FormatLoc, "unsupported format"); 5077 return MatchOperand_ParseFail; 5078 } 5079 Format = Ufmt; 5080 } else { 5081 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5082 } 5083 5084 return MatchOperand_Success; 5085 } 5086 5087 OperandMatchResultTy 5088 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5089 SMLoc Loc, 5090 int64_t &Format) { 5091 using namespace llvm::AMDGPU::MTBUFFormat; 5092 5093 auto Id = getUnifiedFormat(FormatStr); 5094 if (Id == UFMT_UNDEF) 5095 return MatchOperand_NoMatch; 5096 5097 if (!isGFX10()) { 5098 Error(Loc, "unified format is not supported on this GPU"); 5099 return MatchOperand_ParseFail; 5100 } 5101 5102 Format = Id; 5103 return MatchOperand_Success; 5104 } 5105 5106 OperandMatchResultTy 5107 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5108 using namespace llvm::AMDGPU::MTBUFFormat; 5109 SMLoc Loc = getLoc(); 5110 5111 if (!parseExpr(Format)) 5112 return MatchOperand_ParseFail; 5113 if (!isValidFormatEncoding(Format, getSTI())) { 5114 Error(Loc, "out of range format"); 5115 return MatchOperand_ParseFail; 5116 } 5117 5118 return MatchOperand_Success; 5119 } 5120 5121 OperandMatchResultTy 5122 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5123 using namespace llvm::AMDGPU::MTBUFFormat; 5124 5125 if (!trySkipId("format", AsmToken::Colon)) 5126 return MatchOperand_NoMatch; 5127 5128 if (trySkipToken(AsmToken::LBrac)) { 5129 StringRef FormatStr; 5130 SMLoc Loc = getLoc(); 5131 if (!parseId(FormatStr, "expected a format string")) 5132 return MatchOperand_ParseFail; 5133 5134 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5135 if (Res == MatchOperand_NoMatch) 5136 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5137 if (Res != MatchOperand_Success) 5138 return Res; 5139 5140 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5141 return MatchOperand_ParseFail; 5142 5143 return MatchOperand_Success; 5144 } 5145 5146 return parseNumericFormat(Format); 5147 } 5148 5149 OperandMatchResultTy 5150 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5151 using namespace llvm::AMDGPU::MTBUFFormat; 5152 5153 int64_t Format = getDefaultFormatEncoding(getSTI()); 5154 OperandMatchResultTy Res; 5155 SMLoc Loc = getLoc(); 5156 5157 // Parse legacy format syntax. 5158 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5159 if (Res == MatchOperand_ParseFail) 5160 return Res; 5161 5162 bool FormatFound = (Res == MatchOperand_Success); 5163 5164 Operands.push_back( 5165 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5166 5167 if (FormatFound) 5168 trySkipToken(AsmToken::Comma); 5169 5170 if (isToken(AsmToken::EndOfStatement)) { 5171 // We are expecting an soffset operand, 5172 // but let matcher handle the error. 5173 return MatchOperand_Success; 5174 } 5175 5176 // Parse soffset. 
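  // If no format was found before soffset, it may still follow it, either as a
  // number (format:<N>) or symbolically (format:[<name>[,<name>]]); in that
  // case the placeholder FORMAT immediate pushed above is patched below.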
5177 Res = parseRegOrImm(Operands); 5178 if (Res != MatchOperand_Success) 5179 return Res; 5180 5181 trySkipToken(AsmToken::Comma); 5182 5183 if (!FormatFound) { 5184 Res = parseSymbolicOrNumericFormat(Format); 5185 if (Res == MatchOperand_ParseFail) 5186 return Res; 5187 if (Res == MatchOperand_Success) { 5188 auto Size = Operands.size(); 5189 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5190 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5191 Op.setImm(Format); 5192 } 5193 return MatchOperand_Success; 5194 } 5195 5196 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5197 Error(getLoc(), "duplicate format"); 5198 return MatchOperand_ParseFail; 5199 } 5200 return MatchOperand_Success; 5201 } 5202 5203 //===----------------------------------------------------------------------===// 5204 // ds 5205 //===----------------------------------------------------------------------===// 5206 5207 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5208 const OperandVector &Operands) { 5209 OptionalImmIndexMap OptionalIdx; 5210 5211 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5212 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5213 5214 // Add the register arguments 5215 if (Op.isReg()) { 5216 Op.addRegOperands(Inst, 1); 5217 continue; 5218 } 5219 5220 // Handle optional arguments 5221 OptionalIdx[Op.getImmTy()] = i; 5222 } 5223 5224 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5225 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5227 5228 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5229 } 5230 5231 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5232 bool IsGdsHardcoded) { 5233 OptionalImmIndexMap OptionalIdx; 5234 5235 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5236 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5237 5238 // Add the register arguments 5239 if (Op.isReg()) { 5240 Op.addRegOperands(Inst, 1); 5241 continue; 5242 } 5243 5244 if (Op.isToken() && Op.getToken() == "gds") { 5245 IsGdsHardcoded = true; 5246 continue; 5247 } 5248 5249 // Handle optional arguments 5250 OptionalIdx[Op.getImmTy()] = i; 5251 } 5252 5253 AMDGPUOperand::ImmTy OffsetType = 5254 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5255 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5256 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5257 AMDGPUOperand::ImmTyOffset; 5258 5259 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5260 5261 if (!IsGdsHardcoded) { 5262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5263 } 5264 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5265 } 5266 5267 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5268 OptionalImmIndexMap OptionalIdx; 5269 5270 unsigned OperandIdx[4]; 5271 unsigned EnMask = 0; 5272 int SrcIdx = 0; 5273 5274 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5275 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5276 5277 // Add the register arguments 5278 if (Op.isReg()) { 5279 assert(SrcIdx < 4); 5280 OperandIdx[SrcIdx] = Inst.size(); 5281 Op.addRegOperands(Inst, 1); 5282 ++SrcIdx; 5283 continue; 5284 } 5285 5286 if (Op.isOff()) { 5287 assert(SrcIdx < 4); 5288 OperandIdx[SrcIdx] = Inst.size(); 5289 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5290 ++SrcIdx; 5291 continue; 5292 } 5293 5294 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5295 Op.addImmOperands(Inst, 1); 5296 continue; 5297 } 5298 5299 if (Op.isToken() && Op.getToken() == "done") 5300 continue; 5301 5302 // Handle optional arguments 5303 OptionalIdx[Op.getImmTy()] = i; 5304 } 5305 5306 assert(SrcIdx == 4); 5307 5308 bool Compr = false; 5309 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5310 Compr = true; 5311 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5312 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5313 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5314 } 5315 5316 for (auto i = 0; i < SrcIdx; ++i) { 5317 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5318 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5319 } 5320 } 5321 5322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5324 5325 Inst.addOperand(MCOperand::createImm(EnMask)); 5326 } 5327 5328 //===----------------------------------------------------------------------===// 5329 // s_waitcnt 5330 //===----------------------------------------------------------------------===// 5331 5332 static bool 5333 encodeCnt( 5334 const AMDGPU::IsaVersion ISA, 5335 int64_t &IntVal, 5336 int64_t CntVal, 5337 bool Saturate, 5338 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5339 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5340 { 5341 bool Failed = false; 5342 5343 IntVal = encode(ISA, IntVal, CntVal); 5344 if (CntVal != decode(ISA, IntVal)) { 5345 if (Saturate) { 5346 IntVal = encode(ISA, IntVal, -1); 5347 } else { 5348 Failed = true; 5349 } 5350 } 5351 return Failed; 5352 } 5353 5354 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5355 5356 SMLoc CntLoc = getLoc(); 5357 StringRef CntName = getTokenStr(); 5358 5359 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5360 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5361 return false; 5362 5363 int64_t CntVal; 5364 SMLoc ValLoc = getLoc(); 5365 if (!parseExpr(CntVal)) 5366 return false; 5367 5368 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5369 5370 bool Failed = true; 5371 bool Sat = CntName.endswith("_sat"); 5372 5373 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5374 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5375 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5376 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5377 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5378 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5379 } else { 5380 Error(CntLoc, "invalid counter name " + CntName); 5381 return false; 5382 } 5383 5384 if (Failed) { 5385 Error(ValLoc, "too large value for " + CntName); 5386 return false; 5387 } 5388 5389 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5390 return false; 5391 5392 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5393 if (isToken(AsmToken::EndOfStatement)) { 5394 Error(getLoc(), "expected a counter name"); 5395 return false; 5396 } 5397 } 5398 5399 return true; 5400 } 5401 5402 OperandMatchResultTy 5403 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5404 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5405 int64_t Waitcnt = getWaitcntBitMask(ISA); 5406 SMLoc S = getLoc(); 5407 5408 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5409 while (!isToken(AsmToken::EndOfStatement)) { 5410 if (!parseCnt(Waitcnt)) 5411 return MatchOperand_ParseFail; 5412 } 5413 } else { 5414 if (!parseExpr(Waitcnt)) 5415 return MatchOperand_ParseFail; 5416 } 5417 5418 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5419 return MatchOperand_Success; 5420 } 5421 5422 bool 5423 AMDGPUOperand::isSWaitCnt() const { 5424 return isImm(); 5425 } 5426 5427 //===----------------------------------------------------------------------===// 5428 // hwreg 5429 //===----------------------------------------------------------------------===// 5430 5431 bool 5432 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5433 int64_t &Offset, 5434 int64_t 
&Width) { 5435 using namespace llvm::AMDGPU::Hwreg; 5436 5437 // The register may be specified by name or using a numeric code 5438 if (isToken(AsmToken::Identifier) && 5439 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5440 HwReg.IsSymbolic = true; 5441 lex(); // skip register name 5442 } else if (!parseExpr(HwReg.Id)) { 5443 return false; 5444 } 5445 5446 if (trySkipToken(AsmToken::RParen)) 5447 return true; 5448 5449 // parse optional params 5450 return 5451 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5452 parseExpr(Offset) && 5453 skipToken(AsmToken::Comma, "expected a comma") && 5454 parseExpr(Width) && 5455 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5456 } 5457 5458 bool 5459 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5460 const int64_t Offset, 5461 const int64_t Width, 5462 const SMLoc Loc) { 5463 5464 using namespace llvm::AMDGPU::Hwreg; 5465 5466 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5467 Error(Loc, "specified hardware register is not supported on this GPU"); 5468 return false; 5469 } else if (!isValidHwreg(HwReg.Id)) { 5470 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5471 return false; 5472 } else if (!isValidHwregOffset(Offset)) { 5473 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5474 return false; 5475 } else if (!isValidHwregWidth(Width)) { 5476 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5477 return false; 5478 } 5479 return true; 5480 } 5481 5482 OperandMatchResultTy 5483 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5484 using namespace llvm::AMDGPU::Hwreg; 5485 5486 int64_t ImmVal = 0; 5487 SMLoc Loc = getLoc(); 5488 5489 if (trySkipId("hwreg", AsmToken::LParen)) { 5490 OperandInfoTy HwReg(ID_UNKNOWN_); 5491 int64_t Offset = OFFSET_DEFAULT_; 5492 int64_t Width = WIDTH_DEFAULT_; 5493 if (parseHwregBody(HwReg, Offset, Width) && 5494 validateHwreg(HwReg, Offset, Width, Loc)) { 5495 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5496 } else { 5497 return MatchOperand_ParseFail; 5498 } 5499 } else if (parseExpr(ImmVal)) { 5500 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5501 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5502 return MatchOperand_ParseFail; 5503 } 5504 } else { 5505 return MatchOperand_ParseFail; 5506 } 5507 5508 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5509 return MatchOperand_Success; 5510 } 5511 5512 bool AMDGPUOperand::isHwreg() const { 5513 return isImmTy(ImmTyHwreg); 5514 } 5515 5516 //===----------------------------------------------------------------------===// 5517 // sendmsg 5518 //===----------------------------------------------------------------------===// 5519 5520 bool 5521 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5522 OperandInfoTy &Op, 5523 OperandInfoTy &Stream) { 5524 using namespace llvm::AMDGPU::SendMsg; 5525 5526 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5527 Msg.IsSymbolic = true; 5528 lex(); // skip message name 5529 } else if (!parseExpr(Msg.Id)) { 5530 return false; 5531 } 5532 5533 if (trySkipToken(AsmToken::Comma)) { 5534 Op.IsDefined = true; 5535 if (isToken(AsmToken::Identifier) && 5536 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5537 lex(); // skip operation name 5538 } else if (!parseExpr(Op.Id)) { 5539 return false; 5540 } 5541 5542 if (trySkipToken(AsmToken::Comma)) { 5543 Stream.IsDefined = true; 5544 if (!parseExpr(Stream.Id))
5545 return false; 5546 } 5547 } 5548 5549 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5550 } 5551 5552 bool 5553 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5554 const OperandInfoTy &Op, 5555 const OperandInfoTy &Stream, 5556 const SMLoc S) { 5557 using namespace llvm::AMDGPU::SendMsg; 5558 5559 // Validation strictness depends on whether the message is specified 5560 // in a symbolic or in a numeric form. In the latter case, 5561 // only the possibility of encoding is checked. 5562 bool Strict = Msg.IsSymbolic; 5563 5564 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5565 Error(S, "invalid message id"); 5566 return false; 5567 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5568 Error(S, Op.IsDefined ? 5569 "message does not support operations" : 5570 "missing message operation"); 5571 return false; 5572 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5573 Error(S, "invalid operation id"); 5574 return false; 5575 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5576 Error(S, "message operation does not support streams"); 5577 return false; 5578 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5579 Error(S, "invalid message stream id"); 5580 return false; 5581 } 5582 return true; 5583 } 5584 5585 OperandMatchResultTy 5586 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5587 using namespace llvm::AMDGPU::SendMsg; 5588 5589 int64_t ImmVal = 0; 5590 SMLoc Loc = getLoc(); 5591 5592 if (trySkipId("sendmsg", AsmToken::LParen)) { 5593 OperandInfoTy Msg(ID_UNKNOWN_); 5594 OperandInfoTy Op(OP_NONE_); 5595 OperandInfoTy Stream(STREAM_ID_NONE_); 5596 if (parseSendMsgBody(Msg, Op, Stream) && 5597 validateSendMsg(Msg, Op, Stream, Loc)) { 5598 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5599 } else { 5600 return MatchOperand_ParseFail; 5601 } 5602 } else if (parseExpr(ImmVal)) { 5603 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5604 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5605 return MatchOperand_ParseFail; 5606 } 5607 } else { 5608 return MatchOperand_ParseFail; 5609 } 5610 5611 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5612 return MatchOperand_Success; 5613 } 5614 5615 bool AMDGPUOperand::isSendMsg() const { 5616 return isImmTy(ImmTySendMsg); 5617 } 5618 5619 //===----------------------------------------------------------------------===// 5620 // v_interp 5621 //===----------------------------------------------------------------------===// 5622 5623 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5624 if (getLexer().getKind() != AsmToken::Identifier) 5625 return MatchOperand_NoMatch; 5626 5627 StringRef Str = Parser.getTok().getString(); 5628 int Slot = StringSwitch<int>(Str) 5629 .Case("p10", 0) 5630 .Case("p20", 1) 5631 .Case("p0", 2) 5632 .Default(-1); 5633 5634 SMLoc S = Parser.getTok().getLoc(); 5635 if (Slot == -1) 5636 return MatchOperand_ParseFail; 5637 5638 Parser.Lex(); 5639 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5640 AMDGPUOperand::ImmTyInterpSlot)); 5641 return MatchOperand_Success; 5642 } 5643 5644 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5645 if (getLexer().getKind() != AsmToken::Identifier) 5646 return MatchOperand_NoMatch; 5647 5648 StringRef Str = Parser.getTok().getString(); 5649 if (!Str.startswith("attr")) 5650 return MatchOperand_NoMatch; 5651 5652 StringRef Chan = Str.take_back(2); 5653 int AttrChan =
StringSwitch<int>(Chan) 5654 .Case(".x", 0) 5655 .Case(".y", 1) 5656 .Case(".z", 2) 5657 .Case(".w", 3) 5658 .Default(-1); 5659 if (AttrChan == -1) 5660 return MatchOperand_ParseFail; 5661 5662 Str = Str.drop_back(2).drop_front(4); 5663 5664 uint8_t Attr; 5665 if (Str.getAsInteger(10, Attr)) 5666 return MatchOperand_ParseFail; 5667 5668 SMLoc S = Parser.getTok().getLoc(); 5669 Parser.Lex(); 5670 if (Attr > 63) { 5671 Error(S, "out of bounds attr"); 5672 return MatchOperand_ParseFail; 5673 } 5674 5675 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5676 5677 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5678 AMDGPUOperand::ImmTyInterpAttr)); 5679 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5680 AMDGPUOperand::ImmTyAttrChan)); 5681 return MatchOperand_Success; 5682 } 5683 5684 //===----------------------------------------------------------------------===// 5685 // exp 5686 //===----------------------------------------------------------------------===// 5687 5688 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5689 uint8_t &Val) { 5690 if (Str == "null") { 5691 Val = 9; 5692 return MatchOperand_Success; 5693 } 5694 5695 if (Str.startswith("mrt")) { 5696 Str = Str.drop_front(3); 5697 if (Str == "z") { // == mrtz 5698 Val = 8; 5699 return MatchOperand_Success; 5700 } 5701 5702 if (Str.getAsInteger(10, Val)) 5703 return MatchOperand_ParseFail; 5704 5705 if (Val > 7) { 5706 Error(getLoc(), "invalid exp target"); 5707 return MatchOperand_ParseFail; 5708 } 5709 5710 return MatchOperand_Success; 5711 } 5712 5713 if (Str.startswith("pos")) { 5714 Str = Str.drop_front(3); 5715 if (Str.getAsInteger(10, Val)) 5716 return MatchOperand_ParseFail; 5717 5718 if (Val > 4 || (Val == 4 && !isGFX10())) { 5719 Error(getLoc(), "invalid exp target"); 5720 return MatchOperand_ParseFail; 5721 } 5722 5723 Val += 12; 5724 return MatchOperand_Success; 5725 } 5726 5727 if (isGFX10() && Str == "prim") { 5728 Val = 20; 5729 return MatchOperand_Success; 5730 } 5731 5732 if (Str.startswith("param")) { 5733 Str = Str.drop_front(5); 5734 if (Str.getAsInteger(10, Val)) 5735 return MatchOperand_ParseFail; 5736 5737 if (Val >= 32) { 5738 Error(getLoc(), "invalid exp target"); 5739 return MatchOperand_ParseFail; 5740 } 5741 5742 Val += 32; 5743 return MatchOperand_Success; 5744 } 5745 5746 if (Str.startswith("invalid_target_")) { 5747 Str = Str.drop_front(15); 5748 if (Str.getAsInteger(10, Val)) 5749 return MatchOperand_ParseFail; 5750 5751 Error(getLoc(), "invalid exp target"); 5752 return MatchOperand_ParseFail; 5753 } 5754 5755 return MatchOperand_NoMatch; 5756 } 5757 5758 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5759 uint8_t Val; 5760 StringRef Str = Parser.getTok().getString(); 5761 5762 auto Res = parseExpTgtImpl(Str, Val); 5763 if (Res != MatchOperand_Success) 5764 return Res; 5765 5766 SMLoc S = Parser.getTok().getLoc(); 5767 Parser.Lex(); 5768 5769 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5770 AMDGPUOperand::ImmTyExpTgt)); 5771 return MatchOperand_Success; 5772 } 5773 5774 //===----------------------------------------------------------------------===// 5775 // parser helpers 5776 //===----------------------------------------------------------------------===// 5777 5778 bool 5779 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5780 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5781 } 5782 5783 bool 5784 AMDGPUAsmParser::isId(const StringRef Id) const { 5785 return 
isId(getToken(), Id); 5786 } 5787 5788 bool 5789 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5790 return getTokenKind() == Kind; 5791 } 5792 5793 bool 5794 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5795 if (isId(Id)) { 5796 lex(); 5797 return true; 5798 } 5799 return false; 5800 } 5801 5802 bool 5803 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5804 if (isId(Id) && peekToken().is(Kind)) { 5805 lex(); 5806 lex(); 5807 return true; 5808 } 5809 return false; 5810 } 5811 5812 bool 5813 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5814 if (isToken(Kind)) { 5815 lex(); 5816 return true; 5817 } 5818 return false; 5819 } 5820 5821 bool 5822 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5823 const StringRef ErrMsg) { 5824 if (!trySkipToken(Kind)) { 5825 Error(getLoc(), ErrMsg); 5826 return false; 5827 } 5828 return true; 5829 } 5830 5831 bool 5832 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5833 return !getParser().parseAbsoluteExpression(Imm); 5834 } 5835 5836 bool 5837 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5838 SMLoc S = getLoc(); 5839 5840 const MCExpr *Expr; 5841 if (Parser.parseExpression(Expr)) 5842 return false; 5843 5844 int64_t IntVal; 5845 if (Expr->evaluateAsAbsolute(IntVal)) { 5846 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5847 } else { 5848 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5849 } 5850 return true; 5851 } 5852 5853 bool 5854 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5855 if (isToken(AsmToken::String)) { 5856 Val = getToken().getStringContents(); 5857 lex(); 5858 return true; 5859 } else { 5860 Error(getLoc(), ErrMsg); 5861 return false; 5862 } 5863 } 5864 5865 bool 5866 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5867 if (isToken(AsmToken::Identifier)) { 5868 Val = getTokenStr(); 5869 lex(); 5870 return true; 5871 } else { 5872 Error(getLoc(), ErrMsg); 5873 return false; 5874 } 5875 } 5876 5877 AsmToken 5878 AMDGPUAsmParser::getToken() const { 5879 return Parser.getTok(); 5880 } 5881 5882 AsmToken 5883 AMDGPUAsmParser::peekToken() { 5884 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 5885 } 5886 5887 void 5888 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5889 auto TokCount = getLexer().peekTokens(Tokens); 5890 5891 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5892 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5893 } 5894 5895 AsmToken::TokenKind 5896 AMDGPUAsmParser::getTokenKind() const { 5897 return getLexer().getKind(); 5898 } 5899 5900 SMLoc 5901 AMDGPUAsmParser::getLoc() const { 5902 return getToken().getLoc(); 5903 } 5904 5905 StringRef 5906 AMDGPUAsmParser::getTokenStr() const { 5907 return getToken().getString(); 5908 } 5909 5910 void 5911 AMDGPUAsmParser::lex() { 5912 Parser.Lex(); 5913 } 5914 5915 //===----------------------------------------------------------------------===// 5916 // swizzle 5917 //===----------------------------------------------------------------------===// 5918 5919 LLVM_READNONE 5920 static unsigned 5921 encodeBitmaskPerm(const unsigned AndMask, 5922 const unsigned OrMask, 5923 const unsigned XorMask) { 5924 using namespace llvm::AMDGPU::Swizzle; 5925 5926 return BITMASK_PERM_ENC | 5927 (AndMask << BITMASK_AND_SHIFT) | 5928 (OrMask << BITMASK_OR_SHIFT) | 5929 (XorMask << BITMASK_XOR_SHIFT); 5930 } 5931 5932 bool 5933 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5934 const unsigned MinVal, 5935 const unsigned MaxVal, 5936 const StringRef ErrMsg) { 5937 for (unsigned i = 0; i < OpNum; ++i) { 5938 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5939 return false; 5940 } 5941 SMLoc ExprLoc = Parser.getTok().getLoc(); 5942 if (!parseExpr(Op[i])) { 5943 return false; 5944 } 5945 if (Op[i] < MinVal || Op[i] > MaxVal) { 5946 Error(ExprLoc, ErrMsg); 5947 return false; 5948 } 5949 } 5950 5951 return true; 5952 } 5953 5954 bool 5955 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5956 using namespace llvm::AMDGPU::Swizzle; 5957 5958 int64_t Lane[LANE_NUM]; 5959 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5960 "expected a 2-bit lane id")) { 5961 Imm = QUAD_PERM_ENC; 5962 for (unsigned I = 0; I < LANE_NUM; ++I) { 5963 Imm |= Lane[I] << (LANE_SHIFT * I); 5964 } 5965 return true; 5966 } 5967 return false; 5968 } 5969 5970 bool 5971 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5972 using namespace llvm::AMDGPU::Swizzle; 5973 5974 SMLoc S = Parser.getTok().getLoc(); 5975 int64_t GroupSize; 5976 int64_t LaneIdx; 5977 5978 if (!parseSwizzleOperands(1, &GroupSize, 5979 2, 32, 5980 "group size must be in the interval [2,32]")) { 5981 return false; 5982 } 5983 if (!isPowerOf2_64(GroupSize)) { 5984 Error(S, "group size must be a power of two"); 5985 return false; 5986 } 5987 if (parseSwizzleOperands(1, &LaneIdx, 5988 0, GroupSize - 1, 5989 "lane id must be in the interval [0,group size - 1]")) { 5990 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5991 return true; 5992 } 5993 return false; 5994 } 5995 5996 bool 5997 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5998 using namespace llvm::AMDGPU::Swizzle; 5999 6000 SMLoc S = Parser.getTok().getLoc(); 6001 int64_t GroupSize; 6002 6003 if (!parseSwizzleOperands(1, &GroupSize, 6004 2, 32, "group size must be in the interval [2,32]")) { 6005 return false; 6006 } 6007 if (!isPowerOf2_64(GroupSize)) { 6008 Error(S, "group size must be a power of two"); 6009 return false; 6010 } 6011 6012 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6013 return true; 6014 } 6015 6016 bool 6017 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6018 using namespace 
llvm::AMDGPU::Swizzle; 6019 6020 SMLoc S = Parser.getTok().getLoc(); 6021 int64_t GroupSize; 6022 6023 if (!parseSwizzleOperands(1, &GroupSize, 6024 1, 16, "group size must be in the interval [1,16]")) { 6025 return false; 6026 } 6027 if (!isPowerOf2_64(GroupSize)) { 6028 Error(S, "group size must be a power of two"); 6029 return false; 6030 } 6031 6032 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6033 return true; 6034 } 6035 6036 bool 6037 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6038 using namespace llvm::AMDGPU::Swizzle; 6039 6040 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6041 return false; 6042 } 6043 6044 StringRef Ctl; 6045 SMLoc StrLoc = Parser.getTok().getLoc(); 6046 if (!parseString(Ctl)) { 6047 return false; 6048 } 6049 if (Ctl.size() != BITMASK_WIDTH) { 6050 Error(StrLoc, "expected a 5-character mask"); 6051 return false; 6052 } 6053 6054 unsigned AndMask = 0; 6055 unsigned OrMask = 0; 6056 unsigned XorMask = 0; 6057 6058 for (size_t i = 0; i < Ctl.size(); ++i) { 6059 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6060 switch(Ctl[i]) { 6061 default: 6062 Error(StrLoc, "invalid mask"); 6063 return false; 6064 case '0': 6065 break; 6066 case '1': 6067 OrMask |= Mask; 6068 break; 6069 case 'p': 6070 AndMask |= Mask; 6071 break; 6072 case 'i': 6073 AndMask |= Mask; 6074 XorMask |= Mask; 6075 break; 6076 } 6077 } 6078 6079 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6080 return true; 6081 } 6082 6083 bool 6084 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6085 6086 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6087 6088 if (!parseExpr(Imm)) { 6089 return false; 6090 } 6091 if (!isUInt<16>(Imm)) { 6092 Error(OffsetLoc, "expected a 16-bit offset"); 6093 return false; 6094 } 6095 return true; 6096 } 6097 6098 bool 6099 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6100 using namespace llvm::AMDGPU::Swizzle; 6101 6102 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6103 6104 SMLoc ModeLoc = Parser.getTok().getLoc(); 6105 bool Ok = false; 6106 6107 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6108 Ok = parseSwizzleQuadPerm(Imm); 6109 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6110 Ok = parseSwizzleBitmaskPerm(Imm); 6111 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6112 Ok = parseSwizzleBroadcast(Imm); 6113 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6114 Ok = parseSwizzleSwap(Imm); 6115 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6116 Ok = parseSwizzleReverse(Imm); 6117 } else { 6118 Error(ModeLoc, "expected a swizzle mode"); 6119 } 6120 6121 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6122 } 6123 6124 return false; 6125 } 6126 6127 OperandMatchResultTy 6128 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6129 SMLoc S = Parser.getTok().getLoc(); 6130 int64_t Imm = 0; 6131 6132 if (trySkipId("offset")) { 6133 6134 bool Ok = false; 6135 if (skipToken(AsmToken::Colon, "expected a colon")) { 6136 if (trySkipId("swizzle")) { 6137 Ok = parseSwizzleMacro(Imm); 6138 } else { 6139 Ok = parseSwizzleOffset(Imm); 6140 } 6141 } 6142 6143 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6144 6145 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6146 } else { 6147 // Swizzle "offset" operand is optional. 6148 // If it is omitted, try parsing other optional operands.
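// (For reference, illustrative forms accepted when the operand is present:
// a raw 16-bit value such as 'offset:0xA50B', or a macro handled by
// parseSwizzleMacro above, e.g. 'offset:swizzle(QUAD_PERM, 0, 1, 2, 3)' or
// 'offset:swizzle(BITMASK_PERM, "01pi0")'. These examples are illustrative,
// not an exhaustive list.)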
6149 return parseOptionalOpr(Operands); 6150 } 6151 } 6152 6153 bool 6154 AMDGPUOperand::isSwizzle() const { 6155 return isImmTy(ImmTySwizzle); 6156 } 6157 6158 //===----------------------------------------------------------------------===// 6159 // VGPR Index Mode 6160 //===----------------------------------------------------------------------===// 6161 6162 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6163 6164 using namespace llvm::AMDGPU::VGPRIndexMode; 6165 6166 if (trySkipToken(AsmToken::RParen)) { 6167 return OFF; 6168 } 6169 6170 int64_t Imm = 0; 6171 6172 while (true) { 6173 unsigned Mode = 0; 6174 SMLoc S = Parser.getTok().getLoc(); 6175 6176 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6177 if (trySkipId(IdSymbolic[ModeId])) { 6178 Mode = 1 << ModeId; 6179 break; 6180 } 6181 } 6182 6183 if (Mode == 0) { 6184 Error(S, (Imm == 0)? 6185 "expected a VGPR index mode or a closing parenthesis" : 6186 "expected a VGPR index mode"); 6187 return UNDEF; 6188 } 6189 6190 if (Imm & Mode) { 6191 Error(S, "duplicate VGPR index mode"); 6192 return UNDEF; 6193 } 6194 Imm |= Mode; 6195 6196 if (trySkipToken(AsmToken::RParen)) 6197 break; 6198 if (!skipToken(AsmToken::Comma, 6199 "expected a comma or a closing parenthesis")) 6200 return UNDEF; 6201 } 6202 6203 return Imm; 6204 } 6205 6206 OperandMatchResultTy 6207 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6208 6209 using namespace llvm::AMDGPU::VGPRIndexMode; 6210 6211 int64_t Imm = 0; 6212 SMLoc S = Parser.getTok().getLoc(); 6213 6214 if (getLexer().getKind() == AsmToken::Identifier && 6215 Parser.getTok().getString() == "gpr_idx" && 6216 getLexer().peekTok().is(AsmToken::LParen)) { 6217 6218 Parser.Lex(); 6219 Parser.Lex(); 6220 6221 Imm = parseGPRIdxMacro(); 6222 if (Imm == UNDEF) 6223 return MatchOperand_ParseFail; 6224 6225 } else { 6226 if (getParser().parseAbsoluteExpression(Imm)) 6227 return MatchOperand_ParseFail; 6228 if (Imm < 0 || !isUInt<4>(Imm)) { 6229 Error(S, "invalid immediate: only 4-bit values are legal"); 6230 return MatchOperand_ParseFail; 6231 } 6232 } 6233 6234 Operands.push_back( 6235 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6236 return MatchOperand_Success; 6237 } 6238 6239 bool AMDGPUOperand::isGPRIdxMode() const { 6240 return isImmTy(ImmTyGprIdxMode); 6241 } 6242 6243 //===----------------------------------------------------------------------===// 6244 // sopp branch targets 6245 //===----------------------------------------------------------------------===// 6246 6247 OperandMatchResultTy 6248 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6249 6250 // Make sure we are not parsing something 6251 // that looks like a label or an expression but is not. 6252 // This will improve error messages. 6253 if (isRegister() || isModifier()) 6254 return MatchOperand_NoMatch; 6255 6256 if (!parseExpr(Operands)) 6257 return MatchOperand_ParseFail; 6258 6259 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6260 assert(Opr.isImm() || Opr.isExpr()); 6261 SMLoc Loc = Opr.getStartLoc(); 6262 6263 // Currently we do not support arbitrary expressions as branch targets. 6264 // Only labels and absolute expressions are accepted. 
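// (Illustratively: a plain label such as 's_branch loop_end' or an absolute
// 16-bit value such as 's_branch 0x10' passes the checks below, while a
// compound expression like 'loop_end+4' is rejected.)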
6265 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6266 Error(Loc, "expected an absolute expression or a label"); 6267 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6268 Error(Loc, "expected a 16-bit signed jump offset"); 6269 } 6270 6271 return MatchOperand_Success; 6272 } 6273 6274 //===----------------------------------------------------------------------===// 6275 // Boolean holding registers 6276 //===----------------------------------------------------------------------===// 6277 6278 OperandMatchResultTy 6279 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6280 return parseReg(Operands); 6281 } 6282 6283 //===----------------------------------------------------------------------===// 6284 // mubuf 6285 //===----------------------------------------------------------------------===// 6286 6287 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6288 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6289 } 6290 6291 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6292 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6293 } 6294 6295 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6296 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6297 } 6298 6299 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6300 const OperandVector &Operands, 6301 bool IsAtomic, 6302 bool IsAtomicReturn, 6303 bool IsLds) { 6304 bool IsLdsOpcode = IsLds; 6305 bool HasLdsModifier = false; 6306 OptionalImmIndexMap OptionalIdx; 6307 assert(IsAtomicReturn ? IsAtomic : true); 6308 unsigned FirstOperandIdx = 1; 6309 6310 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6311 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6312 6313 // Add the register arguments 6314 if (Op.isReg()) { 6315 Op.addRegOperands(Inst, 1); 6316 // Insert a tied src for atomic return dst. 6317 // This cannot be postponed as subsequent calls to 6318 // addImmOperands rely on correct number of MC operands. 6319 if (IsAtomicReturn && i == FirstOperandIdx) 6320 Op.addRegOperands(Inst, 1); 6321 continue; 6322 } 6323 6324 // Handle the case where soffset is an immediate 6325 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6326 Op.addImmOperands(Inst, 1); 6327 continue; 6328 } 6329 6330 HasLdsModifier |= Op.isLDS(); 6331 6332 // Handle tokens like 'offen' which are sometimes hard-coded into the 6333 // asm string. There are no MCInst operands for these. 6334 if (Op.isToken()) { 6335 continue; 6336 } 6337 assert(Op.isImm()); 6338 6339 // Handle optional arguments 6340 OptionalIdx[Op.getImmTy()] = i; 6341 } 6342 6343 // This is a workaround for an llvm quirk which may result in an 6344 // incorrect instruction selection. Lds and non-lds versions of 6345 // MUBUF instructions are identical except that lds versions 6346 // have mandatory 'lds' modifier. However this modifier follows 6347 // optional modifiers and llvm asm matcher regards this 'lds' 6348 // modifier as an optional one. As a result, an lds version 6349 // of opcode may be selected even if it has no 'lds' modifier. 6350 if (IsLdsOpcode && !HasLdsModifier) { 6351 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6352 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6353 Inst.setOpcode(NoLdsOpcode); 6354 IsLdsOpcode = false; 6355 } 6356 } 6357 6358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6359 if (!IsAtomic) { // glc is hard-coded. 
6360 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6361 } 6362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6363 6364 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6365 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6366 } 6367 6368 if (isGFX10()) 6369 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6370 } 6371 6372 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6373 OptionalImmIndexMap OptionalIdx; 6374 6375 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6376 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6377 6378 // Add the register arguments 6379 if (Op.isReg()) { 6380 Op.addRegOperands(Inst, 1); 6381 continue; 6382 } 6383 6384 // Handle the case where soffset is an immediate 6385 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6386 Op.addImmOperands(Inst, 1); 6387 continue; 6388 } 6389 6390 // Handle tokens like 'offen' which are sometimes hard-coded into the 6391 // asm string. There are no MCInst operands for these. 6392 if (Op.isToken()) { 6393 continue; 6394 } 6395 assert(Op.isImm()); 6396 6397 // Handle optional arguments 6398 OptionalIdx[Op.getImmTy()] = i; 6399 } 6400 6401 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6402 AMDGPUOperand::ImmTyOffset); 6403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6404 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6405 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6406 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6407 6408 if (isGFX10()) 6409 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6410 } 6411 6412 //===----------------------------------------------------------------------===// 6413 // mimg 6414 //===----------------------------------------------------------------------===// 6415 6416 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6417 bool IsAtomic) { 6418 unsigned I = 1; 6419 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6420 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6421 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6422 } 6423 6424 if (IsAtomic) { 6425 // Add src, same as dst 6426 assert(Desc.getNumDefs() == 1); 6427 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6428 } 6429 6430 OptionalImmIndexMap OptionalIdx; 6431 6432 for (unsigned E = Operands.size(); I != E; ++I) { 6433 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6434 6435 // Add the register arguments 6436 if (Op.isReg()) { 6437 Op.addRegOperands(Inst, 1); 6438 } else if (Op.isImmModifier()) { 6439 OptionalIdx[Op.getImmTy()] = I; 6440 } else if (!Op.isToken()) { 6441 llvm_unreachable("unexpected operand type"); 6442 } 6443 } 6444 6445 bool IsGFX10 = isGFX10(); 6446 6447 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6448 if (IsGFX10) 6449 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6450 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6451 if (IsGFX10) 6452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6453 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6454 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6455 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6456 if (IsGFX10) 6457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6459 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6460 if (!IsGFX10) 6461 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6462 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6463 } 6464 6465 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6466 cvtMIMG(Inst, Operands, true); 6467 } 6468 6469 //===----------------------------------------------------------------------===// 6470 // smrd 6471 //===----------------------------------------------------------------------===// 6472 6473 bool AMDGPUOperand::isSMRDOffset8() const { 6474 return isImm() && isUInt<8>(getImm()); 6475 } 6476 6477 bool AMDGPUOperand::isSMEMOffset() const { 6478 return isImm(); // Offset range is checked later by validator. 6479 } 6480 6481 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6482 // 32-bit literals are only supported on CI and we only want to use them 6483 // when the offset is > 8-bits. 6484 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6485 } 6486 6487 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6488 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6489 } 6490 6491 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6492 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6493 } 6494 6495 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6496 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6497 } 6498 6499 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6500 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6501 } 6502 6503 //===----------------------------------------------------------------------===// 6504 // vop3 6505 //===----------------------------------------------------------------------===// 6506 6507 static bool ConvertOmodMul(int64_t &Mul) { 6508 if (Mul != 1 && Mul != 2 && Mul != 4) 6509 return false; 6510 6511 Mul >>= 1; 6512 return true; 6513 } 6514 6515 static bool ConvertOmodDiv(int64_t &Div) { 6516 if (Div == 1) { 6517 Div = 0; 6518 return true; 6519 } 6520 6521 if (Div == 2) { 6522 Div = 3; 6523 return true; 6524 } 6525 6526 return false; 6527 } 6528 6529 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6530 if (BoundCtrl == 0) { 6531 BoundCtrl = 1; 6532 return true; 6533 } 6534 6535 if (BoundCtrl == -1) { 6536 BoundCtrl = 0; 6537 return true; 6538 } 6539 6540 return false; 6541 } 6542 6543 // Note: the order in this table matches the order of operands in AsmString. 
6544 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6545 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6546 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6547 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6548 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6549 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6550 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6551 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6552 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6553 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6554 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6555 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6556 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6557 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6558 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6559 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6560 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6561 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6562 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6563 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6564 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6565 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6566 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6567 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6568 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6569 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6570 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6571 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6572 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6573 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6574 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6575 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6576 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6577 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6578 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6579 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6580 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6581 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6582 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6583 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6584 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6585 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6586 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6587 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6588 }; 6589 6590 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6591 6592 OperandMatchResultTy res = parseOptionalOpr(Operands); 6593 6594 // This is a hack to enable hardcoded mandatory operands which follow 6595 // optional operands. 6596 // 6597 // Current design assumes that all operands after the first optional operand 6598 // are also optional. However implementation of some instructions violates 6599 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6600 // 6601 // To alleviate this problem, we have to (implicitly) parse extra operands 6602 // to make sure autogenerated parser of custom operands never hit hardcoded 6603 // mandatory operands. 
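// For instance (illustrative), in a flat/global atomic with return the
// hardcoded 'glc' follows optional modifiers such as 'offset', so the loop
// below keeps consuming optional operands, up to MAX_OPR_LOOKAHEAD of them,
// until parsing fails or the end of the statement is reached.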
6604 6605 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6606 if (res != MatchOperand_Success || 6607 isToken(AsmToken::EndOfStatement)) 6608 break; 6609 6610 trySkipToken(AsmToken::Comma); 6611 res = parseOptionalOpr(Operands); 6612 } 6613 6614 return res; 6615 } 6616 6617 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6618 OperandMatchResultTy res; 6619 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6620 // try to parse any optional operand here 6621 if (Op.IsBit) { 6622 res = parseNamedBit(Op.Name, Operands, Op.Type); 6623 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6624 res = parseOModOperand(Operands); 6625 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6626 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6627 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6628 res = parseSDWASel(Operands, Op.Name, Op.Type); 6629 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6630 res = parseSDWADstUnused(Operands); 6631 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6632 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6633 Op.Type == AMDGPUOperand::ImmTyNegLo || 6634 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6635 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6636 Op.ConvertResult); 6637 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6638 res = parseDim(Operands); 6639 } else { 6640 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6641 } 6642 if (res != MatchOperand_NoMatch) { 6643 return res; 6644 } 6645 } 6646 return MatchOperand_NoMatch; 6647 } 6648 6649 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6650 StringRef Name = Parser.getTok().getString(); 6651 if (Name == "mul") { 6652 return parseIntWithPrefix("mul", Operands, 6653 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6654 } 6655 6656 if (Name == "div") { 6657 return parseIntWithPrefix("div", Operands, 6658 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6659 } 6660 6661 return MatchOperand_NoMatch; 6662 } 6663 6664 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6665 cvtVOP3P(Inst, Operands); 6666 6667 int Opc = Inst.getOpcode(); 6668 6669 int SrcNum; 6670 const int Ops[] = { AMDGPU::OpName::src0, 6671 AMDGPU::OpName::src1, 6672 AMDGPU::OpName::src2 }; 6673 for (SrcNum = 0; 6674 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6675 ++SrcNum); 6676 assert(SrcNum > 0); 6677 6678 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6679 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6680 6681 if ((OpSel & (1 << SrcNum)) != 0) { 6682 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6683 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6684 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6685 } 6686 } 6687 6688 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6689 // 1. This operand is input modifiers 6690 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6691 // 2. This is not last operand 6692 && Desc.NumOperands > (OpNum + 1) 6693 // 3. Next operand is register class 6694 && Desc.OpInfo[OpNum + 1].RegClass != -1 6695 // 4. 
Next register is not tied to any other operand 6696 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6697 } 6698 6699 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6700 { 6701 OptionalImmIndexMap OptionalIdx; 6702 unsigned Opc = Inst.getOpcode(); 6703 6704 unsigned I = 1; 6705 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6706 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6707 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6708 } 6709 6710 for (unsigned E = Operands.size(); I != E; ++I) { 6711 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6712 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6713 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6714 } else if (Op.isInterpSlot() || 6715 Op.isInterpAttr() || 6716 Op.isAttrChan()) { 6717 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6718 } else if (Op.isImmModifier()) { 6719 OptionalIdx[Op.getImmTy()] = I; 6720 } else { 6721 llvm_unreachable("unhandled operand type"); 6722 } 6723 } 6724 6725 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6727 } 6728 6729 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6731 } 6732 6733 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6734 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6735 } 6736 } 6737 6738 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6739 OptionalImmIndexMap &OptionalIdx) { 6740 unsigned Opc = Inst.getOpcode(); 6741 6742 unsigned I = 1; 6743 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6744 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6745 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6746 } 6747 6748 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6749 // This instruction has src modifiers 6750 for (unsigned E = Operands.size(); I != E; ++I) { 6751 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6752 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6753 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6754 } else if (Op.isImmModifier()) { 6755 OptionalIdx[Op.getImmTy()] = I; 6756 } else if (Op.isRegOrImm()) { 6757 Op.addRegOrImmOperands(Inst, 1); 6758 } else { 6759 llvm_unreachable("unhandled operand type"); 6760 } 6761 } 6762 } else { 6763 // No src modifiers 6764 for (unsigned E = Operands.size(); I != E; ++I) { 6765 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6766 if (Op.isMod()) { 6767 OptionalIdx[Op.getImmTy()] = I; 6768 } else { 6769 Op.addRegOrImmOperands(Inst, 1); 6770 } 6771 } 6772 } 6773 6774 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6776 } 6777 6778 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6780 } 6781 6782 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6783 // it has src2 register operand that is tied to dst operand 6784 // we don't allow modifiers for this operand in assembler so src2_modifiers 6785 // should be 0. 
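// (Illustrative effect: for a form such as 'v_mac_f32_e64 v1, v2, v3' the
// code below inserts a zero src2_modifiers operand and then re-adds the dst
// operand as the tied src2.)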
6786 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6787 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6788 Opc == AMDGPU::V_MAC_F32_e64_vi || 6789 Opc == AMDGPU::V_MAC_F16_e64_vi || 6790 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6791 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6792 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6793 auto it = Inst.begin(); 6794 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6795 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6796 ++it; 6797 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6798 } 6799 } 6800 6801 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6802 OptionalImmIndexMap OptionalIdx; 6803 cvtVOP3(Inst, Operands, OptionalIdx); 6804 } 6805 6806 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6807 const OperandVector &Operands) { 6808 OptionalImmIndexMap OptIdx; 6809 const int Opc = Inst.getOpcode(); 6810 const MCInstrDesc &Desc = MII.get(Opc); 6811 6812 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6813 6814 cvtVOP3(Inst, Operands, OptIdx); 6815 6816 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6817 assert(!IsPacked); 6818 Inst.addOperand(Inst.getOperand(0)); 6819 } 6820 6821 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6822 // instruction, and then figure out where to actually put the modifiers 6823 6824 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6825 6826 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6827 if (OpSelHiIdx != -1) { 6828 int DefaultVal = IsPacked ? -1 : 0; 6829 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6830 DefaultVal); 6831 } 6832 6833 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6834 if (NegLoIdx != -1) { 6835 assert(IsPacked); 6836 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6837 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6838 } 6839 6840 const int Ops[] = { AMDGPU::OpName::src0, 6841 AMDGPU::OpName::src1, 6842 AMDGPU::OpName::src2 }; 6843 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6844 AMDGPU::OpName::src1_modifiers, 6845 AMDGPU::OpName::src2_modifiers }; 6846 6847 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6848 6849 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6850 unsigned OpSelHi = 0; 6851 unsigned NegLo = 0; 6852 unsigned NegHi = 0; 6853 6854 if (OpSelHiIdx != -1) { 6855 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6856 } 6857 6858 if (NegLoIdx != -1) { 6859 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6860 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6861 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6862 } 6863 6864 for (int J = 0; J < 3; ++J) { 6865 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6866 if (OpIdx == -1) 6867 break; 6868 6869 uint32_t ModVal = 0; 6870 6871 if ((OpSel & (1 << J)) != 0) 6872 ModVal |= SISrcMods::OP_SEL_0; 6873 6874 if ((OpSelHi & (1 << J)) != 0) 6875 ModVal |= SISrcMods::OP_SEL_1; 6876 6877 if ((NegLo & (1 << J)) != 0) 6878 ModVal |= SISrcMods::NEG; 6879 6880 if ((NegHi & (1 << J)) != 0) 6881 ModVal |= SISrcMods::NEG_HI; 6882 6883 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6884 6885 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6886 } 6887 } 6888 6889 //===----------------------------------------------------------------------===// 6890 // dpp 6891 
//===----------------------------------------------------------------------===// 6892 6893 bool AMDGPUOperand::isDPP8() const { 6894 return isImmTy(ImmTyDPP8); 6895 } 6896 6897 bool AMDGPUOperand::isDPPCtrl() const { 6898 using namespace AMDGPU::DPP; 6899 6900 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6901 if (result) { 6902 int64_t Imm = getImm(); 6903 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6904 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6905 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6906 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6907 (Imm == DppCtrl::WAVE_SHL1) || 6908 (Imm == DppCtrl::WAVE_ROL1) || 6909 (Imm == DppCtrl::WAVE_SHR1) || 6910 (Imm == DppCtrl::WAVE_ROR1) || 6911 (Imm == DppCtrl::ROW_MIRROR) || 6912 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6913 (Imm == DppCtrl::BCAST15) || 6914 (Imm == DppCtrl::BCAST31) || 6915 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6916 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6917 } 6918 return false; 6919 } 6920 6921 //===----------------------------------------------------------------------===// 6922 // mAI 6923 //===----------------------------------------------------------------------===// 6924 6925 bool AMDGPUOperand::isBLGP() const { 6926 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6927 } 6928 6929 bool AMDGPUOperand::isCBSZ() const { 6930 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6931 } 6932 6933 bool AMDGPUOperand::isABID() const { 6934 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6935 } 6936 6937 bool AMDGPUOperand::isS16Imm() const { 6938 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6939 } 6940 6941 bool AMDGPUOperand::isU16Imm() const { 6942 return isImm() && isUInt<16>(getImm()); 6943 } 6944 6945 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6946 if (!isGFX10()) 6947 return MatchOperand_NoMatch; 6948 6949 SMLoc S = Parser.getTok().getLoc(); 6950 6951 if (getLexer().isNot(AsmToken::Identifier)) 6952 return MatchOperand_NoMatch; 6953 if (getLexer().getTok().getString() != "dim") 6954 return MatchOperand_NoMatch; 6955 6956 Parser.Lex(); 6957 if (getLexer().isNot(AsmToken::Colon)) 6958 return MatchOperand_ParseFail; 6959 6960 Parser.Lex(); 6961 6962 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6963 // integer. 
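// For example, 'dim:1D' arrives as the integer token '1' immediately
// followed by the identifier 'D'; the code below joins the two tokens back
// together (and also accepts the long 'SQ_RSRC_IMG_*' spelling).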
6964 std::string Token; 6965 if (getLexer().is(AsmToken::Integer)) { 6966 SMLoc Loc = getLexer().getTok().getEndLoc(); 6967 Token = std::string(getLexer().getTok().getString()); 6968 Parser.Lex(); 6969 if (getLexer().getTok().getLoc() != Loc) 6970 return MatchOperand_ParseFail; 6971 } 6972 if (getLexer().isNot(AsmToken::Identifier)) 6973 return MatchOperand_ParseFail; 6974 Token += getLexer().getTok().getString(); 6975 6976 StringRef DimId = Token; 6977 if (DimId.startswith("SQ_RSRC_IMG_")) 6978 DimId = DimId.substr(12); 6979 6980 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6981 if (!DimInfo) 6982 return MatchOperand_ParseFail; 6983 6984 Parser.Lex(); 6985 6986 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6987 AMDGPUOperand::ImmTyDim)); 6988 return MatchOperand_Success; 6989 } 6990 6991 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6992 SMLoc S = Parser.getTok().getLoc(); 6993 StringRef Prefix; 6994 6995 if (getLexer().getKind() == AsmToken::Identifier) { 6996 Prefix = Parser.getTok().getString(); 6997 } else { 6998 return MatchOperand_NoMatch; 6999 } 7000 7001 if (Prefix != "dpp8") 7002 return parseDPPCtrl(Operands); 7003 if (!isGFX10()) 7004 return MatchOperand_NoMatch; 7005 7006 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7007 7008 int64_t Sels[8]; 7009 7010 Parser.Lex(); 7011 if (getLexer().isNot(AsmToken::Colon)) 7012 return MatchOperand_ParseFail; 7013 7014 Parser.Lex(); 7015 if (getLexer().isNot(AsmToken::LBrac)) 7016 return MatchOperand_ParseFail; 7017 7018 Parser.Lex(); 7019 if (getParser().parseAbsoluteExpression(Sels[0])) 7020 return MatchOperand_ParseFail; 7021 if (0 > Sels[0] || 7 < Sels[0]) 7022 return MatchOperand_ParseFail; 7023 7024 for (size_t i = 1; i < 8; ++i) { 7025 if (getLexer().isNot(AsmToken::Comma)) 7026 return MatchOperand_ParseFail; 7027 7028 Parser.Lex(); 7029 if (getParser().parseAbsoluteExpression(Sels[i])) 7030 return MatchOperand_ParseFail; 7031 if (0 > Sels[i] || 7 < Sels[i]) 7032 return MatchOperand_ParseFail; 7033 } 7034 7035 if (getLexer().isNot(AsmToken::RBrac)) 7036 return MatchOperand_ParseFail; 7037 Parser.Lex(); 7038 7039 unsigned DPP8 = 0; 7040 for (size_t i = 0; i < 8; ++i) 7041 DPP8 |= (Sels[i] << (i * 3)); 7042 7043 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7044 return MatchOperand_Success; 7045 } 7046 7047 OperandMatchResultTy 7048 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7049 using namespace AMDGPU::DPP; 7050 7051 SMLoc S = Parser.getTok().getLoc(); 7052 StringRef Prefix; 7053 int64_t Int; 7054 7055 if (getLexer().getKind() == AsmToken::Identifier) { 7056 Prefix = Parser.getTok().getString(); 7057 } else { 7058 return MatchOperand_NoMatch; 7059 } 7060 7061 if (Prefix == "row_mirror") { 7062 Int = DppCtrl::ROW_MIRROR; 7063 Parser.Lex(); 7064 } else if (Prefix == "row_half_mirror") { 7065 Int = DppCtrl::ROW_HALF_MIRROR; 7066 Parser.Lex(); 7067 } else { 7068 // Check to prevent parseDPPCtrlOps from eating invalid tokens 7069 if (Prefix != "quad_perm" 7070 && Prefix != "row_shl" 7071 && Prefix != "row_shr" 7072 && Prefix != "row_ror" 7073 && Prefix != "wave_shl" 7074 && Prefix != "wave_rol" 7075 && Prefix != "wave_shr" 7076 && Prefix != "wave_ror" 7077 && Prefix != "row_bcast" 7078 && Prefix != "row_share" 7079 && Prefix != "row_xmask") { 7080 return MatchOperand_NoMatch; 7081 } 7082 7083 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 7084 return MatchOperand_NoMatch; 7085 7086 if 
    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
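
// cvtDPP rebuilds the MCInst operand list for DPP and DPP8 instructions:
// registers and their input modifiers are emitted in descriptor order, and
// omitted optional modifiers receive the defaults above (row_mask/bank_mask
// 0xf, bound_ctrl and fi 0). A typical source form is, e.g.,
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf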
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
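
// Parses the dst_unused:UNUSED_{PAD,SEXT,PRESERVE} selector, e.g. as written in
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD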
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/true, /*SkipSrcVcc=*/true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/false, /*SkipSrcVcc=*/true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, /*SkipDstVcc=*/isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
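      // Concretely, after the dst has been pushed Inst has 1 operand, so a vcc
      // seen at that point is the VOP2b sdst; after dst + src0 + src1 (two
      // slots each) it has 5 operands, so a vcc there is the carry-in source.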
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;
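
    // VOPC writes its result to VCC (or an SGPR pair), so there is no dst_sel
    // or dst_unused to add; only clamp and the two source selectors remain.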
    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
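
// The TableGen'erated matcher provides MatchInstructionImpl() and the mnemonic
// spell checker, in addition to the MatchClassKind (MCK_*) enum used below.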

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate the
  // operand. This method checks if we are given an immediate operand but
  // expect to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }