1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
                 SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
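    // For example, an operand such as 'gds' may have been parsed as an
    // MCSymbolRefExpr; getToken() below then falls back to the referenced
    // symbol's name, so the expression can still be matched as a token.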
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_256RegClassID) || 282 isRegClass(AMDGPU::VReg_512RegClassID) || 283 isRegClass(AMDGPU::VReg_1024RegClassID); 284 } 285 286 bool isVReg32() const { 287 return isRegClass(AMDGPU::VGPR_32RegClassID); 288 } 289 290 bool isVReg32OrOff() const { 291 return isOff() || isVReg32(); 292 } 293 294 bool isNull() const { 295 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 296 } 297 298 bool isSDWAOperand(MVT type) const; 299 bool isSDWAFP16Operand() const; 300 bool isSDWAFP32Operand() const; 301 bool isSDWAInt16Operand() const; 302 bool isSDWAInt32Operand() const; 303 304 bool isImmTy(ImmTy ImmT) const { 305 return isImm() && Imm.Type == ImmT; 306 } 307 308 bool isImmModifier() const { 309 return isImm() && Imm.Type != ImmTyNone; 310 } 311 312 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 313 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 314 bool isDMask() const { return isImmTy(ImmTyDMask); } 315 bool isDim() const { return isImmTy(ImmTyDim); } 316 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 317 bool isDA() const { return isImmTy(ImmTyDA); } 318 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 319 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 320 bool isLWE() const { return isImmTy(ImmTyLWE); } 321 bool isOff() const { return isImmTy(ImmTyOff); } 322 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 323 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 324 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 325 bool isOffen() const { return isImmTy(ImmTyOffen); } 326 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 327 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 328 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 329 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 330 bool isOffset1() const { return isImmTy(ImmTyOffset1) && 
isUInt<8>(getImm()); } 331 332 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 333 bool isGDS() const { return isImmTy(ImmTyGDS); } 334 bool isLDS() const { return isImmTy(ImmTyLDS); } 335 bool isDLC() const { return isImmTy(ImmTyDLC); } 336 bool isGLC() const { return isImmTy(ImmTyGLC); } 337 bool isSLC() const { return isImmTy(ImmTySLC); } 338 bool isSWZ() const { return isImmTy(ImmTySWZ); } 339 bool isTFE() const { return isImmTy(ImmTyTFE); } 340 bool isD16() const { return isImmTy(ImmTyD16); } 341 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 342 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 343 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 344 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 345 bool isFI() const { return isImmTy(ImmTyDppFi); } 346 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 347 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 348 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 349 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 350 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 351 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 352 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 353 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 354 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 355 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 356 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 357 bool isHigh() const { return isImmTy(ImmTyHigh); } 358 359 bool isMod() const { 360 return isClampSI() || isOModSI(); 361 } 362 363 bool isRegOrImm() const { 364 return isReg() || isImm(); 365 } 366 367 bool isRegClass(unsigned RCID) const; 368 369 bool isInlineValue() const; 370 371 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 372 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 373 } 374 375 bool isSCSrcB16() const { 376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 377 } 378 379 bool isSCSrcV2B16() const { 380 return isSCSrcB16(); 381 } 382 383 bool isSCSrcB32() const { 384 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 385 } 386 387 bool isSCSrcB64() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 389 } 390 391 bool isBoolReg() const; 392 393 bool isSCSrcF16() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 395 } 396 397 bool isSCSrcV2F16() const { 398 return isSCSrcF16(); 399 } 400 401 bool isSCSrcF32() const { 402 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 403 } 404 405 bool isSCSrcF64() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 407 } 408 409 bool isSSrcB32() const { 410 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 411 } 412 413 bool isSSrcB16() const { 414 return isSCSrcB16() || isLiteralImm(MVT::i16); 415 } 416 417 bool isSSrcV2B16() const { 418 llvm_unreachable("cannot happen"); 419 return isSSrcB16(); 420 } 421 422 bool isSSrcB64() const { 423 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 424 // See isVSrc64(). 
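    // Note: addLiteralImmOperand() later in this file currently encodes a
    // non-inlinable 64-bit integer literal by emitting only its low 32 bits
    // (Lo_32(Val)) for the *_INT64/_FP64 operand types.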
425 return isSCSrcB64() || isLiteralImm(MVT::i64); 426 } 427 428 bool isSSrcF32() const { 429 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 430 } 431 432 bool isSSrcF64() const { 433 return isSCSrcB64() || isLiteralImm(MVT::f64); 434 } 435 436 bool isSSrcF16() const { 437 return isSCSrcB16() || isLiteralImm(MVT::f16); 438 } 439 440 bool isSSrcV2F16() const { 441 llvm_unreachable("cannot happen"); 442 return isSSrcF16(); 443 } 444 445 bool isSSrcOrLdsB32() const { 446 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 447 isLiteralImm(MVT::i32) || isExpr(); 448 } 449 450 bool isVCSrcB32() const { 451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 452 } 453 454 bool isVCSrcB64() const { 455 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 456 } 457 458 bool isVCSrcB16() const { 459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 460 } 461 462 bool isVCSrcV2B16() const { 463 return isVCSrcB16(); 464 } 465 466 bool isVCSrcF32() const { 467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 468 } 469 470 bool isVCSrcF64() const { 471 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 472 } 473 474 bool isVCSrcF16() const { 475 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 476 } 477 478 bool isVCSrcV2F16() const { 479 return isVCSrcF16(); 480 } 481 482 bool isVSrcB32() const { 483 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 484 } 485 486 bool isVSrcB64() const { 487 return isVCSrcF64() || isLiteralImm(MVT::i64); 488 } 489 490 bool isVSrcB16() const { 491 return isVCSrcF16() || isLiteralImm(MVT::i16); 492 } 493 494 bool isVSrcV2B16() const { 495 return isVSrcB16() || isLiteralImm(MVT::v2i16); 496 } 497 498 bool isVSrcF32() const { 499 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 500 } 501 502 bool isVSrcF64() const { 503 return isVCSrcF64() || isLiteralImm(MVT::f64); 504 } 505 506 bool isVSrcF16() const { 507 return isVCSrcF16() || isLiteralImm(MVT::f16); 508 } 509 510 bool isVSrcV2F16() const { 511 return isVSrcF16() || isLiteralImm(MVT::v2f16); 512 } 513 514 bool isVISrcB32() const { 515 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 516 } 517 518 bool isVISrcB16() const { 519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 520 } 521 522 bool isVISrcV2B16() const { 523 return isVISrcB16(); 524 } 525 526 bool isVISrcF32() const { 527 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 528 } 529 530 bool isVISrcF16() const { 531 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 532 } 533 534 bool isVISrcV2F16() const { 535 return isVISrcF16() || isVISrcB32(); 536 } 537 538 bool isAISrcB32() const { 539 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 540 } 541 542 bool isAISrcB16() const { 543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 544 } 545 546 bool isAISrcV2B16() const { 547 return isAISrcB16(); 548 } 549 550 bool isAISrcF32() const { 551 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 552 } 553 554 bool isAISrcF16() const { 555 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 556 } 557 558 bool isAISrcV2F16() const { 559 return isAISrcF16() || isAISrcB32(); 560 } 561 562 bool isAISrc_128B32() const { 563 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 564 } 565 566 bool isAISrc_128B16() const { 567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 568 } 569 570 bool 
isAISrc_128V2B16() const { 571 return isAISrc_128B16(); 572 } 573 574 bool isAISrc_128F32() const { 575 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 576 } 577 578 bool isAISrc_128F16() const { 579 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 580 } 581 582 bool isAISrc_128V2F16() const { 583 return isAISrc_128F16() || isAISrc_128B32(); 584 } 585 586 bool isAISrc_512B32() const { 587 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 588 } 589 590 bool isAISrc_512B16() const { 591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 592 } 593 594 bool isAISrc_512V2B16() const { 595 return isAISrc_512B16(); 596 } 597 598 bool isAISrc_512F32() const { 599 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 600 } 601 602 bool isAISrc_512F16() const { 603 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 604 } 605 606 bool isAISrc_512V2F16() const { 607 return isAISrc_512F16() || isAISrc_512B32(); 608 } 609 610 bool isAISrc_1024B32() const { 611 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 612 } 613 614 bool isAISrc_1024B16() const { 615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 616 } 617 618 bool isAISrc_1024V2B16() const { 619 return isAISrc_1024B16(); 620 } 621 622 bool isAISrc_1024F32() const { 623 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 624 } 625 626 bool isAISrc_1024F16() const { 627 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 628 } 629 630 bool isAISrc_1024V2F16() const { 631 return isAISrc_1024F16() || isAISrc_1024B32(); 632 } 633 634 bool isKImmFP32() const { 635 return isLiteralImm(MVT::f32); 636 } 637 638 bool isKImmFP16() const { 639 return isLiteralImm(MVT::f16); 640 } 641 642 bool isMem() const override { 643 return false; 644 } 645 646 bool isExpr() const { 647 return Kind == Expression; 648 } 649 650 bool isSoppBrTarget() const { 651 return isExpr() || isImm(); 652 } 653 654 bool isSWaitCnt() const; 655 bool isHwreg() const; 656 bool isSendMsg() const; 657 bool isSwizzle() const; 658 bool isSMRDOffset8() const; 659 bool isSMRDOffset20() const; 660 bool isSMRDLiteralOffset() const; 661 bool isDPP8() const; 662 bool isDPPCtrl() const; 663 bool isBLGP() const; 664 bool isCBSZ() const; 665 bool isABID() const; 666 bool isGPRIdxMode() const; 667 bool isS16Imm() const; 668 bool isU16Imm() const; 669 bool isEndpgm() const; 670 671 StringRef getExpressionAsToken() const { 672 assert(isExpr()); 673 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 674 return S->getSymbol().getName(); 675 } 676 677 StringRef getToken() const { 678 assert(isToken()); 679 680 if (Kind == Expression) 681 return getExpressionAsToken(); 682 683 return StringRef(Tok.Data, Tok.Length); 684 } 685 686 int64_t getImm() const { 687 assert(isImm()); 688 return Imm.Val; 689 } 690 691 ImmTy getImmTy() const { 692 assert(isImm()); 693 return Imm.Type; 694 } 695 696 unsigned getReg() const override { 697 assert(isRegKind()); 698 return Reg.RegNo; 699 } 700 701 SMLoc getStartLoc() const override { 702 return StartLoc; 703 } 704 705 SMLoc getEndLoc() const override { 706 return EndLoc; 707 } 708 709 SMRange getLocRange() const { 710 return SMRange(StartLoc, EndLoc); 711 } 712 713 Modifiers getModifiers() const { 714 assert(isRegKind() || isImmTy(ImmTyNone)); 715 return isRegKind() ? 
Reg.Mods : Imm.Mods; 716 } 717 718 void setModifiers(Modifiers Mods) { 719 assert(isRegKind() || isImmTy(ImmTyNone)); 720 if (isRegKind()) 721 Reg.Mods = Mods; 722 else 723 Imm.Mods = Mods; 724 } 725 726 bool hasModifiers() const { 727 return getModifiers().hasModifiers(); 728 } 729 730 bool hasFPModifiers() const { 731 return getModifiers().hasFPModifiers(); 732 } 733 734 bool hasIntModifiers() const { 735 return getModifiers().hasIntModifiers(); 736 } 737 738 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 739 740 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 741 742 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 743 744 template <unsigned Bitwidth> 745 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 746 747 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 748 addKImmFPOperands<16>(Inst, N); 749 } 750 751 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 752 addKImmFPOperands<32>(Inst, N); 753 } 754 755 void addRegOperands(MCInst &Inst, unsigned N) const; 756 757 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 758 addRegOperands(Inst, N); 759 } 760 761 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 762 if (isRegKind()) 763 addRegOperands(Inst, N); 764 else if (isExpr()) 765 Inst.addOperand(MCOperand::createExpr(Expr)); 766 else 767 addImmOperands(Inst, N); 768 } 769 770 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 771 Modifiers Mods = getModifiers(); 772 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 773 if (isRegKind()) { 774 addRegOperands(Inst, N); 775 } else { 776 addImmOperands(Inst, N, false); 777 } 778 } 779 780 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 781 assert(!hasIntModifiers()); 782 addRegOrImmWithInputModsOperands(Inst, N); 783 } 784 785 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 786 assert(!hasFPModifiers()); 787 addRegOrImmWithInputModsOperands(Inst, N); 788 } 789 790 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 791 Modifiers Mods = getModifiers(); 792 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 793 assert(isRegKind()); 794 addRegOperands(Inst, N); 795 } 796 797 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 798 assert(!hasIntModifiers()); 799 addRegWithInputModsOperands(Inst, N); 800 } 801 802 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 803 assert(!hasFPModifiers()); 804 addRegWithInputModsOperands(Inst, N); 805 } 806 807 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 808 if (isImm()) 809 addImmOperands(Inst, N); 810 else { 811 assert(isExpr()); 812 Inst.addOperand(MCOperand::createExpr(Expr)); 813 } 814 } 815 816 static void printImmTy(raw_ostream& OS, ImmTy Type) { 817 switch (Type) { 818 case ImmTyNone: OS << "None"; break; 819 case ImmTyGDS: OS << "GDS"; break; 820 case ImmTyLDS: OS << "LDS"; break; 821 case ImmTyOffen: OS << "Offen"; break; 822 case ImmTyIdxen: OS << "Idxen"; break; 823 case ImmTyAddr64: OS << "Addr64"; break; 824 case ImmTyOffset: OS << "Offset"; break; 825 case ImmTyInstOffset: OS << "InstOffset"; break; 826 case ImmTyOffset0: OS << "Offset0"; break; 827 case ImmTyOffset1: OS << "Offset1"; break; 828 case ImmTyDLC: OS << "DLC"; break; 829 case ImmTyGLC: OS << "GLC"; break; 830 case ImmTySLC: OS << "SLC"; break; 831 case ImmTySWZ: OS << "SWZ"; break; 832 case ImmTyTFE: OS << "TFE"; break; 833 case 
ImmTyD16: OS << "D16"; break; 834 case ImmTyFORMAT: OS << "FORMAT"; break; 835 case ImmTyClampSI: OS << "ClampSI"; break; 836 case ImmTyOModSI: OS << "OModSI"; break; 837 case ImmTyDPP8: OS << "DPP8"; break; 838 case ImmTyDppCtrl: OS << "DppCtrl"; break; 839 case ImmTyDppRowMask: OS << "DppRowMask"; break; 840 case ImmTyDppBankMask: OS << "DppBankMask"; break; 841 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 842 case ImmTyDppFi: OS << "FI"; break; 843 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 844 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 845 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 846 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 847 case ImmTyDMask: OS << "DMask"; break; 848 case ImmTyDim: OS << "Dim"; break; 849 case ImmTyUNorm: OS << "UNorm"; break; 850 case ImmTyDA: OS << "DA"; break; 851 case ImmTyR128A16: OS << "R128A16"; break; 852 case ImmTyA16: OS << "A16"; break; 853 case ImmTyLWE: OS << "LWE"; break; 854 case ImmTyOff: OS << "Off"; break; 855 case ImmTyExpTgt: OS << "ExpTgt"; break; 856 case ImmTyExpCompr: OS << "ExpCompr"; break; 857 case ImmTyExpVM: OS << "ExpVM"; break; 858 case ImmTyHwreg: OS << "Hwreg"; break; 859 case ImmTySendMsg: OS << "SendMsg"; break; 860 case ImmTyInterpSlot: OS << "InterpSlot"; break; 861 case ImmTyInterpAttr: OS << "InterpAttr"; break; 862 case ImmTyAttrChan: OS << "AttrChan"; break; 863 case ImmTyOpSel: OS << "OpSel"; break; 864 case ImmTyOpSelHi: OS << "OpSelHi"; break; 865 case ImmTyNegLo: OS << "NegLo"; break; 866 case ImmTyNegHi: OS << "NegHi"; break; 867 case ImmTySwizzle: OS << "Swizzle"; break; 868 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 869 case ImmTyHigh: OS << "High"; break; 870 case ImmTyBLGP: OS << "BLGP"; break; 871 case ImmTyCBSZ: OS << "CBSZ"; break; 872 case ImmTyABID: OS << "ABID"; break; 873 case ImmTyEndpgm: OS << "Endpgm"; break; 874 } 875 } 876 877 void print(raw_ostream &OS) const override { 878 switch (Kind) { 879 case Register: 880 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 881 break; 882 case Immediate: 883 OS << '<' << getImm(); 884 if (getImmTy() != ImmTyNone) { 885 OS << " type: "; printImmTy(OS, getImmTy()); 886 } 887 OS << " mods: " << Imm.Mods << '>'; 888 break; 889 case Token: 890 OS << '\'' << getToken() << '\''; 891 break; 892 case Expression: 893 OS << "<expr " << *Expr << '>'; 894 break; 895 } 896 } 897 898 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 899 int64_t Val, SMLoc Loc, 900 ImmTy Type = ImmTyNone, 901 bool IsFPImm = false) { 902 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 903 Op->Imm.Val = Val; 904 Op->Imm.IsFPImm = IsFPImm; 905 Op->Imm.Type = Type; 906 Op->Imm.Mods = Modifiers(); 907 Op->StartLoc = Loc; 908 Op->EndLoc = Loc; 909 return Op; 910 } 911 912 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 913 StringRef Str, SMLoc Loc, 914 bool HasExplicitEncodingSize = true) { 915 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 916 Res->Tok.Data = Str.data(); 917 Res->Tok.Length = Str.size(); 918 Res->StartLoc = Loc; 919 Res->EndLoc = Loc; 920 return Res; 921 } 922 923 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 924 unsigned RegNo, SMLoc S, 925 SMLoc E) { 926 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 927 Op->Reg.RegNo = RegNo; 928 Op->Reg.Mods = Modifiers(); 929 Op->StartLoc = S; 930 Op->EndLoc = E; 931 return Op; 932 } 933 934 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 935 
const class MCExpr *Expr, SMLoc S) { 936 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 937 Op->Expr = Expr; 938 Op->StartLoc = S; 939 Op->EndLoc = S; 940 return Op; 941 } 942 }; 943 944 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 945 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 946 return OS; 947 } 948 949 //===----------------------------------------------------------------------===// 950 // AsmParser 951 //===----------------------------------------------------------------------===// 952 953 // Holds info related to the current kernel, e.g. count of SGPRs used. 954 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 955 // .amdgpu_hsa_kernel or at EOF. 956 class KernelScopeInfo { 957 int SgprIndexUnusedMin = -1; 958 int VgprIndexUnusedMin = -1; 959 MCContext *Ctx = nullptr; 960 961 void usesSgprAt(int i) { 962 if (i >= SgprIndexUnusedMin) { 963 SgprIndexUnusedMin = ++i; 964 if (Ctx) { 965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 966 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 967 } 968 } 969 } 970 971 void usesVgprAt(int i) { 972 if (i >= VgprIndexUnusedMin) { 973 VgprIndexUnusedMin = ++i; 974 if (Ctx) { 975 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 976 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 977 } 978 } 979 } 980 981 public: 982 KernelScopeInfo() = default; 983 984 void initialize(MCContext &Context) { 985 Ctx = &Context; 986 usesSgprAt(SgprIndexUnusedMin = -1); 987 usesVgprAt(VgprIndexUnusedMin = -1); 988 } 989 990 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 991 switch (RegKind) { 992 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 993 case IS_AGPR: // fall through 994 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 995 default: break; 996 } 997 } 998 }; 999 1000 class AMDGPUAsmParser : public MCTargetAsmParser { 1001 MCAsmParser &Parser; 1002 1003 // Number of extra operands parsed after the first optional operand. 1004 // This may be necessary to skip hardcoded mandatory operands. 1005 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1006 1007 unsigned ForcedEncodingSize = 0; 1008 bool ForcedDPP = false; 1009 bool ForcedSDWA = false; 1010 KernelScopeInfo KernelScope; 1011 1012 /// @name Auto-generated Match Functions 1013 /// { 1014 1015 #define GET_ASSEMBLER_HEADER 1016 #include "AMDGPUGenAsmMatcher.inc" 1017 1018 /// } 1019 1020 private: 1021 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1022 bool OutOfRangeError(SMRange Range); 1023 /// Calculate VGPR/SGPR blocks required for given target, reserved 1024 /// registers, and user-specified NextFreeXGPR values. 1025 /// 1026 /// \param Features [in] Target features, used for bug corrections. 1027 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1028 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1029 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1030 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1031 /// descriptor field, if valid. 1032 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1033 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1034 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1035 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1036 /// \param VGPRBlocks [out] Result VGPR block count. 1037 /// \param SGPRBlocks [out] Result SGPR block count. 1038 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1039 bool FlatScrUsed, bool XNACKUsed, 1040 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1041 SMRange VGPRRange, unsigned NextFreeSGPR, 1042 SMRange SGPRRange, unsigned &VGPRBlocks, 1043 unsigned &SGPRBlocks); 1044 bool ParseDirectiveAMDGCNTarget(); 1045 bool ParseDirectiveAMDHSAKernel(); 1046 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1047 bool ParseDirectiveHSACodeObjectVersion(); 1048 bool ParseDirectiveHSACodeObjectISA(); 1049 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1050 bool ParseDirectiveAMDKernelCodeT(); 1051 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1052 bool ParseDirectiveAMDGPUHsaKernel(); 1053 1054 bool ParseDirectiveISAVersion(); 1055 bool ParseDirectiveHSAMetadata(); 1056 bool ParseDirectivePALMetadataBegin(); 1057 bool ParseDirectivePALMetadata(); 1058 bool ParseDirectiveAMDGPULDS(); 1059 1060 /// Common code to parse out a block of text (typically YAML) between start and 1061 /// end directives. 1062 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1063 const char *AssemblerDirectiveEnd, 1064 std::string &CollectString); 1065 1066 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1067 RegisterKind RegKind, unsigned Reg1); 1068 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 1069 unsigned& RegNum, unsigned& RegWidth); 1070 unsigned ParseRegularReg(RegisterKind &RegKind, 1071 unsigned &RegNum, 1072 unsigned &RegWidth); 1073 unsigned ParseSpecialReg(RegisterKind &RegKind, 1074 unsigned &RegNum, 1075 unsigned &RegWidth); 1076 unsigned ParseRegList(RegisterKind &RegKind, 1077 unsigned &RegNum, 1078 unsigned &RegWidth); 1079 bool ParseRegRange(unsigned& Num, unsigned& Width); 1080 unsigned getRegularReg(RegisterKind RegKind, 1081 unsigned RegNum, 1082 unsigned RegWidth); 1083 1084 bool isRegister(); 1085 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1086 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1087 void initializeGprCountSymbol(RegisterKind RegKind); 1088 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1089 unsigned RegWidth); 1090 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1091 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1092 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1093 bool IsGdsHardcoded); 1094 1095 public: 1096 enum AMDGPUMatchResultTy { 1097 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1098 }; 1099 enum OperandMode { 1100 OperandMode_Default, 1101 OperandMode_NSA, 1102 }; 1103 1104 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1105 1106 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1107 const MCInstrInfo &MII, 1108 const MCTargetOptions &Options) 1109 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1110 MCAsmParserExtension::Initialize(Parser); 1111 1112 if (getFeatureBits().none()) { 1113 // Set default features. 1114 copySTI().ToggleFeature("southern-islands"); 1115 } 1116 1117 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1118 1119 { 1120 // TODO: make those pre-defined variables read-only. 1121 // Currently there is none suitable machinery in the core llvm-mc for this. 
1122 // MCSymbol::isRedefinable is intended for another purpose, and 1123 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1124 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1125 MCContext &Ctx = getContext(); 1126 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1127 MCSymbol *Sym = 1128 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1129 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1130 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1131 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1132 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1133 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1134 } else { 1135 MCSymbol *Sym = 1136 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1137 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1138 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1139 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1140 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1141 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1142 } 1143 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1144 initializeGprCountSymbol(IS_VGPR); 1145 initializeGprCountSymbol(IS_SGPR); 1146 } else 1147 KernelScope.initialize(getContext()); 1148 } 1149 } 1150 1151 bool hasXNACK() const { 1152 return AMDGPU::hasXNACK(getSTI()); 1153 } 1154 1155 bool hasMIMG_R128() const { 1156 return AMDGPU::hasMIMG_R128(getSTI()); 1157 } 1158 1159 bool hasPackedD16() const { 1160 return AMDGPU::hasPackedD16(getSTI()); 1161 } 1162 1163 bool hasGFX10A16() const { 1164 return AMDGPU::hasGFX10A16(getSTI()); 1165 } 1166 1167 bool isSI() const { 1168 return AMDGPU::isSI(getSTI()); 1169 } 1170 1171 bool isCI() const { 1172 return AMDGPU::isCI(getSTI()); 1173 } 1174 1175 bool isVI() const { 1176 return AMDGPU::isVI(getSTI()); 1177 } 1178 1179 bool isGFX9() const { 1180 return AMDGPU::isGFX9(getSTI()); 1181 } 1182 1183 bool isGFX10() const { 1184 return AMDGPU::isGFX10(getSTI()); 1185 } 1186 1187 bool hasInv2PiInlineImm() const { 1188 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1189 } 1190 1191 bool hasFlatOffsets() const { 1192 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1193 } 1194 1195 bool hasSGPR102_SGPR103() const { 1196 return !isVI() && !isGFX9(); 1197 } 1198 1199 bool hasSGPR104_SGPR105() const { 1200 return isGFX10(); 1201 } 1202 1203 bool hasIntClamp() const { 1204 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1205 } 1206 1207 AMDGPUTargetStreamer &getTargetStreamer() { 1208 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1209 return static_cast<AMDGPUTargetStreamer &>(TS); 1210 } 1211 1212 const MCRegisterInfo *getMRI() const { 1213 // We need this const_cast because for some reason getContext() is not const 1214 // in MCAsmParser. 
1215 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1216 } 1217 1218 const MCInstrInfo *getMII() const { 1219 return &MII; 1220 } 1221 1222 const FeatureBitset &getFeatureBits() const { 1223 return getSTI().getFeatureBits(); 1224 } 1225 1226 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1227 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1228 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1229 1230 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1231 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1232 bool isForcedDPP() const { return ForcedDPP; } 1233 bool isForcedSDWA() const { return ForcedSDWA; } 1234 ArrayRef<unsigned> getMatchedVariants() const; 1235 1236 std::unique_ptr<AMDGPUOperand> parseRegister(); 1237 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1238 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1239 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1240 unsigned Kind) override; 1241 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1242 OperandVector &Operands, MCStreamer &Out, 1243 uint64_t &ErrorInfo, 1244 bool MatchingInlineAsm) override; 1245 bool ParseDirective(AsmToken DirectiveID) override; 1246 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1247 OperandMode Mode = OperandMode_Default); 1248 StringRef parseMnemonicSuffix(StringRef Name); 1249 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1250 SMLoc NameLoc, OperandVector &Operands) override; 1251 //bool ProcessInstruction(MCInst &Inst); 1252 1253 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1254 1255 OperandMatchResultTy 1256 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1257 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1258 bool (*ConvertResult)(int64_t &) = nullptr); 1259 1260 OperandMatchResultTy 1261 parseOperandArrayWithPrefix(const char *Prefix, 1262 OperandVector &Operands, 1263 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1264 bool (*ConvertResult)(int64_t&) = nullptr); 1265 1266 OperandMatchResultTy 1267 parseNamedBit(const char *Name, OperandVector &Operands, 1268 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1269 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1270 StringRef &Value); 1271 1272 bool isModifier(); 1273 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1274 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1275 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1276 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1277 bool parseSP3NegModifier(); 1278 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1279 OperandMatchResultTy parseReg(OperandVector &Operands); 1280 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1281 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1282 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1283 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1284 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1285 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1286 OperandMatchResultTy 
parseDfmtNfmt(OperandVector &Operands); 1287 1288 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1289 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1290 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1291 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1292 1293 bool parseCnt(int64_t &IntVal); 1294 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1295 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1296 1297 private: 1298 struct OperandInfoTy { 1299 int64_t Id; 1300 bool IsSymbolic = false; 1301 bool IsDefined = false; 1302 1303 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1304 }; 1305 1306 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1307 bool validateSendMsg(const OperandInfoTy &Msg, 1308 const OperandInfoTy &Op, 1309 const OperandInfoTy &Stream, 1310 const SMLoc Loc); 1311 1312 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1313 bool validateHwreg(const OperandInfoTy &HwReg, 1314 const int64_t Offset, 1315 const int64_t Width, 1316 const SMLoc Loc); 1317 1318 void errorExpTgt(); 1319 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1320 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1321 1322 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1323 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1324 bool validateSOPLiteral(const MCInst &Inst) const; 1325 bool validateConstantBusLimitations(const MCInst &Inst); 1326 bool validateEarlyClobberLimitations(const MCInst &Inst); 1327 bool validateIntClampSupported(const MCInst &Inst); 1328 bool validateMIMGAtomicDMask(const MCInst &Inst); 1329 bool validateMIMGGatherDMask(const MCInst &Inst); 1330 bool validateMovrels(const MCInst &Inst); 1331 bool validateMIMGDataSize(const MCInst &Inst); 1332 bool validateMIMGAddrSize(const MCInst &Inst); 1333 bool validateMIMGD16(const MCInst &Inst); 1334 bool validateMIMGDim(const MCInst &Inst); 1335 bool validateLdsDirect(const MCInst &Inst); 1336 bool validateOpSel(const MCInst &Inst); 1337 bool validateVccOperand(unsigned Reg) const; 1338 bool validateVOP3Literal(const MCInst &Inst) const; 1339 unsigned getConstantBusLimit(unsigned Opcode) const; 1340 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1341 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1342 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1343 1344 bool isId(const StringRef Id) const; 1345 bool isId(const AsmToken &Token, const StringRef Id) const; 1346 bool isToken(const AsmToken::TokenKind Kind) const; 1347 bool trySkipId(const StringRef Id); 1348 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1349 bool trySkipToken(const AsmToken::TokenKind Kind); 1350 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1351 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1352 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1353 AsmToken::TokenKind getTokenKind() const; 1354 bool parseExpr(int64_t &Imm); 1355 bool parseExpr(OperandVector &Operands); 1356 StringRef getTokenStr() const; 1357 AsmToken peekToken(); 1358 AsmToken getToken() const; 1359 SMLoc getLoc() const; 1360 void lex(); 1361 1362 public: 1363 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1364 OperandMatchResultTy 
parseOptionalOpr(OperandVector &Operands); 1365 1366 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1367 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1368 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1369 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1370 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1371 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1372 1373 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1374 const unsigned MinVal, 1375 const unsigned MaxVal, 1376 const StringRef ErrMsg); 1377 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1378 bool parseSwizzleOffset(int64_t &Imm); 1379 bool parseSwizzleMacro(int64_t &Imm); 1380 bool parseSwizzleQuadPerm(int64_t &Imm); 1381 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1382 bool parseSwizzleBroadcast(int64_t &Imm); 1383 bool parseSwizzleSwap(int64_t &Imm); 1384 bool parseSwizzleReverse(int64_t &Imm); 1385 1386 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1387 int64_t parseGPRIdxMacro(); 1388 1389 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1390 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1391 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1392 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1393 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1394 1395 AMDGPUOperand::Ptr defaultDLC() const; 1396 AMDGPUOperand::Ptr defaultGLC() const; 1397 AMDGPUOperand::Ptr defaultSLC() const; 1398 1399 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1400 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1401 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1402 AMDGPUOperand::Ptr defaultFlatOffset() const; 1403 1404 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1405 1406 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1407 OptionalImmIndexMap &OptionalIdx); 1408 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1409 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1410 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1411 1412 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1413 1414 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1415 bool IsAtomic = false); 1416 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1417 1418 OperandMatchResultTy parseDim(OperandVector &Operands); 1419 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1420 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1421 AMDGPUOperand::Ptr defaultRowMask() const; 1422 AMDGPUOperand::Ptr defaultBankMask() const; 1423 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1424 AMDGPUOperand::Ptr defaultFI() const; 1425 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1426 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1427 1428 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1429 AMDGPUOperand::ImmTy Type); 1430 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1431 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1432 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1433 void cvtSdwaVOP2b(MCInst &Inst, const 
OperandVector &Operands); 1434 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1435 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1436 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1437 uint64_t BasicInstType, 1438 bool SkipDstVcc = false, 1439 bool SkipSrcVcc = false); 1440 1441 AMDGPUOperand::Ptr defaultBLGP() const; 1442 AMDGPUOperand::Ptr defaultCBSZ() const; 1443 AMDGPUOperand::Ptr defaultABID() const; 1444 1445 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1446 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1447 }; 1448 1449 struct OptionalOperand { 1450 const char *Name; 1451 AMDGPUOperand::ImmTy Type; 1452 bool IsBit; 1453 bool (*ConvertResult)(int64_t&); 1454 }; 1455 1456 } // end anonymous namespace 1457 1458 // May be called with integer type with equivalent bitwidth. 1459 static const fltSemantics *getFltSemantics(unsigned Size) { 1460 switch (Size) { 1461 case 4: 1462 return &APFloat::IEEEsingle(); 1463 case 8: 1464 return &APFloat::IEEEdouble(); 1465 case 2: 1466 return &APFloat::IEEEhalf(); 1467 default: 1468 llvm_unreachable("unsupported fp type"); 1469 } 1470 } 1471 1472 static const fltSemantics *getFltSemantics(MVT VT) { 1473 return getFltSemantics(VT.getSizeInBits() / 8); 1474 } 1475 1476 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1477 switch (OperandType) { 1478 case AMDGPU::OPERAND_REG_IMM_INT32: 1479 case AMDGPU::OPERAND_REG_IMM_FP32: 1480 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1481 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1482 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1483 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1484 return &APFloat::IEEEsingle(); 1485 case AMDGPU::OPERAND_REG_IMM_INT64: 1486 case AMDGPU::OPERAND_REG_IMM_FP64: 1487 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1488 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1489 return &APFloat::IEEEdouble(); 1490 case AMDGPU::OPERAND_REG_IMM_INT16: 1491 case AMDGPU::OPERAND_REG_IMM_FP16: 1492 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1493 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1494 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1495 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1496 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1497 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1498 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1499 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1500 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1501 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1502 return &APFloat::IEEEhalf(); 1503 default: 1504 llvm_unreachable("unsupported fp type"); 1505 } 1506 } 1507 1508 //===----------------------------------------------------------------------===// 1509 // Operand 1510 //===----------------------------------------------------------------------===// 1511 1512 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1513 bool Lost; 1514 1515 // Convert literal to single precision 1516 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1517 APFloat::rmNearestTiesToEven, 1518 &Lost); 1519 // We allow precision lost but not overflow or underflow 1520 if (Status != APFloat::opOK && 1521 Lost && 1522 ((Status & APFloat::opOverflow) != 0 || 1523 (Status & APFloat::opUnderflow) != 0)) { 1524 return false; 1525 } 1526 1527 return true; 1528 } 1529 1530 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1531 return isUIntN(Size, Val) || isIntN(Size, Val); 1532 } 1533 1534 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1535 1536 // This is a hack to enable named inline values like 1537 // shared_base with both 
  // 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal are set to zero when it is encoded
    // (see addLiteralImmOperand), but we still accept these literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ?
MVT::i16 : type; 1639 1640 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1641 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1642 } 1643 1644 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1645 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1646 } 1647 1648 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1649 if (AsmParser->isVI()) 1650 return isVReg32(); 1651 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1652 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1653 else 1654 return false; 1655 } 1656 1657 bool AMDGPUOperand::isSDWAFP16Operand() const { 1658 return isSDWAOperand(MVT::f16); 1659 } 1660 1661 bool AMDGPUOperand::isSDWAFP32Operand() const { 1662 return isSDWAOperand(MVT::f32); 1663 } 1664 1665 bool AMDGPUOperand::isSDWAInt16Operand() const { 1666 return isSDWAOperand(MVT::i16); 1667 } 1668 1669 bool AMDGPUOperand::isSDWAInt32Operand() const { 1670 return isSDWAOperand(MVT::i32); 1671 } 1672 1673 bool AMDGPUOperand::isBoolReg() const { 1674 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1675 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1676 } 1677 1678 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1679 { 1680 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1681 assert(Size == 2 || Size == 4 || Size == 8); 1682 1683 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1684 1685 if (Imm.Mods.Abs) { 1686 Val &= ~FpSignMask; 1687 } 1688 if (Imm.Mods.Neg) { 1689 Val ^= FpSignMask; 1690 } 1691 1692 return Val; 1693 } 1694 1695 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1696 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1697 Inst.getNumOperands())) { 1698 addLiteralImmOperand(Inst, Imm.Val, 1699 ApplyModifiers & 1700 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1701 } else { 1702 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1703 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1704 } 1705 } 1706 1707 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1708 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1709 auto OpNum = Inst.getNumOperands(); 1710 // Check that this operand accepts literals 1711 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1712 1713 if (ApplyModifiers) { 1714 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1715 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1716 Val = applyInputFPModifiers(Val, Size); 1717 } 1718 1719 APInt Literal(64, Val); 1720 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1721 1722 if (Imm.IsFPImm) { // We got fp literal token 1723 switch (OpTy) { 1724 case AMDGPU::OPERAND_REG_IMM_INT64: 1725 case AMDGPU::OPERAND_REG_IMM_FP64: 1726 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1727 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1728 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1729 AsmParser->hasInv2PiInlineImm())) { 1730 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1731 return; 1732 } 1733 1734 // Non-inlineable 1735 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1736 // For fp operands we check if low 32 bits are zeros 1737 if (Literal.getLoBits(32) != 0) { 1738 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1739 "Can't encode literal as exact 64-bit floating-point operand. " 1740 "Low 32-bits will be set to zero"); 1741 } 1742 1743 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1744 return; 1745 } 1746 1747 // We don't allow fp literals in 64-bit integer instructions. It is 1748 // unclear how we should encode them. This case should be checked earlier 1749 // in predicate methods (isLiteralImm()) 1750 llvm_unreachable("fp literal in 64-bit integer instruction."); 1751 1752 case AMDGPU::OPERAND_REG_IMM_INT32: 1753 case AMDGPU::OPERAND_REG_IMM_FP32: 1754 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1755 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1756 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1757 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1758 case AMDGPU::OPERAND_REG_IMM_INT16: 1759 case AMDGPU::OPERAND_REG_IMM_FP16: 1760 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1761 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1762 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1763 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1764 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1765 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1766 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1767 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1768 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1769 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1770 bool lost; 1771 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1772 // Convert literal to single precision 1773 FPLiteral.convert(*getOpFltSemantics(OpTy), 1774 APFloat::rmNearestTiesToEven, &lost); 1775 // We allow precision lost but not overflow or underflow. This should be 1776 // checked earlier in isLiteralImm() 1777 1778 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1779 Inst.addOperand(MCOperand::createImm(ImmVal)); 1780 return; 1781 } 1782 default: 1783 llvm_unreachable("invalid operand size"); 1784 } 1785 1786 return; 1787 } 1788 1789 // We got int literal token. 1790 // Only sign extend inline immediates. 
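  // For illustration only (the mnemonics below are just examples, not an
  // exhaustive list), the cases that follow roughly behave like this:
  //   v_add_f32 v0, -1, v1          ; -1 is within the inline-constant range,
  //                                 ; so the value is emitted unchanged.
  //   v_add_f32 v0, 0x11223344, v1  ; not inlinable, so only the low 32 bits
  //                                 ; are kept for the 32-bit literal slot.
  //   v_add_f16 v0, 0x5678, v1      ; 16-bit operand: a non-inlinable value
  //                                 ; is truncated to its low 16 bits.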
1791 switch (OpTy) { 1792 case AMDGPU::OPERAND_REG_IMM_INT32: 1793 case AMDGPU::OPERAND_REG_IMM_FP32: 1794 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1795 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1798 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1799 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1800 if (isSafeTruncation(Val, 32) && 1801 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1802 AsmParser->hasInv2PiInlineImm())) { 1803 Inst.addOperand(MCOperand::createImm(Val)); 1804 return; 1805 } 1806 1807 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1808 return; 1809 1810 case AMDGPU::OPERAND_REG_IMM_INT64: 1811 case AMDGPU::OPERAND_REG_IMM_FP64: 1812 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1813 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1814 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1815 Inst.addOperand(MCOperand::createImm(Val)); 1816 return; 1817 } 1818 1819 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1820 return; 1821 1822 case AMDGPU::OPERAND_REG_IMM_INT16: 1823 case AMDGPU::OPERAND_REG_IMM_FP16: 1824 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1826 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1827 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1828 if (isSafeTruncation(Val, 16) && 1829 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1830 AsmParser->hasInv2PiInlineImm())) { 1831 Inst.addOperand(MCOperand::createImm(Val)); 1832 return; 1833 } 1834 1835 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1836 return; 1837 1838 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1839 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1840 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1841 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1842 assert(isSafeTruncation(Val, 16)); 1843 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1844 AsmParser->hasInv2PiInlineImm())); 1845 1846 Inst.addOperand(MCOperand::createImm(Val)); 1847 return; 1848 } 1849 default: 1850 llvm_unreachable("invalid operand size"); 1851 } 1852 } 1853 1854 template <unsigned Bitwidth> 1855 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1856 APInt Literal(64, Imm.Val); 1857 1858 if (!Imm.IsFPImm) { 1859 // We got int literal token. 
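  // For an integer token the low Bitwidth bits are used verbatim as the
  // K-constant. A rough example (assuming a 32-bit KImm operand such as the
  // one used by v_madmk_f32): the token 0x42280000 is emitted unchanged,
  // whereas an fp token is first converted to the operand's precision below.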
1860 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1861 return; 1862 } 1863 1864 bool Lost; 1865 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1866 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1867 APFloat::rmNearestTiesToEven, &Lost); 1868 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1869 } 1870 1871 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1872 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1873 } 1874 1875 static bool isInlineValue(unsigned Reg) { 1876 switch (Reg) { 1877 case AMDGPU::SRC_SHARED_BASE: 1878 case AMDGPU::SRC_SHARED_LIMIT: 1879 case AMDGPU::SRC_PRIVATE_BASE: 1880 case AMDGPU::SRC_PRIVATE_LIMIT: 1881 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1882 return true; 1883 case AMDGPU::SRC_VCCZ: 1884 case AMDGPU::SRC_EXECZ: 1885 case AMDGPU::SRC_SCC: 1886 return true; 1887 case AMDGPU::SGPR_NULL: 1888 return true; 1889 default: 1890 return false; 1891 } 1892 } 1893 1894 bool AMDGPUOperand::isInlineValue() const { 1895 return isRegKind() && ::isInlineValue(getReg()); 1896 } 1897 1898 //===----------------------------------------------------------------------===// 1899 // AsmParser 1900 //===----------------------------------------------------------------------===// 1901 1902 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1903 if (Is == IS_VGPR) { 1904 switch (RegWidth) { 1905 default: return -1; 1906 case 1: return AMDGPU::VGPR_32RegClassID; 1907 case 2: return AMDGPU::VReg_64RegClassID; 1908 case 3: return AMDGPU::VReg_96RegClassID; 1909 case 4: return AMDGPU::VReg_128RegClassID; 1910 case 5: return AMDGPU::VReg_160RegClassID; 1911 case 8: return AMDGPU::VReg_256RegClassID; 1912 case 16: return AMDGPU::VReg_512RegClassID; 1913 case 32: return AMDGPU::VReg_1024RegClassID; 1914 } 1915 } else if (Is == IS_TTMP) { 1916 switch (RegWidth) { 1917 default: return -1; 1918 case 1: return AMDGPU::TTMP_32RegClassID; 1919 case 2: return AMDGPU::TTMP_64RegClassID; 1920 case 4: return AMDGPU::TTMP_128RegClassID; 1921 case 8: return AMDGPU::TTMP_256RegClassID; 1922 case 16: return AMDGPU::TTMP_512RegClassID; 1923 } 1924 } else if (Is == IS_SGPR) { 1925 switch (RegWidth) { 1926 default: return -1; 1927 case 1: return AMDGPU::SGPR_32RegClassID; 1928 case 2: return AMDGPU::SGPR_64RegClassID; 1929 case 4: return AMDGPU::SGPR_128RegClassID; 1930 case 8: return AMDGPU::SGPR_256RegClassID; 1931 case 16: return AMDGPU::SGPR_512RegClassID; 1932 } 1933 } else if (Is == IS_AGPR) { 1934 switch (RegWidth) { 1935 default: return -1; 1936 case 1: return AMDGPU::AGPR_32RegClassID; 1937 case 2: return AMDGPU::AReg_64RegClassID; 1938 case 4: return AMDGPU::AReg_128RegClassID; 1939 case 16: return AMDGPU::AReg_512RegClassID; 1940 case 32: return AMDGPU::AReg_1024RegClassID; 1941 } 1942 } 1943 return -1; 1944 } 1945 1946 static unsigned getSpecialRegForName(StringRef RegName) { 1947 return StringSwitch<unsigned>(RegName) 1948 .Case("exec", AMDGPU::EXEC) 1949 .Case("vcc", AMDGPU::VCC) 1950 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1951 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1952 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1953 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1954 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1955 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1956 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1957 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1958 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1959 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1960 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1961 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1962 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1963 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1964 .Case("m0", AMDGPU::M0) 1965 .Case("vccz", AMDGPU::SRC_VCCZ) 1966 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1967 .Case("execz", AMDGPU::SRC_EXECZ) 1968 .Case("src_execz", AMDGPU::SRC_EXECZ) 1969 .Case("scc", AMDGPU::SRC_SCC) 1970 .Case("src_scc", AMDGPU::SRC_SCC) 1971 .Case("tba", AMDGPU::TBA) 1972 .Case("tma", AMDGPU::TMA) 1973 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1974 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1975 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1976 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1977 .Case("vcc_lo", AMDGPU::VCC_LO) 1978 .Case("vcc_hi", AMDGPU::VCC_HI) 1979 .Case("exec_lo", AMDGPU::EXEC_LO) 1980 .Case("exec_hi", AMDGPU::EXEC_HI) 1981 .Case("tma_lo", AMDGPU::TMA_LO) 1982 .Case("tma_hi", AMDGPU::TMA_HI) 1983 .Case("tba_lo", AMDGPU::TBA_LO) 1984 .Case("tba_hi", AMDGPU::TBA_HI) 1985 .Case("null", AMDGPU::SGPR_NULL) 1986 .Default(AMDGPU::NoRegister); 1987 } 1988 1989 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1990 SMLoc &EndLoc) { 1991 auto R = parseRegister(); 1992 if (!R) return true; 1993 assert(R->isReg()); 1994 RegNo = R->getReg(); 1995 StartLoc = R->getStartLoc(); 1996 EndLoc = R->getEndLoc(); 1997 return false; 1998 } 1999 2000 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2001 RegisterKind RegKind, unsigned Reg1) { 2002 switch (RegKind) { 2003 case IS_SPECIAL: 2004 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2005 Reg = AMDGPU::EXEC; 2006 RegWidth = 2; 2007 return true; 2008 } 2009 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2010 Reg = AMDGPU::FLAT_SCR; 2011 RegWidth = 2; 2012 return true; 2013 } 2014 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2015 Reg = AMDGPU::XNACK_MASK; 2016 RegWidth = 2; 2017 return true; 2018 } 2019 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2020 Reg = AMDGPU::VCC; 2021 RegWidth = 2; 2022 return true; 2023 } 2024 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2025 Reg = AMDGPU::TBA; 2026 RegWidth = 2; 2027 return true; 2028 } 2029 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2030 Reg = AMDGPU::TMA; 2031 RegWidth = 2; 2032 return true; 2033 } 2034 return false; 2035 case IS_VGPR: 2036 case IS_SGPR: 2037 case IS_AGPR: 2038 case IS_TTMP: 2039 if (Reg1 != Reg + RegWidth) { 2040 return false; 2041 } 2042 RegWidth++; 2043 return true; 2044 default: 2045 llvm_unreachable("unexpected register kind"); 2046 } 2047 } 2048 2049 struct RegInfo { 2050 StringLiteral Name; 2051 RegisterKind Kind; 2052 }; 2053 2054 static constexpr RegInfo RegularRegisters[] = { 2055 {{"v"}, IS_VGPR}, 2056 {{"s"}, IS_SGPR}, 2057 {{"ttmp"}, IS_TTMP}, 2058 {{"acc"}, IS_AGPR}, 2059 {{"a"}, IS_AGPR}, 2060 }; 2061 2062 static bool isRegularReg(RegisterKind Kind) { 2063 return Kind == IS_VGPR || 2064 Kind == IS_SGPR || 2065 Kind == IS_TTMP || 2066 Kind == IS_AGPR; 2067 } 2068 2069 static const RegInfo* getRegularRegInfo(StringRef Str) { 2070 for (const RegInfo &Reg : RegularRegisters) 2071 if (Str.startswith(Reg.Name)) 2072 return &Reg; 2073 return nullptr; 2074 } 2075 2076 static bool getRegNum(StringRef Str, unsigned& Num) { 2077 return !Str.getAsInteger(10, Num); 2078 } 2079 2080 bool 2081 AMDGPUAsmParser::isRegister(const 
AsmToken &Token, 2082 const AsmToken &NextToken) const { 2083 2084 // A list of consecutive registers: [s0,s1,s2,s3] 2085 if (Token.is(AsmToken::LBrac)) 2086 return true; 2087 2088 if (!Token.is(AsmToken::Identifier)) 2089 return false; 2090 2091 // A single register like s0 or a range of registers like s[0:1] 2092 2093 StringRef Str = Token.getString(); 2094 const RegInfo *Reg = getRegularRegInfo(Str); 2095 if (Reg) { 2096 StringRef RegName = Reg->Name; 2097 StringRef RegSuffix = Str.substr(RegName.size()); 2098 if (!RegSuffix.empty()) { 2099 unsigned Num; 2100 // A single register with an index: rXX 2101 if (getRegNum(RegSuffix, Num)) 2102 return true; 2103 } else { 2104 // A range of registers: r[XX:YY]. 2105 if (NextToken.is(AsmToken::LBrac)) 2106 return true; 2107 } 2108 } 2109 2110 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2111 } 2112 2113 bool 2114 AMDGPUAsmParser::isRegister() 2115 { 2116 return isRegister(getToken(), peekToken()); 2117 } 2118 2119 unsigned 2120 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2121 unsigned RegNum, 2122 unsigned RegWidth) { 2123 2124 assert(isRegularReg(RegKind)); 2125 2126 unsigned AlignSize = 1; 2127 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2128 // SGPR and TTMP registers must be aligned. 2129 // Max required alignment is 4 dwords. 2130 AlignSize = std::min(RegWidth, 4u); 2131 } 2132 2133 if (RegNum % AlignSize != 0) 2134 return AMDGPU::NoRegister; 2135 2136 unsigned RegIdx = RegNum / AlignSize; 2137 int RCID = getRegClass(RegKind, RegWidth); 2138 if (RCID == -1) 2139 return AMDGPU::NoRegister; 2140 2141 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2142 const MCRegisterClass RC = TRI->getRegClass(RCID); 2143 if (RegIdx >= RC.getNumRegs()) 2144 return AMDGPU::NoRegister; 2145 2146 return RC.getRegister(RegIdx); 2147 } 2148 2149 bool 2150 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2151 int64_t RegLo, RegHi; 2152 if (!trySkipToken(AsmToken::LBrac)) 2153 return false; 2154 2155 if (!parseExpr(RegLo)) 2156 return false; 2157 2158 if (trySkipToken(AsmToken::Colon)) { 2159 if (!parseExpr(RegHi)) 2160 return false; 2161 } else { 2162 RegHi = RegLo; 2163 } 2164 2165 if (!trySkipToken(AsmToken::RBrac)) 2166 return false; 2167 2168 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2169 return false; 2170 2171 Num = static_cast<unsigned>(RegLo); 2172 Width = (RegHi - RegLo) + 1; 2173 return true; 2174 } 2175 2176 unsigned 2177 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2178 unsigned &RegNum, 2179 unsigned &RegWidth) { 2180 assert(isToken(AsmToken::Identifier)); 2181 unsigned Reg = getSpecialRegForName(getTokenStr()); 2182 if (Reg) { 2183 RegNum = 0; 2184 RegWidth = 1; 2185 RegKind = IS_SPECIAL; 2186 lex(); // skip register name 2187 } 2188 return Reg; 2189 } 2190 2191 unsigned 2192 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2193 unsigned &RegNum, 2194 unsigned &RegWidth) { 2195 assert(isToken(AsmToken::Identifier)); 2196 StringRef RegName = getTokenStr(); 2197 2198 const RegInfo *RI = getRegularRegInfo(RegName); 2199 if (!RI) 2200 return AMDGPU::NoRegister; 2201 lex(); // skip register name 2202 2203 RegKind = RI->Kind; 2204 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2205 if (!RegSuffix.empty()) { 2206 // Single 32-bit register: vXX. 2207 if (!getRegNum(RegSuffix, RegNum)) 2208 return AMDGPU::NoRegister; 2209 RegWidth = 1; 2210 } else { 2211 // Range of registers: v[XX:YY]. ":YY" is optional. 
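      // For illustration: "v[0:3]" gives RegNum = 0 and RegWidth = 4, while
      // "v[5]" (no ":YY" part) gives RegNum = 5 and RegWidth = 1.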
2212 if (!ParseRegRange(RegNum, RegWidth)) 2213 return AMDGPU::NoRegister; 2214 } 2215 2216 return getRegularReg(RegKind, RegNum, RegWidth); 2217 } 2218 2219 unsigned 2220 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, 2221 unsigned &RegNum, 2222 unsigned &RegWidth) { 2223 unsigned Reg = AMDGPU::NoRegister; 2224 2225 if (!trySkipToken(AsmToken::LBrac)) 2226 return AMDGPU::NoRegister; 2227 2228 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2229 2230 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2231 return AMDGPU::NoRegister; 2232 if (RegWidth != 1) 2233 return AMDGPU::NoRegister; 2234 2235 for (; trySkipToken(AsmToken::Comma); ) { 2236 RegisterKind NextRegKind; 2237 unsigned NextReg, NextRegNum, NextRegWidth; 2238 2239 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth)) 2240 return AMDGPU::NoRegister; 2241 if (NextRegWidth != 1) 2242 return AMDGPU::NoRegister; 2243 if (NextRegKind != RegKind) 2244 return AMDGPU::NoRegister; 2245 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2246 return AMDGPU::NoRegister; 2247 } 2248 2249 if (!trySkipToken(AsmToken::RBrac)) 2250 return AMDGPU::NoRegister; 2251 2252 if (isRegularReg(RegKind)) 2253 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2254 2255 return Reg; 2256 } 2257 2258 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 2259 unsigned &Reg, 2260 unsigned &RegNum, 2261 unsigned &RegWidth) { 2262 Reg = AMDGPU::NoRegister; 2263 2264 if (isToken(AsmToken::Identifier)) { 2265 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth); 2266 if (Reg == AMDGPU::NoRegister) 2267 Reg = ParseRegularReg(RegKind, RegNum, RegWidth); 2268 } else { 2269 Reg = ParseRegList(RegKind, RegNum, RegWidth); 2270 } 2271 2272 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2273 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2274 } 2275 2276 Optional<StringRef> 2277 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2278 switch (RegKind) { 2279 case IS_VGPR: 2280 return StringRef(".amdgcn.next_free_vgpr"); 2281 case IS_SGPR: 2282 return StringRef(".amdgcn.next_free_sgpr"); 2283 default: 2284 return None; 2285 } 2286 } 2287 2288 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2289 auto SymbolName = getGprCountSymbolName(RegKind); 2290 assert(SymbolName && "initializing invalid register kind"); 2291 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2292 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2293 } 2294 2295 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2296 unsigned DwordRegIndex, 2297 unsigned RegWidth) { 2298 // Symbols are only defined for GCN targets 2299 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2300 return true; 2301 2302 auto SymbolName = getGprCountSymbolName(RegKind); 2303 if (!SymbolName) 2304 return true; 2305 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2306 2307 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2308 int64_t OldCount; 2309 2310 if (!Sym->isVariable()) 2311 return !Error(getParser().getTok().getLoc(), 2312 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2313 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2314 return !Error( 2315 getParser().getTok().getLoc(), 2316 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2317 2318 if (OldCount <= NewMax) 2319 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2320 2321 return true; 2322 } 2323 2324 
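// A small worked example of the bookkeeping above (hypothetical values):
// after parsing "v[4:7]" we get DwordRegIndex = 4 and RegWidth = 4, so
// NewMax = 7; if .amdgcn.next_free_vgpr currently evaluates to 7 or less,
// it is raised to 8, i.e. one past the highest VGPR index used so far.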
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2325 const auto &Tok = Parser.getTok(); 2326 SMLoc StartLoc = Tok.getLoc(); 2327 SMLoc EndLoc = Tok.getEndLoc(); 2328 RegisterKind RegKind; 2329 unsigned Reg, RegNum, RegWidth; 2330 2331 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2332 //FIXME: improve error messages (bug 41303). 2333 Error(StartLoc, "not a valid operand."); 2334 return nullptr; 2335 } 2336 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2337 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2338 return nullptr; 2339 } else 2340 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2341 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2342 } 2343 2344 OperandMatchResultTy 2345 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2346 // TODO: add syntactic sugar for 1/(2*PI) 2347 2348 assert(!isRegister()); 2349 assert(!isModifier()); 2350 2351 const auto& Tok = getToken(); 2352 const auto& NextTok = peekToken(); 2353 bool IsReal = Tok.is(AsmToken::Real); 2354 SMLoc S = getLoc(); 2355 bool Negate = false; 2356 2357 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2358 lex(); 2359 IsReal = true; 2360 Negate = true; 2361 } 2362 2363 if (IsReal) { 2364 // Floating-point expressions are not supported. 2365 // Can only allow floating-point literals with an 2366 // optional sign. 2367 2368 StringRef Num = getTokenStr(); 2369 lex(); 2370 2371 APFloat RealVal(APFloat::IEEEdouble()); 2372 auto roundMode = APFloat::rmNearestTiesToEven; 2373 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2374 return MatchOperand_ParseFail; 2375 } 2376 if (Negate) 2377 RealVal.changeSign(); 2378 2379 Operands.push_back( 2380 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2381 AMDGPUOperand::ImmTyNone, true)); 2382 2383 return MatchOperand_Success; 2384 2385 } else { 2386 int64_t IntVal; 2387 const MCExpr *Expr; 2388 SMLoc S = getLoc(); 2389 2390 if (HasSP3AbsModifier) { 2391 // This is a workaround for handling expressions 2392 // as arguments of SP3 'abs' modifier, for example: 2393 // |1.0| 2394 // |-1| 2395 // |1+x| 2396 // This syntax is not compatible with syntax of standard 2397 // MC expressions (due to the trailing '|'). 
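      // Presumably this is also why parsePrimaryExpr is used below instead of
      // parseExpression: a full MC expression parse would treat the trailing
      // '|' as the bitwise-OR operator and consume past the end of the
      // modifier, whereas a primary expression stops before it.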
2398 SMLoc EndLoc; 2399 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2400 return MatchOperand_ParseFail; 2401 } else { 2402 if (Parser.parseExpression(Expr)) 2403 return MatchOperand_ParseFail; 2404 } 2405 2406 if (Expr->evaluateAsAbsolute(IntVal)) { 2407 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2408 } else { 2409 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2410 } 2411 2412 return MatchOperand_Success; 2413 } 2414 2415 return MatchOperand_NoMatch; 2416 } 2417 2418 OperandMatchResultTy 2419 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2420 if (!isRegister()) 2421 return MatchOperand_NoMatch; 2422 2423 if (auto R = parseRegister()) { 2424 assert(R->isReg()); 2425 Operands.push_back(std::move(R)); 2426 return MatchOperand_Success; 2427 } 2428 return MatchOperand_ParseFail; 2429 } 2430 2431 OperandMatchResultTy 2432 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2433 auto res = parseReg(Operands); 2434 if (res != MatchOperand_NoMatch) { 2435 return res; 2436 } else if (isModifier()) { 2437 return MatchOperand_NoMatch; 2438 } else { 2439 return parseImm(Operands, HasSP3AbsMod); 2440 } 2441 } 2442 2443 bool 2444 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2445 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2446 const auto &str = Token.getString(); 2447 return str == "abs" || str == "neg" || str == "sext"; 2448 } 2449 return false; 2450 } 2451 2452 bool 2453 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2454 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2455 } 2456 2457 bool 2458 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2459 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2460 } 2461 2462 bool 2463 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2464 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2465 } 2466 2467 // Check if this is an operand modifier or an opcode modifier 2468 // which may look like an expression but it is not. We should 2469 // avoid parsing these modifiers as expressions. Currently 2470 // recognized sequences are: 2471 // |...| 2472 // abs(...) 2473 // neg(...) 2474 // sext(...) 2475 // -reg 2476 // -|...| 2477 // -abs(...) 2478 // name:... 2479 // Note that simple opcode modifiers like 'gds' may be parsed as 2480 // expressions; this is a special case. See getExpressionAsToken. 2481 // 2482 bool 2483 AMDGPUAsmParser::isModifier() { 2484 2485 AsmToken Tok = getToken(); 2486 AsmToken NextToken[2]; 2487 peekTokens(NextToken); 2488 2489 return isOperandModifier(Tok, NextToken[0]) || 2490 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2491 isOpcodeModifierWithVal(Tok, NextToken[0]); 2492 } 2493 2494 // Check if the current token is an SP3 'neg' modifier. 2495 // Currently this modifier is allowed in the following context: 2496 // 2497 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2498 // 2. Before an 'abs' modifier: -abs(...) 2499 // 3. Before an SP3 'abs' modifier: -|...| 2500 // 2501 // In all other cases "-" is handled as a part 2502 // of an expression that follows the sign. 
2503 // 2504 // Note: When "-" is followed by an integer literal, 2505 // this is interpreted as integer negation rather 2506 // than a floating-point NEG modifier applied to N. 2507 // Besides being counter-intuitive, such use of a floating-point 2508 // NEG modifier would have resulted in a different meaning 2509 // of integer literals used with VOP1/2/C and VOP3, 2510 // for example: 2511 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2512 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2513 // Negative fp literals with a preceding "-" are 2514 // handled likewise for uniformity 2515 // 2516 bool 2517 AMDGPUAsmParser::parseSP3NegModifier() { 2518 2519 AsmToken NextToken[2]; 2520 peekTokens(NextToken); 2521 2522 if (isToken(AsmToken::Minus) && 2523 (isRegister(NextToken[0], NextToken[1]) || 2524 NextToken[0].is(AsmToken::Pipe) || 2525 isId(NextToken[0], "abs"))) { 2526 lex(); 2527 return true; 2528 } 2529 2530 return false; 2531 } 2532 2533 OperandMatchResultTy 2534 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2535 bool AllowImm) { 2536 bool Neg, SP3Neg; 2537 bool Abs, SP3Abs; 2538 SMLoc Loc; 2539 2540 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2541 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2542 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2543 return MatchOperand_ParseFail; 2544 } 2545 2546 SP3Neg = parseSP3NegModifier(); 2547 2548 Loc = getLoc(); 2549 Neg = trySkipId("neg"); 2550 if (Neg && SP3Neg) { 2551 Error(Loc, "expected register or immediate"); 2552 return MatchOperand_ParseFail; 2553 } 2554 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2555 return MatchOperand_ParseFail; 2556 2557 Abs = trySkipId("abs"); 2558 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2559 return MatchOperand_ParseFail; 2560 2561 Loc = getLoc(); 2562 SP3Abs = trySkipToken(AsmToken::Pipe); 2563 if (Abs && SP3Abs) { 2564 Error(Loc, "expected register or immediate"); 2565 return MatchOperand_ParseFail; 2566 } 2567 2568 OperandMatchResultTy Res; 2569 if (AllowImm) { 2570 Res = parseRegOrImm(Operands, SP3Abs); 2571 } else { 2572 Res = parseReg(Operands); 2573 } 2574 if (Res != MatchOperand_Success) { 2575 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2576 } 2577 2578 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2579 return MatchOperand_ParseFail; 2580 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2581 return MatchOperand_ParseFail; 2582 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2583 return MatchOperand_ParseFail; 2584 2585 AMDGPUOperand::Modifiers Mods; 2586 Mods.Abs = Abs || SP3Abs; 2587 Mods.Neg = Neg || SP3Neg; 2588 2589 if (Mods.hasFPModifiers()) { 2590 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2591 if (Op.isExpr()) { 2592 Error(Op.getStartLoc(), "expected an absolute expression"); 2593 return MatchOperand_ParseFail; 2594 } 2595 Op.setModifiers(Mods); 2596 } 2597 return MatchOperand_Success; 2598 } 2599 2600 OperandMatchResultTy 2601 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2602 bool AllowImm) { 2603 bool Sext = trySkipId("sext"); 2604 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2605 return MatchOperand_ParseFail; 2606 2607 OperandMatchResultTy Res; 2608 if (AllowImm) { 2609 Res = parseRegOrImm(Operands); 2610 } else { 2611 Res = parseReg(Operands); 2612 } 2613 if (Res != MatchOperand_Success) { 2614 return Sext? MatchOperand_ParseFail : Res; 2615 } 2616 2617 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2618 return MatchOperand_ParseFail; 2619 2620 AMDGPUOperand::Modifiers Mods; 2621 Mods.Sext = Sext; 2622 2623 if (Mods.hasIntModifiers()) { 2624 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2625 if (Op.isExpr()) { 2626 Error(Op.getStartLoc(), "expected an absolute expression"); 2627 return MatchOperand_ParseFail; 2628 } 2629 Op.setModifiers(Mods); 2630 } 2631 2632 return MatchOperand_Success; 2633 } 2634 2635 OperandMatchResultTy 2636 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2637 return parseRegOrImmWithFPInputMods(Operands, false); 2638 } 2639 2640 OperandMatchResultTy 2641 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2642 return parseRegOrImmWithIntInputMods(Operands, false); 2643 } 2644 2645 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2646 auto Loc = getLoc(); 2647 if (trySkipId("off")) { 2648 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2649 AMDGPUOperand::ImmTyOff, false)); 2650 return MatchOperand_Success; 2651 } 2652 2653 if (!isRegister()) 2654 return MatchOperand_NoMatch; 2655 2656 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2657 if (Reg) { 2658 Operands.push_back(std::move(Reg)); 2659 return MatchOperand_Success; 2660 } 2661 2662 return MatchOperand_ParseFail; 2663 2664 } 2665 2666 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2667 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2668 2669 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2670 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2671 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2672 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2673 return Match_InvalidOperand; 2674 2675 if ((TSFlags & SIInstrFlags::VOP3) && 2676 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2677 getForcedEncodingSize() != 64) 2678 return Match_PreferE32; 2679 2680 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2681 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2682 // v_mac_f32/16 allow only dst_sel == DWORD; 2683 auto OpNum = 2684 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2685 const auto &Op = Inst.getOperand(OpNum); 2686 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2687 return Match_InvalidOperand; 2688 } 2689 } 2690 2691 return Match_Success; 2692 } 2693 2694 // Which asm variants we should check 2695 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2696 if (getForcedEncodingSize() == 32) { 2697 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2698 return makeArrayRef(Variants); 2699 } 2700 2701 if (isForcedVOP3()) { 2702 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2703 return makeArrayRef(Variants); 2704 } 2705 2706 if (isForcedSDWA()) { 2707 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2708 AMDGPUAsmVariants::SDWA9}; 2709 return makeArrayRef(Variants); 2710 } 2711 2712 if (isForcedDPP()) { 2713 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2714 return makeArrayRef(Variants); 2715 } 2716 2717 static const unsigned Variants[] = { 2718 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2719 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2720 }; 2721 2722 return makeArrayRef(Variants); 2723 } 2724 2725 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2726 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2727 const unsigned Num = Desc.getNumImplicitUses(); 2728 for (unsigned i = 0; i < Num; ++i) { 2729 unsigned Reg = Desc.ImplicitUses[i]; 2730 switch (Reg) { 2731 case AMDGPU::FLAT_SCR: 2732 case AMDGPU::VCC: 2733 case AMDGPU::VCC_LO: 2734 case AMDGPU::VCC_HI: 2735 case AMDGPU::M0: 2736 return Reg; 2737 default: 2738 break; 2739 } 2740 } 2741 return AMDGPU::NoRegister; 2742 } 2743 2744 // NB: This code is correct only when used to check constant 2745 // bus limitations because GFX7 does not support f16 inline constants. 2746 // Note that there are no cases when a GFX7 opcode violates 2747 // constant bus limitations due to the use of an f16 constant.
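// For reference, the checks below accept the usual SI inline constants:
// small integers (-16..64) and a handful of fp values (0.5, -0.5, 1.0, -1.0,
// 2.0, -2.0, 4.0, -4.0, plus 1/(2*pi) where hasInv2PiInlineImm() is true);
// anything else occupies the literal slot and is counted against the
// constant bus by the callers.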
2748 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2749 unsigned OpIdx) const { 2750 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2751 2752 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2753 return false; 2754 } 2755 2756 const MCOperand &MO = Inst.getOperand(OpIdx); 2757 2758 int64_t Val = MO.getImm(); 2759 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2760 2761 switch (OpSize) { // expected operand size 2762 case 8: 2763 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2764 case 4: 2765 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2766 case 2: { 2767 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2768 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2769 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2770 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2771 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2772 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2773 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2774 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2775 } else { 2776 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2777 } 2778 } 2779 default: 2780 llvm_unreachable("invalid operand size"); 2781 } 2782 } 2783 2784 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2785 if (!isGFX10()) 2786 return 1; 2787 2788 switch (Opcode) { 2789 // 64-bit shift instructions can use only one scalar value input 2790 case AMDGPU::V_LSHLREV_B64: 2791 case AMDGPU::V_LSHLREV_B64_gfx10: 2792 case AMDGPU::V_LSHL_B64: 2793 case AMDGPU::V_LSHRREV_B64: 2794 case AMDGPU::V_LSHRREV_B64_gfx10: 2795 case AMDGPU::V_LSHR_B64: 2796 case AMDGPU::V_ASHRREV_I64: 2797 case AMDGPU::V_ASHRREV_I64_gfx10: 2798 case AMDGPU::V_ASHR_I64: 2799 return 1; 2800 default: 2801 return 2; 2802 } 2803 } 2804 2805 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2806 const MCOperand &MO = Inst.getOperand(OpIdx); 2807 if (MO.isImm()) { 2808 return !isInlineConstant(Inst, OpIdx); 2809 } else if (MO.isReg()) { 2810 auto Reg = MO.getReg(); 2811 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2812 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2813 } else { 2814 return true; 2815 } 2816 } 2817 2818 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2819 const unsigned Opcode = Inst.getOpcode(); 2820 const MCInstrDesc &Desc = MII.get(Opcode); 2821 unsigned ConstantBusUseCount = 0; 2822 unsigned NumLiterals = 0; 2823 unsigned LiteralSize; 2824 2825 if (Desc.TSFlags & 2826 (SIInstrFlags::VOPC | 2827 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2828 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2829 SIInstrFlags::SDWA)) { 2830 // Check special imm operands (used by madmk, etc) 2831 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2832 ++ConstantBusUseCount; 2833 } 2834 2835 SmallDenseSet<unsigned> SGPRsUsed; 2836 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2837 if (SGPRUsed != AMDGPU::NoRegister) { 2838 SGPRsUsed.insert(SGPRUsed); 2839 ++ConstantBusUseCount; 2840 } 2841 2842 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2843 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2844 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2845 2846 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2847 2848 for (int OpIdx : OpIndices) { 2849 if (OpIdx == -1) break; 2850 2851 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2852 if (usesConstantBus(Inst, OpIdx)) { 2853 if (MO.isReg()) { 2854 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2855 // Pairs of registers with a partial intersections like these 2856 // s0, s[0:1] 2857 // flat_scratch_lo, flat_scratch 2858 // flat_scratch_lo, flat_scratch_hi 2859 // are theoretically valid but they are disabled anyway. 2860 // Note that this code mimics SIInstrInfo::verifyInstruction 2861 if (!SGPRsUsed.count(Reg)) { 2862 SGPRsUsed.insert(Reg); 2863 ++ConstantBusUseCount; 2864 } 2865 } else { // Expression or a literal 2866 2867 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2868 continue; // special operand like VINTERP attr_chan 2869 2870 // An instruction may use only one literal. 2871 // This has been validated on the previous step. 2872 // See validateVOP3Literal. 2873 // This literal may be used as more than one operand. 2874 // If all these operands are of the same size, 2875 // this literal counts as one scalar value. 2876 // Otherwise it counts as 2 scalar values. 2877 // See "GFX10 Shader Programming", section 3.6.2.3. 2878 2879 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2880 if (Size < 4) Size = 4; 2881 2882 if (NumLiterals == 0) { 2883 NumLiterals = 1; 2884 LiteralSize = Size; 2885 } else if (LiteralSize != Size) { 2886 NumLiterals = 2; 2887 } 2888 } 2889 } 2890 } 2891 } 2892 ConstantBusUseCount += NumLiterals; 2893 2894 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2895 } 2896 2897 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2898 const unsigned Opcode = Inst.getOpcode(); 2899 const MCInstrDesc &Desc = MII.get(Opcode); 2900 2901 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2902 if (DstIdx == -1 || 2903 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2904 return true; 2905 } 2906 2907 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2908 2909 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2910 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2911 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2912 2913 assert(DstIdx != -1); 2914 const MCOperand &Dst = Inst.getOperand(DstIdx); 2915 assert(Dst.isReg()); 2916 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2917 2918 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2919 2920 for (int SrcIdx : SrcIndices) { 2921 if (SrcIdx == -1) break; 2922 const MCOperand &Src = Inst.getOperand(SrcIdx); 2923 if (Src.isReg()) { 2924 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2925 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2926 return false; 2927 } 2928 } 2929 } 2930 2931 return true; 2932 } 2933 2934 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2935 2936 const unsigned Opc = Inst.getOpcode(); 2937 const MCInstrDesc &Desc = MII.get(Opc); 2938 2939 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2940 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2941 assert(ClampIdx != -1); 2942 return Inst.getOperand(ClampIdx).getImm() == 0; 2943 } 2944 2945 return true; 2946 } 2947 2948 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2949 2950 const unsigned Opc = Inst.getOpcode(); 2951 const MCInstrDesc &Desc = MII.get(Opc); 2952 2953 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2954 return true; 2955 2956 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2957 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2958 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2959 2960 assert(VDataIdx != -1); 2961 assert(DMaskIdx != -1); 2962 assert(TFEIdx != -1); 2963 2964 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2965 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2966 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2967 if (DMask == 0) 2968 DMask = 1; 2969 2970 unsigned DataSize = 2971 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2972 if (hasPackedD16()) { 2973 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2974 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2975 DataSize = (DataSize + 1) / 2; 2976 } 2977 2978 return (VDataSize / 4) == DataSize + TFESize; 2979 } 2980 2981 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2982 const unsigned Opc = Inst.getOpcode(); 2983 const MCInstrDesc &Desc = MII.get(Opc); 2984 2985 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2986 return true; 2987 2988 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2989 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2990 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2991 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2992 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2993 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2994 2995 assert(VAddr0Idx != -1); 2996 assert(SrsrcIdx != -1); 2997 assert(DimIdx != -1); 2998 assert(SrsrcIdx > VAddr0Idx); 2999 3000 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3001 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3002 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3003 unsigned VAddrSize = 3004 IsNSA ? SrsrcIdx - VAddr0Idx 3005 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3006 3007 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3008 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3009 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3010 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3011 if (!IsNSA) { 3012 if (AddrSize > 8) 3013 AddrSize = 16; 3014 else if (AddrSize > 4) 3015 AddrSize = 8; 3016 } 3017 3018 return VAddrSize == AddrSize; 3019 } 3020 3021 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3022 3023 const unsigned Opc = Inst.getOpcode(); 3024 const MCInstrDesc &Desc = MII.get(Opc); 3025 3026 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3027 return true; 3028 if (!Desc.mayLoad() || !Desc.mayStore()) 3029 return true; // Not atomic 3030 3031 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3032 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3033 3034 // This is an incomplete check because image_atomic_cmpswap 3035 // may only use 0x3 and 0xf while other atomic operations 3036 // may use 0x1 and 0x3. However these limitations are 3037 // verified when we check that dmask matches dst size. 
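  // Rough examples consistent with the note above: a 32-bit image_atomic_add
  // uses dmask == 0x1, a 64-bit atomic uses 0x3, and image_atomic_cmpswap
  // (data plus compare value) uses 0x3 or 0xf; hence the set accepted below.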
3038 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3039 } 3040 3041 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3042 3043 const unsigned Opc = Inst.getOpcode(); 3044 const MCInstrDesc &Desc = MII.get(Opc); 3045 3046 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3047 return true; 3048 3049 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3050 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3051 3052 // GATHER4 instructions use dmask in a different fashion compared to 3053 // other MIMG instructions. The only useful DMASK values are 3054 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3055 // (red,red,red,red) etc.) The ISA document doesn't mention 3056 // this. 3057 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3058 } 3059 3060 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3061 { 3062 switch (Opcode) { 3063 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3064 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3065 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3066 return true; 3067 default: 3068 return false; 3069 } 3070 } 3071 3072 // movrels* opcodes should only allow VGPRS as src0. 3073 // This is specified in .td description for vop1/vop3, 3074 // but sdwa is handled differently. See isSDWAOperand. 3075 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3076 3077 const unsigned Opc = Inst.getOpcode(); 3078 const MCInstrDesc &Desc = MII.get(Opc); 3079 3080 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3081 return true; 3082 3083 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3084 assert(Src0Idx != -1); 3085 3086 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3087 if (!Src0.isReg()) 3088 return false; 3089 3090 auto Reg = Src0.getReg(); 3091 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3092 return !isSGPR(mc2PseudoReg(Reg), TRI); 3093 } 3094 3095 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3096 3097 const unsigned Opc = Inst.getOpcode(); 3098 const MCInstrDesc &Desc = MII.get(Opc); 3099 3100 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3101 return true; 3102 3103 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3104 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3105 if (isCI() || isSI()) 3106 return false; 3107 } 3108 3109 return true; 3110 } 3111 3112 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3113 const unsigned Opc = Inst.getOpcode(); 3114 const MCInstrDesc &Desc = MII.get(Opc); 3115 3116 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3117 return true; 3118 3119 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3120 if (DimIdx < 0) 3121 return true; 3122 3123 long Imm = Inst.getOperand(DimIdx).getImm(); 3124 if (Imm < 0 || Imm >= 8) 3125 return false; 3126 3127 return true; 3128 } 3129 3130 static bool IsRevOpcode(const unsigned Opcode) 3131 { 3132 switch (Opcode) { 3133 case AMDGPU::V_SUBREV_F32_e32: 3134 case AMDGPU::V_SUBREV_F32_e64: 3135 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3136 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3137 case AMDGPU::V_SUBREV_F32_e32_vi: 3138 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3139 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3140 case AMDGPU::V_SUBREV_F32_e64_vi: 3141 3142 case AMDGPU::V_SUBREV_I32_e32: 3143 case AMDGPU::V_SUBREV_I32_e64: 3144 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3145 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3146 3147 case AMDGPU::V_SUBBREV_U32_e32: 3148 case AMDGPU::V_SUBBREV_U32_e64: 3149 case 
AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3150 case AMDGPU::V_SUBBREV_U32_e32_vi: 3151 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3152 case AMDGPU::V_SUBBREV_U32_e64_vi: 3153 3154 case AMDGPU::V_SUBREV_U32_e32: 3155 case AMDGPU::V_SUBREV_U32_e64: 3156 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3157 case AMDGPU::V_SUBREV_U32_e32_vi: 3158 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3159 case AMDGPU::V_SUBREV_U32_e64_vi: 3160 3161 case AMDGPU::V_SUBREV_F16_e32: 3162 case AMDGPU::V_SUBREV_F16_e64: 3163 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3164 case AMDGPU::V_SUBREV_F16_e32_vi: 3165 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3166 case AMDGPU::V_SUBREV_F16_e64_vi: 3167 3168 case AMDGPU::V_SUBREV_U16_e32: 3169 case AMDGPU::V_SUBREV_U16_e64: 3170 case AMDGPU::V_SUBREV_U16_e32_vi: 3171 case AMDGPU::V_SUBREV_U16_e64_vi: 3172 3173 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3174 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3175 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3176 3177 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3178 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3179 3180 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3181 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3182 3183 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3184 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3185 3186 case AMDGPU::V_LSHRREV_B32_e32: 3187 case AMDGPU::V_LSHRREV_B32_e64: 3188 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3189 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3190 case AMDGPU::V_LSHRREV_B32_e32_vi: 3191 case AMDGPU::V_LSHRREV_B32_e64_vi: 3192 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3193 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3194 3195 case AMDGPU::V_ASHRREV_I32_e32: 3196 case AMDGPU::V_ASHRREV_I32_e64: 3197 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3198 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3199 case AMDGPU::V_ASHRREV_I32_e32_vi: 3200 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3201 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3202 case AMDGPU::V_ASHRREV_I32_e64_vi: 3203 3204 case AMDGPU::V_LSHLREV_B32_e32: 3205 case AMDGPU::V_LSHLREV_B32_e64: 3206 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3207 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3208 case AMDGPU::V_LSHLREV_B32_e32_vi: 3209 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3210 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3211 case AMDGPU::V_LSHLREV_B32_e64_vi: 3212 3213 case AMDGPU::V_LSHLREV_B16_e32: 3214 case AMDGPU::V_LSHLREV_B16_e64: 3215 case AMDGPU::V_LSHLREV_B16_e32_vi: 3216 case AMDGPU::V_LSHLREV_B16_e64_vi: 3217 case AMDGPU::V_LSHLREV_B16_gfx10: 3218 3219 case AMDGPU::V_LSHRREV_B16_e32: 3220 case AMDGPU::V_LSHRREV_B16_e64: 3221 case AMDGPU::V_LSHRREV_B16_e32_vi: 3222 case AMDGPU::V_LSHRREV_B16_e64_vi: 3223 case AMDGPU::V_LSHRREV_B16_gfx10: 3224 3225 case AMDGPU::V_ASHRREV_I16_e32: 3226 case AMDGPU::V_ASHRREV_I16_e64: 3227 case AMDGPU::V_ASHRREV_I16_e32_vi: 3228 case AMDGPU::V_ASHRREV_I16_e64_vi: 3229 case AMDGPU::V_ASHRREV_I16_gfx10: 3230 3231 case AMDGPU::V_LSHLREV_B64: 3232 case AMDGPU::V_LSHLREV_B64_gfx10: 3233 case AMDGPU::V_LSHLREV_B64_vi: 3234 3235 case AMDGPU::V_LSHRREV_B64: 3236 case AMDGPU::V_LSHRREV_B64_gfx10: 3237 case AMDGPU::V_LSHRREV_B64_vi: 3238 3239 case AMDGPU::V_ASHRREV_I64: 3240 case AMDGPU::V_ASHRREV_I64_gfx10: 3241 case AMDGPU::V_ASHRREV_I64_vi: 3242 3243 case AMDGPU::V_PK_LSHLREV_B16: 3244 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3245 case AMDGPU::V_PK_LSHLREV_B16_vi: 3246 3247 case AMDGPU::V_PK_LSHRREV_B16: 3248 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3249 case AMDGPU::V_PK_LSHRREV_B16_vi: 3250 case AMDGPU::V_PK_ASHRREV_I16: 3251 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3252 case AMDGPU::V_PK_ASHRREV_I16_vi: 3253 return true; 
3254 default: 3255 return false; 3256 } 3257 } 3258 3259 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3260 3261 using namespace SIInstrFlags; 3262 const unsigned Opcode = Inst.getOpcode(); 3263 const MCInstrDesc &Desc = MII.get(Opcode); 3264 3265 // lds_direct register is defined so that it can be used 3266 // with 9-bit operands only. Ignore encodings which do not accept these. 3267 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3268 return true; 3269 3270 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3271 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3272 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3273 3274 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3275 3276 // lds_direct cannot be specified as either src1 or src2. 3277 for (int SrcIdx : SrcIndices) { 3278 if (SrcIdx == -1) break; 3279 const MCOperand &Src = Inst.getOperand(SrcIdx); 3280 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3281 return false; 3282 } 3283 } 3284 3285 if (Src0Idx == -1) 3286 return true; 3287 3288 const MCOperand &Src = Inst.getOperand(Src0Idx); 3289 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3290 return true; 3291 3292 // lds_direct is specified as src0. Check additional limitations. 3293 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3294 } 3295 3296 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3297 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3298 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3299 if (Op.isFlatOffset()) 3300 return Op.getStartLoc(); 3301 } 3302 return getLoc(); 3303 } 3304 3305 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3306 const OperandVector &Operands) { 3307 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3308 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3309 return true; 3310 3311 auto Opcode = Inst.getOpcode(); 3312 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3313 assert(OpNum != -1); 3314 3315 const auto &Op = Inst.getOperand(OpNum); 3316 if (!hasFlatOffsets() && Op.getImm() != 0) { 3317 Error(getFlatOffsetLoc(Operands), 3318 "flat offset modifier is not supported on this GPU"); 3319 return false; 3320 } 3321 3322 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3323 // For FLAT segment the offset must be positive; 3324 // MSB is ignored and forced to zero. 3325 unsigned OffsetSize = isGFX9() ? 13 : 12; 3326 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3327 if (!isIntN(OffsetSize, Op.getImm())) { 3328 Error(getFlatOffsetLoc(Operands), 3329 isGFX9() ? "expected a 13-bit signed offset" : 3330 "expected a 12-bit signed offset"); 3331 return false; 3332 } 3333 } else { 3334 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3335 Error(getFlatOffsetLoc(Operands), 3336 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3337 "expected an 11-bit unsigned offset"); 3338 return false; 3339 } 3340 } 3341 3342 return true; 3343 } 3344 3345 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3346 unsigned Opcode = Inst.getOpcode(); 3347 const MCInstrDesc &Desc = MII.get(Opcode); 3348 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3349 return true; 3350 3351 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3352 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3353 3354 const int OpIndices[] = { Src0Idx, Src1Idx }; 3355 3356 unsigned NumExprs = 0; 3357 unsigned NumLiterals = 0; 3358 uint32_t LiteralValue; 3359 3360 for (int OpIdx : OpIndices) { 3361 if (OpIdx == -1) break; 3362 3363 const MCOperand &MO = Inst.getOperand(OpIdx); 3364 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3365 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3366 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3367 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3368 if (NumLiterals == 0 || LiteralValue != Value) { 3369 LiteralValue = Value; 3370 ++NumLiterals; 3371 } 3372 } else if (MO.isExpr()) { 3373 ++NumExprs; 3374 } 3375 } 3376 } 3377 3378 return NumLiterals + NumExprs <= 1; 3379 } 3380 3381 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3382 const unsigned Opc = Inst.getOpcode(); 3383 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3384 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3385 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3386 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3387 3388 if (OpSel & ~3) 3389 return false; 3390 } 3391 return true; 3392 } 3393 3394 // Check if VCC register matches wavefront size 3395 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3396 auto FB = getFeatureBits(); 3397 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3398 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3399 } 3400 3401 // VOP3 literal is only allowed in GFX10+ and only one can be used 3402 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3403 unsigned Opcode = Inst.getOpcode(); 3404 const MCInstrDesc &Desc = MII.get(Opcode); 3405 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3406 return true; 3407 3408 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3409 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3410 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3411 3412 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3413 3414 unsigned NumExprs = 0; 3415 unsigned NumLiterals = 0; 3416 uint32_t LiteralValue; 3417 3418 for (int OpIdx : OpIndices) { 3419 if (OpIdx == -1) break; 3420 3421 const MCOperand &MO = Inst.getOperand(OpIdx); 3422 if (!MO.isImm() && !MO.isExpr()) 3423 continue; 3424 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3425 continue; 3426 3427 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3428 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3429 return false; 3430 3431 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3432 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3433 if (NumLiterals == 0 || LiteralValue != Value) { 3434 LiteralValue = Value; 3435 ++NumLiterals; 3436 } 3437 } else if (MO.isExpr()) { 3438 ++NumExprs; 3439 } 3440 } 3441 NumLiterals += NumExprs; 3442 3443 return !NumLiterals || 3444 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3445 } 3446 3447 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3448 const SMLoc &IDLoc, 3449 const OperandVector &Operands) { 3450 if (!validateLdsDirect(Inst)) { 3451 Error(IDLoc, 3452 "invalid use of lds_direct"); 3453 return false; 3454 } 3455 if (!validateSOPLiteral(Inst)) { 3456 Error(IDLoc, 3457 "only one literal operand is allowed"); 3458 return false; 3459 } 3460 if (!validateVOP3Literal(Inst)) { 3461 Error(IDLoc, 3462 "invalid literal operand"); 3463 return false; 3464 } 3465 if (!validateConstantBusLimitations(Inst)) { 3466 Error(IDLoc, 3467 "invalid operand (violates constant bus restrictions)"); 3468 return false; 3469 } 3470 if (!validateEarlyClobberLimitations(Inst)) { 3471 Error(IDLoc, 3472 "destination must be different than all sources"); 3473 return false; 3474 } 3475 if (!validateIntClampSupported(Inst)) { 3476 Error(IDLoc, 3477 "integer clamping is not supported on this GPU"); 3478 return false; 3479 } 3480 if (!validateOpSel(Inst)) { 3481 Error(IDLoc, 3482 "invalid op_sel operand"); 3483 return false; 3484 } 3485 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3486 if (!validateMIMGD16(Inst)) { 3487 Error(IDLoc, 3488 "d16 modifier is not supported on this GPU"); 3489 return false; 3490 } 3491 if (!validateMIMGDim(Inst)) { 3492 Error(IDLoc, "dim modifier is required on this GPU"); 3493 return false; 3494 } 3495 if (!validateMIMGDataSize(Inst)) { 3496 Error(IDLoc, 3497 "image data size does not match dmask and tfe"); 3498 return false; 3499 } 3500 if (!validateMIMGAddrSize(Inst)) { 3501 Error(IDLoc, 3502 "image address size does not match dim and a16"); 3503 return false; 3504 } 3505 if (!validateMIMGAtomicDMask(Inst)) { 3506 Error(IDLoc, 3507 "invalid atomic image dmask"); 3508 return false; 3509 } 3510 if (!validateMIMGGatherDMask(Inst)) { 3511 Error(IDLoc, 3512 "invalid image_gather dmask: only one bit must be set"); 3513 return false; 3514 } 3515 if (!validateMovrels(Inst)) { 3516 Error(IDLoc, "source operand must be a VGPR"); 3517 return false; 3518 } 3519 if (!validateFlatOffset(Inst, Operands)) { 3520 return false; 3521 } 3522 3523 return true; 3524 } 3525 3526 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3527 const FeatureBitset &FBS, 3528 unsigned VariantID = 0); 3529 3530 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3531 OperandVector &Operands, 3532 MCStreamer &Out, 3533 uint64_t &ErrorInfo, 3534 bool MatchingInlineAsm) { 3535 MCInst Inst; 3536 unsigned Result = Match_Success; 3537 for (auto Variant : getMatchedVariants()) { 3538 uint64_t EI; 3539 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3540 Variant); 3541 // We order match statuses from least to most specific. 
We use most specific 3542 // status as resulting 3543 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3544 if ((R == Match_Success) || 3545 (R == Match_PreferE32) || 3546 (R == Match_MissingFeature && Result != Match_PreferE32) || 3547 (R == Match_InvalidOperand && Result != Match_MissingFeature 3548 && Result != Match_PreferE32) || 3549 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3550 && Result != Match_MissingFeature 3551 && Result != Match_PreferE32)) { 3552 Result = R; 3553 ErrorInfo = EI; 3554 } 3555 if (R == Match_Success) 3556 break; 3557 } 3558 3559 switch (Result) { 3560 default: break; 3561 case Match_Success: 3562 if (!validateInstruction(Inst, IDLoc, Operands)) { 3563 return true; 3564 } 3565 Inst.setLoc(IDLoc); 3566 Out.EmitInstruction(Inst, getSTI()); 3567 return false; 3568 3569 case Match_MissingFeature: 3570 return Error(IDLoc, "instruction not supported on this GPU"); 3571 3572 case Match_MnemonicFail: { 3573 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3574 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3575 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3576 return Error(IDLoc, "invalid instruction" + Suggestion, 3577 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3578 } 3579 3580 case Match_InvalidOperand: { 3581 SMLoc ErrorLoc = IDLoc; 3582 if (ErrorInfo != ~0ULL) { 3583 if (ErrorInfo >= Operands.size()) { 3584 return Error(IDLoc, "too few operands for instruction"); 3585 } 3586 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3587 if (ErrorLoc == SMLoc()) 3588 ErrorLoc = IDLoc; 3589 } 3590 return Error(ErrorLoc, "invalid operand for instruction"); 3591 } 3592 3593 case Match_PreferE32: 3594 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3595 "should be encoded as e32"); 3596 } 3597 llvm_unreachable("Implement any new match types added!"); 3598 } 3599 3600 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3601 int64_t Tmp = -1; 3602 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3603 return true; 3604 } 3605 if (getParser().parseAbsoluteExpression(Tmp)) { 3606 return true; 3607 } 3608 Ret = static_cast<uint32_t>(Tmp); 3609 return false; 3610 } 3611 3612 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3613 uint32_t &Minor) { 3614 if (ParseAsAbsoluteExpression(Major)) 3615 return TokError("invalid major version"); 3616 3617 if (getLexer().isNot(AsmToken::Comma)) 3618 return TokError("minor version number required, comma expected"); 3619 Lex(); 3620 3621 if (ParseAsAbsoluteExpression(Minor)) 3622 return TokError("invalid minor version"); 3623 3624 return false; 3625 } 3626 3627 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3628 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3629 return TokError("directive only supported for amdgcn architecture"); 3630 3631 std::string Target; 3632 3633 SMLoc TargetStart = getTok().getLoc(); 3634 if (getParser().parseEscapedString(Target)) 3635 return true; 3636 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3637 3638 std::string ExpectedTarget; 3639 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3640 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3641 3642 if (Target != ExpectedTargetOS.str()) 3643 return getParser().Error(TargetRange.Start, "target must match options", 3644 TargetRange); 3645 3646 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3647 return false; 3648 } 3649 3650 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3651 return getParser().Error(Range.Start, "value out of range", Range); 3652 } 3653 3654 bool AMDGPUAsmParser::calculateGPRBlocks( 3655 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3656 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3657 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3658 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3659 // TODO(scott.linder): These calculations are duplicated from 3660 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3661 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3662 3663 unsigned NumVGPRs = NextFreeVGPR; 3664 unsigned NumSGPRs = NextFreeSGPR; 3665 3666 if (Version.Major >= 10) 3667 NumSGPRs = 0; 3668 else { 3669 unsigned MaxAddressableNumSGPRs = 3670 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3671 3672 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3673 NumSGPRs > MaxAddressableNumSGPRs) 3674 return OutOfRangeError(SGPRRange); 3675 3676 NumSGPRs += 3677 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3678 3679 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3680 NumSGPRs > MaxAddressableNumSGPRs) 3681 return OutOfRangeError(SGPRRange); 3682 3683 if (Features.test(FeatureSGPRInitBug)) 3684 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3685 } 3686 3687 VGPRBlocks = 3688 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3689 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3690 3691 return false; 3692 } 3693 3694 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3695 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3696 return TokError("directive only supported for amdgcn architecture"); 3697 3698 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3699 return TokError("directive only supported for amdhsa OS"); 3700 3701 StringRef KernelName; 3702 if (getParser().parseIdentifier(KernelName)) 3703 return true; 3704 3705 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3706 3707 StringSet<> Seen; 3708 3709 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3710 3711 SMRange VGPRRange; 3712 uint64_t NextFreeVGPR = 0; 3713 SMRange SGPRRange; 3714 uint64_t NextFreeSGPR = 0; 3715 unsigned UserSGPRCount = 0; 3716 bool ReserveVCC = true; 3717 bool ReserveFlatScr = true; 3718 bool ReserveXNACK = hasXNACK(); 3719 Optional<bool> EnableWavefrontSize32; 3720 3721 while (true) { 3722 while (getLexer().is(AsmToken::EndOfStatement)) 3723 Lex(); 3724 3725 if (getLexer().isNot(AsmToken::Identifier)) 3726 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3727 3728 StringRef ID = getTok().getIdentifier(); 3729 SMRange IDRange = getTok().getLocRange(); 3730 Lex(); 3731 3732 if (ID == ".end_amdhsa_kernel") 3733 break; 3734 3735 if (Seen.find(ID) != Seen.end()) 3736 return TokError(".amdhsa_ directives cannot be repeated"); 3737 Seen.insert(ID); 3738 3739 SMLoc ValStart = getTok().getLoc(); 3740 int64_t IVal; 3741 if (getParser().parseAbsoluteExpression(IVal)) 3742 return true; 3743 SMLoc ValEnd = getTok().getLoc(); 3744 SMRange ValRange = SMRange(ValStart, ValEnd); 3745 3746 if (IVal < 0) 3747 return OutOfRangeError(ValRange); 3748 3749 uint64_t Val = IVal; 3750 3751 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3752 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3753 return OutOfRangeError(RANGE); \ 3754 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3755 3756 if (ID == 
".amdhsa_group_segment_fixed_size") { 3757 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3758 return OutOfRangeError(ValRange); 3759 KD.group_segment_fixed_size = Val; 3760 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3761 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3762 return OutOfRangeError(ValRange); 3763 KD.private_segment_fixed_size = Val; 3764 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3765 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3766 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3767 Val, ValRange); 3768 if (Val) 3769 UserSGPRCount += 4; 3770 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3771 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3772 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3773 ValRange); 3774 if (Val) 3775 UserSGPRCount += 2; 3776 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3777 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3778 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3779 ValRange); 3780 if (Val) 3781 UserSGPRCount += 2; 3782 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3783 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3784 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3785 Val, ValRange); 3786 if (Val) 3787 UserSGPRCount += 2; 3788 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3789 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3790 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3791 ValRange); 3792 if (Val) 3793 UserSGPRCount += 2; 3794 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3795 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3796 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3797 ValRange); 3798 if (Val) 3799 UserSGPRCount += 2; 3800 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3801 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3802 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3803 Val, ValRange); 3804 if (Val) 3805 UserSGPRCount += 1; 3806 } else if (ID == ".amdhsa_wavefront_size32") { 3807 if (IVersion.Major < 10) 3808 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3809 IDRange); 3810 EnableWavefrontSize32 = Val; 3811 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3812 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3813 Val, ValRange); 3814 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3815 PARSE_BITS_ENTRY( 3816 KD.compute_pgm_rsrc2, 3817 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3818 ValRange); 3819 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3821 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3822 ValRange); 3823 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3824 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3825 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3826 ValRange); 3827 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3828 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3829 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3830 ValRange); 3831 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3832 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3833 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3834 ValRange); 3835 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3836 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3837 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3838 ValRange); 3839 } else if (ID == ".amdhsa_next_free_vgpr") { 3840 VGPRRange = ValRange; 3841 NextFreeVGPR = Val; 3842 } else if (ID == 
".amdhsa_next_free_sgpr") { 3843 SGPRRange = ValRange; 3844 NextFreeSGPR = Val; 3845 } else if (ID == ".amdhsa_reserve_vcc") { 3846 if (!isUInt<1>(Val)) 3847 return OutOfRangeError(ValRange); 3848 ReserveVCC = Val; 3849 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3850 if (IVersion.Major < 7) 3851 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3852 IDRange); 3853 if (!isUInt<1>(Val)) 3854 return OutOfRangeError(ValRange); 3855 ReserveFlatScr = Val; 3856 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3857 if (IVersion.Major < 8) 3858 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3859 IDRange); 3860 if (!isUInt<1>(Val)) 3861 return OutOfRangeError(ValRange); 3862 ReserveXNACK = Val; 3863 } else if (ID == ".amdhsa_float_round_mode_32") { 3864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3865 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3866 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3868 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3869 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3870 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3871 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3872 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3873 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3874 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3875 ValRange); 3876 } else if (ID == ".amdhsa_dx10_clamp") { 3877 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3878 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3879 } else if (ID == ".amdhsa_ieee_mode") { 3880 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3881 Val, ValRange); 3882 } else if (ID == ".amdhsa_fp16_overflow") { 3883 if (IVersion.Major < 9) 3884 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3885 IDRange); 3886 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3887 ValRange); 3888 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3889 if (IVersion.Major < 10) 3890 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3891 IDRange); 3892 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3893 ValRange); 3894 } else if (ID == ".amdhsa_memory_ordered") { 3895 if (IVersion.Major < 10) 3896 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3897 IDRange); 3898 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3899 ValRange); 3900 } else if (ID == ".amdhsa_forward_progress") { 3901 if (IVersion.Major < 10) 3902 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3903 IDRange); 3904 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3905 ValRange); 3906 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3907 PARSE_BITS_ENTRY( 3908 KD.compute_pgm_rsrc2, 3909 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3910 ValRange); 3911 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3912 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3913 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3914 Val, ValRange); 3915 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3916 PARSE_BITS_ENTRY( 3917 KD.compute_pgm_rsrc2, 3918 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3919 ValRange); 3920 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3921 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3922 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3923 Val, ValRange); 3924 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3925 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3926 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3927 Val, ValRange); 3928 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3929 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3930 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3931 Val, ValRange); 3932 } else if (ID == ".amdhsa_exception_int_div_zero") { 3933 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3934 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3935 Val, ValRange); 3936 } else { 3937 return getParser().Error(IDRange.Start, 3938 "unknown .amdhsa_kernel directive", IDRange); 3939 } 3940 3941 #undef PARSE_BITS_ENTRY 3942 } 3943 3944 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3945 return TokError(".amdhsa_next_free_vgpr directive is required"); 3946 3947 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3948 return TokError(".amdhsa_next_free_sgpr directive is required"); 3949 3950 unsigned VGPRBlocks; 3951 unsigned SGPRBlocks; 3952 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3953 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3954 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3955 SGPRBlocks)) 3956 return true; 3957 3958 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3959 VGPRBlocks)) 3960 return OutOfRangeError(VGPRRange); 3961 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3962 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3963 3964 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3965 SGPRBlocks)) 3966 return OutOfRangeError(SGPRRange); 3967 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3968 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3969 SGPRBlocks); 3970 3971 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3972 return TokError("too many user SGPRs enabled"); 3973 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3974 UserSGPRCount); 3975 3976 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3977 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3978 ReserveFlatScr, ReserveXNACK); 3979 return false; 3980 } 3981 3982 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3983 uint32_t Major; 3984 uint32_t Minor; 3985 3986 if (ParseDirectiveMajorMinor(Major, Minor)) 3987 return true; 3988 3989 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3990 return false; 3991 } 3992 3993 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3994 uint32_t Major; 3995 uint32_t Minor; 3996 uint32_t Stepping; 3997 StringRef VendorName; 3998 StringRef ArchName; 3999 4000 // If this directive has no arguments, then use the ISA version for the 4001 // targeted GPU. 
4002 if (getLexer().is(AsmToken::EndOfStatement)) { 4003 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4004 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4005 ISA.Stepping, 4006 "AMD", "AMDGPU"); 4007 return false; 4008 } 4009 4010 if (ParseDirectiveMajorMinor(Major, Minor)) 4011 return true; 4012 4013 if (getLexer().isNot(AsmToken::Comma)) 4014 return TokError("stepping version number required, comma expected"); 4015 Lex(); 4016 4017 if (ParseAsAbsoluteExpression(Stepping)) 4018 return TokError("invalid stepping version"); 4019 4020 if (getLexer().isNot(AsmToken::Comma)) 4021 return TokError("vendor name required, comma expected"); 4022 Lex(); 4023 4024 if (getLexer().isNot(AsmToken::String)) 4025 return TokError("invalid vendor name"); 4026 4027 VendorName = getLexer().getTok().getStringContents(); 4028 Lex(); 4029 4030 if (getLexer().isNot(AsmToken::Comma)) 4031 return TokError("arch name required, comma expected"); 4032 Lex(); 4033 4034 if (getLexer().isNot(AsmToken::String)) 4035 return TokError("invalid arch name"); 4036 4037 ArchName = getLexer().getTok().getStringContents(); 4038 Lex(); 4039 4040 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4041 VendorName, ArchName); 4042 return false; 4043 } 4044 4045 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4046 amd_kernel_code_t &Header) { 4047 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4048 // assembly for backwards compatibility. 4049 if (ID == "max_scratch_backing_memory_byte_size") { 4050 Parser.eatToEndOfStatement(); 4051 return false; 4052 } 4053 4054 SmallString<40> ErrStr; 4055 raw_svector_ostream Err(ErrStr); 4056 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4057 return TokError(Err.str()); 4058 } 4059 Lex(); 4060 4061 if (ID == "enable_wavefront_size32") { 4062 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4063 if (!isGFX10()) 4064 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4065 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4066 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4067 } else { 4068 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4069 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4070 } 4071 } 4072 4073 if (ID == "wavefront_size") { 4074 if (Header.wavefront_size == 5) { 4075 if (!isGFX10()) 4076 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4077 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4078 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4079 } else if (Header.wavefront_size == 6) { 4080 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4081 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4082 } 4083 } 4084 4085 if (ID == "enable_wgp_mode") { 4086 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4087 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4088 } 4089 4090 if (ID == "enable_mem_ordered") { 4091 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4092 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4093 } 4094 4095 if (ID == "enable_fwd_progress") { 4096 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4097 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4098 } 4099 4100 return false; 4101 } 4102 4103 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4104 amd_kernel_code_t Header; 4105 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4106 4107 while (true) { 4108 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4109 // will set the current token to EndOfStatement. 4110 while(getLexer().is(AsmToken::EndOfStatement)) 4111 Lex(); 4112 4113 if (getLexer().isNot(AsmToken::Identifier)) 4114 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4115 4116 StringRef ID = getLexer().getTok().getIdentifier(); 4117 Lex(); 4118 4119 if (ID == ".end_amd_kernel_code_t") 4120 break; 4121 4122 if (ParseAMDKernelCodeTValue(ID, Header)) 4123 return true; 4124 } 4125 4126 getTargetStreamer().EmitAMDKernelCodeT(Header); 4127 4128 return false; 4129 } 4130 4131 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4132 if (getLexer().isNot(AsmToken::Identifier)) 4133 return TokError("expected symbol name"); 4134 4135 StringRef KernelName = Parser.getTok().getString(); 4136 4137 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4138 ELF::STT_AMDGPU_HSA_KERNEL); 4139 Lex(); 4140 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4141 KernelScope.initialize(getContext()); 4142 return false; 4143 } 4144 4145 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4146 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4147 return Error(getParser().getTok().getLoc(), 4148 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4149 "architectures"); 4150 } 4151 4152 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4153 4154 std::string ISAVersionStringFromSTI; 4155 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4156 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4157 4158 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4159 return Error(getParser().getTok().getLoc(), 4160 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4161 "arguments specified through the command line"); 4162 } 4163 4164 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4165 Lex(); 4166 4167 return false; 4168 } 4169 4170 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4171 const char *AssemblerDirectiveBegin; 4172 const char *AssemblerDirectiveEnd; 4173 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4174 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4175 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4176 HSAMD::V3::AssemblerDirectiveEnd) 4177 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4178 HSAMD::AssemblerDirectiveEnd); 4179 4180 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4181 return Error(getParser().getTok().getLoc(), 4182 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4183 "not available on non-amdhsa OSes")).str()); 4184 } 4185 4186 std::string HSAMetadataString; 4187 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4188 HSAMetadataString)) 4189 return true; 4190 4191 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4192 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4193 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4194 } else { 4195 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4196 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4197 } 4198 4199 return false; 4200 } 4201 4202 /// Common code to parse out a block of text (typically YAML) between start and 4203 /// end directives. 
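/// For example, ParseDirectiveHSAMetadata and ParseDirectivePALMetadataBegin
/// below use this to collect everything between their begin and end
/// directives into a single string before handing it to the metadata parsers.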
4204 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4205 const char *AssemblerDirectiveEnd, 4206 std::string &CollectString) { 4207 4208 raw_string_ostream CollectStream(CollectString); 4209 4210 getLexer().setSkipSpace(false); 4211 4212 bool FoundEnd = false; 4213 while (!getLexer().is(AsmToken::Eof)) { 4214 while (getLexer().is(AsmToken::Space)) { 4215 CollectStream << getLexer().getTok().getString(); 4216 Lex(); 4217 } 4218 4219 if (getLexer().is(AsmToken::Identifier)) { 4220 StringRef ID = getLexer().getTok().getIdentifier(); 4221 if (ID == AssemblerDirectiveEnd) { 4222 Lex(); 4223 FoundEnd = true; 4224 break; 4225 } 4226 } 4227 4228 CollectStream << Parser.parseStringToEndOfStatement() 4229 << getContext().getAsmInfo()->getSeparatorString(); 4230 4231 Parser.eatToEndOfStatement(); 4232 } 4233 4234 getLexer().setSkipSpace(true); 4235 4236 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4237 return TokError(Twine("expected directive ") + 4238 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4239 } 4240 4241 CollectStream.flush(); 4242 return false; 4243 } 4244 4245 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4246 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4247 std::string String; 4248 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4249 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4250 return true; 4251 4252 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4253 if (!PALMetadata->setFromString(String)) 4254 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4255 return false; 4256 } 4257 4258 /// Parse the assembler directive for old linear-format PAL metadata. 4259 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4260 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4261 return Error(getParser().getTok().getLoc(), 4262 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4263 "not available on non-amdpal OSes")).str()); 4264 } 4265 4266 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4267 PALMetadata->setLegacy(); 4268 for (;;) { 4269 uint32_t Key, Value; 4270 if (ParseAsAbsoluteExpression(Key)) { 4271 return TokError(Twine("invalid value in ") + 4272 Twine(PALMD::AssemblerDirective)); 4273 } 4274 if (getLexer().isNot(AsmToken::Comma)) { 4275 return TokError(Twine("expected an even number of values in ") + 4276 Twine(PALMD::AssemblerDirective)); 4277 } 4278 Lex(); 4279 if (ParseAsAbsoluteExpression(Value)) { 4280 return TokError(Twine("invalid value in ") + 4281 Twine(PALMD::AssemblerDirective)); 4282 } 4283 PALMetadata->setRegister(Key, Value); 4284 if (getLexer().isNot(AsmToken::Comma)) 4285 break; 4286 Lex(); 4287 } 4288 return false; 4289 } 4290 4291 /// ParseDirectiveAMDGPULDS 4292 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4293 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4294 if (getParser().checkForValidSection()) 4295 return true; 4296 4297 StringRef Name; 4298 SMLoc NameLoc = getLexer().getLoc(); 4299 if (getParser().parseIdentifier(Name)) 4300 return TokError("expected identifier in directive"); 4301 4302 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4303 if (parseToken(AsmToken::Comma, "expected ','")) 4304 return true; 4305 4306 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4307 4308 int64_t Size; 4309 SMLoc SizeLoc = getLexer().getLoc(); 4310 if (getParser().parseAbsoluteExpression(Size)) 4311 return true; 4312 if (Size < 0) 4313 return 
Error(SizeLoc, "size must be non-negative"); 4314 if (Size > LocalMemorySize) 4315 return Error(SizeLoc, "size is too large"); 4316 4317 int64_t Align = 4; 4318 if (getLexer().is(AsmToken::Comma)) { 4319 Lex(); 4320 SMLoc AlignLoc = getLexer().getLoc(); 4321 if (getParser().parseAbsoluteExpression(Align)) 4322 return true; 4323 if (Align < 0 || !isPowerOf2_64(Align)) 4324 return Error(AlignLoc, "alignment must be a power of two"); 4325 4326 // Alignment larger than the size of LDS is possible in theory, as long 4327 // as the linker manages to place to symbol at address 0, but we do want 4328 // to make sure the alignment fits nicely into a 32-bit integer. 4329 if (Align >= 1u << 31) 4330 return Error(AlignLoc, "alignment is too large"); 4331 } 4332 4333 if (parseToken(AsmToken::EndOfStatement, 4334 "unexpected token in '.amdgpu_lds' directive")) 4335 return true; 4336 4337 Symbol->redefineIfPossible(); 4338 if (!Symbol->isUndefined()) 4339 return Error(NameLoc, "invalid symbol redefinition"); 4340 4341 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4342 return false; 4343 } 4344 4345 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4346 StringRef IDVal = DirectiveID.getString(); 4347 4348 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4349 if (IDVal == ".amdgcn_target") 4350 return ParseDirectiveAMDGCNTarget(); 4351 4352 if (IDVal == ".amdhsa_kernel") 4353 return ParseDirectiveAMDHSAKernel(); 4354 4355 // TODO: Restructure/combine with PAL metadata directive. 4356 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4357 return ParseDirectiveHSAMetadata(); 4358 } else { 4359 if (IDVal == ".hsa_code_object_version") 4360 return ParseDirectiveHSACodeObjectVersion(); 4361 4362 if (IDVal == ".hsa_code_object_isa") 4363 return ParseDirectiveHSACodeObjectISA(); 4364 4365 if (IDVal == ".amd_kernel_code_t") 4366 return ParseDirectiveAMDKernelCodeT(); 4367 4368 if (IDVal == ".amdgpu_hsa_kernel") 4369 return ParseDirectiveAMDGPUHsaKernel(); 4370 4371 if (IDVal == ".amd_amdgpu_isa") 4372 return ParseDirectiveISAVersion(); 4373 4374 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4375 return ParseDirectiveHSAMetadata(); 4376 } 4377 4378 if (IDVal == ".amdgpu_lds") 4379 return ParseDirectiveAMDGPULDS(); 4380 4381 if (IDVal == PALMD::AssemblerDirectiveBegin) 4382 return ParseDirectivePALMetadataBegin(); 4383 4384 if (IDVal == PALMD::AssemblerDirective) 4385 return ParseDirectivePALMetadata(); 4386 4387 return true; 4388 } 4389 4390 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4391 unsigned RegNo) const { 4392 4393 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4394 R.isValid(); ++R) { 4395 if (*R == RegNo) 4396 return isGFX9() || isGFX10(); 4397 } 4398 4399 // GFX10 has 2 more SGPRs 104 and 105. 
4400 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4401 R.isValid(); ++R) { 4402 if (*R == RegNo) 4403 return hasSGPR104_SGPR105(); 4404 } 4405 4406 switch (RegNo) { 4407 case AMDGPU::SRC_SHARED_BASE: 4408 case AMDGPU::SRC_SHARED_LIMIT: 4409 case AMDGPU::SRC_PRIVATE_BASE: 4410 case AMDGPU::SRC_PRIVATE_LIMIT: 4411 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4412 return !isCI() && !isSI() && !isVI(); 4413 case AMDGPU::TBA: 4414 case AMDGPU::TBA_LO: 4415 case AMDGPU::TBA_HI: 4416 case AMDGPU::TMA: 4417 case AMDGPU::TMA_LO: 4418 case AMDGPU::TMA_HI: 4419 return !isGFX9() && !isGFX10(); 4420 case AMDGPU::XNACK_MASK: 4421 case AMDGPU::XNACK_MASK_LO: 4422 case AMDGPU::XNACK_MASK_HI: 4423 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4424 case AMDGPU::SGPR_NULL: 4425 return isGFX10(); 4426 default: 4427 break; 4428 } 4429 4430 if (isCI()) 4431 return true; 4432 4433 if (isSI() || isGFX10()) { 4434 // No flat_scr on SI. 4435 // On GFX10 flat scratch is not a valid register operand and can only be 4436 // accessed with s_setreg/s_getreg. 4437 switch (RegNo) { 4438 case AMDGPU::FLAT_SCR: 4439 case AMDGPU::FLAT_SCR_LO: 4440 case AMDGPU::FLAT_SCR_HI: 4441 return false; 4442 default: 4443 return true; 4444 } 4445 } 4446 4447 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4448 // SI/CI have. 4449 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4450 R.isValid(); ++R) { 4451 if (*R == RegNo) 4452 return hasSGPR102_SGPR103(); 4453 } 4454 4455 return true; 4456 } 4457 4458 OperandMatchResultTy 4459 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4460 OperandMode Mode) { 4461 // Try to parse with a custom parser 4462 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4463 4464 // If we successfully parsed the operand or if there as an error parsing, 4465 // we are done. 4466 // 4467 // If we are parsing after we reach EndOfStatement then this means we 4468 // are appending default values to the Operands list. This is only done 4469 // by custom parser, so we shouldn't continue on to the generic parsing. 4470 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4471 getLexer().is(AsmToken::EndOfStatement)) 4472 return ResTy; 4473 4474 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4475 unsigned Prefix = Operands.size(); 4476 SMLoc LBraceLoc = getTok().getLoc(); 4477 Parser.Lex(); // eat the '[' 4478 4479 for (;;) { 4480 ResTy = parseReg(Operands); 4481 if (ResTy != MatchOperand_Success) 4482 return ResTy; 4483 4484 if (getLexer().is(AsmToken::RBrac)) 4485 break; 4486 4487 if (getLexer().isNot(AsmToken::Comma)) 4488 return MatchOperand_ParseFail; 4489 Parser.Lex(); 4490 } 4491 4492 if (Operands.size() - Prefix > 1) { 4493 Operands.insert(Operands.begin() + Prefix, 4494 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4495 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4496 getTok().getLoc())); 4497 } 4498 4499 Parser.Lex(); // eat the ']' 4500 return MatchOperand_Success; 4501 } 4502 4503 return parseRegOrImm(Operands); 4504 } 4505 4506 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4507 // Clear any forced encodings from the previous instruction. 
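// A recognized suffix both forces the corresponding encoding and is stripped
// from the mnemonic; e.g. "v_add_f32_e64" (illustrative mnemonic) forces the
// 64-bit encoding and is then matched as "v_add_f32".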
4508 setForcedEncodingSize(0); 4509 setForcedDPP(false); 4510 setForcedSDWA(false); 4511 4512 if (Name.endswith("_e64")) { 4513 setForcedEncodingSize(64); 4514 return Name.substr(0, Name.size() - 4); 4515 } else if (Name.endswith("_e32")) { 4516 setForcedEncodingSize(32); 4517 return Name.substr(0, Name.size() - 4); 4518 } else if (Name.endswith("_dpp")) { 4519 setForcedDPP(true); 4520 return Name.substr(0, Name.size() - 4); 4521 } else if (Name.endswith("_sdwa")) { 4522 setForcedSDWA(true); 4523 return Name.substr(0, Name.size() - 5); 4524 } 4525 return Name; 4526 } 4527 4528 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4529 StringRef Name, 4530 SMLoc NameLoc, OperandVector &Operands) { 4531 // Add the instruction mnemonic 4532 Name = parseMnemonicSuffix(Name); 4533 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4534 4535 bool IsMIMG = Name.startswith("image_"); 4536 4537 while (!getLexer().is(AsmToken::EndOfStatement)) { 4538 OperandMode Mode = OperandMode_Default; 4539 if (IsMIMG && isGFX10() && Operands.size() == 2) 4540 Mode = OperandMode_NSA; 4541 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4542 4543 // Eat the comma or space if there is one. 4544 if (getLexer().is(AsmToken::Comma)) 4545 Parser.Lex(); 4546 4547 switch (Res) { 4548 case MatchOperand_Success: break; 4549 case MatchOperand_ParseFail: 4550 // FIXME: use real operand location rather than the current location. 4551 Error(getLexer().getLoc(), "failed parsing operand."); 4552 while (!getLexer().is(AsmToken::EndOfStatement)) { 4553 Parser.Lex(); 4554 } 4555 return true; 4556 case MatchOperand_NoMatch: 4557 // FIXME: use real operand location rather than the current location. 4558 Error(getLexer().getLoc(), "not a valid operand."); 4559 while (!getLexer().is(AsmToken::EndOfStatement)) { 4560 Parser.Lex(); 4561 } 4562 return true; 4563 } 4564 } 4565 4566 return false; 4567 } 4568 4569 //===----------------------------------------------------------------------===// 4570 // Utility functions 4571 //===----------------------------------------------------------------------===// 4572 4573 OperandMatchResultTy 4574 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4575 4576 if (!trySkipId(Prefix, AsmToken::Colon)) 4577 return MatchOperand_NoMatch; 4578 4579 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4580 } 4581 4582 OperandMatchResultTy 4583 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4584 AMDGPUOperand::ImmTy ImmTy, 4585 bool (*ConvertResult)(int64_t&)) { 4586 SMLoc S = getLoc(); 4587 int64_t Value = 0; 4588 4589 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4590 if (Res != MatchOperand_Success) 4591 return Res; 4592 4593 if (ConvertResult && !ConvertResult(Value)) { 4594 Error(S, "invalid " + StringRef(Prefix) + " value."); 4595 } 4596 4597 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4598 return MatchOperand_Success; 4599 } 4600 4601 OperandMatchResultTy 4602 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4603 OperandVector &Operands, 4604 AMDGPUOperand::ImmTy ImmTy, 4605 bool (*ConvertResult)(int64_t&)) { 4606 SMLoc S = getLoc(); 4607 if (!trySkipId(Prefix, AsmToken::Colon)) 4608 return MatchOperand_NoMatch; 4609 4610 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4611 return MatchOperand_ParseFail; 4612 4613 unsigned Val = 0; 4614 const unsigned MaxSize = 4; 4615 4616 // FIXME: How to verify the number of elements matches the number of src 4617 // operands? 4618 for (int I = 0; ; ++I) { 4619 int64_t Op; 4620 SMLoc Loc = getLoc(); 4621 if (!parseExpr(Op)) 4622 return MatchOperand_ParseFail; 4623 4624 if (Op != 0 && Op != 1) { 4625 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4626 return MatchOperand_ParseFail; 4627 } 4628 4629 Val |= (Op << I); 4630 4631 if (trySkipToken(AsmToken::RBrac)) 4632 break; 4633 4634 if (I + 1 == MaxSize) { 4635 Error(getLoc(), "expected a closing square bracket"); 4636 return MatchOperand_ParseFail; 4637 } 4638 4639 if (!skipToken(AsmToken::Comma, "expected a comma")) 4640 return MatchOperand_ParseFail; 4641 } 4642 4643 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4644 return MatchOperand_Success; 4645 } 4646 4647 OperandMatchResultTy 4648 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4649 AMDGPUOperand::ImmTy ImmTy) { 4650 int64_t Bit = 0; 4651 SMLoc S = Parser.getTok().getLoc(); 4652 4653 // We are at the end of the statement, and this is a default argument, so 4654 // use a default value. 
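// Otherwise the bit is spelled either as-is to set it or with a "no" prefix
// to clear it, e.g. "glc" vs. "noglc" (illustrative modifier name).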
4655 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4656 switch(getLexer().getKind()) { 4657 case AsmToken::Identifier: { 4658 StringRef Tok = Parser.getTok().getString(); 4659 if (Tok == Name) { 4660 if (Tok == "r128" && !hasMIMG_R128()) 4661 Error(S, "r128 modifier is not supported on this GPU"); 4662 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4663 Error(S, "a16 modifier is not supported on this GPU"); 4664 Bit = 1; 4665 Parser.Lex(); 4666 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4667 Bit = 0; 4668 Parser.Lex(); 4669 } else { 4670 return MatchOperand_NoMatch; 4671 } 4672 break; 4673 } 4674 default: 4675 return MatchOperand_NoMatch; 4676 } 4677 } 4678 4679 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4680 return MatchOperand_ParseFail; 4681 4682 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4683 ImmTy = AMDGPUOperand::ImmTyR128A16; 4684 4685 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4686 return MatchOperand_Success; 4687 } 4688 4689 static void addOptionalImmOperand( 4690 MCInst& Inst, const OperandVector& Operands, 4691 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4692 AMDGPUOperand::ImmTy ImmT, 4693 int64_t Default = 0) { 4694 auto i = OptionalIdx.find(ImmT); 4695 if (i != OptionalIdx.end()) { 4696 unsigned Idx = i->second; 4697 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4698 } else { 4699 Inst.addOperand(MCOperand::createImm(Default)); 4700 } 4701 } 4702 4703 OperandMatchResultTy 4704 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4705 if (getLexer().isNot(AsmToken::Identifier)) { 4706 return MatchOperand_NoMatch; 4707 } 4708 StringRef Tok = Parser.getTok().getString(); 4709 if (Tok != Prefix) { 4710 return MatchOperand_NoMatch; 4711 } 4712 4713 Parser.Lex(); 4714 if (getLexer().isNot(AsmToken::Colon)) { 4715 return MatchOperand_ParseFail; 4716 } 4717 4718 Parser.Lex(); 4719 if (getLexer().isNot(AsmToken::Identifier)) { 4720 return MatchOperand_ParseFail; 4721 } 4722 4723 Value = Parser.getTok().getString(); 4724 return MatchOperand_Success; 4725 } 4726 4727 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4728 // values to live in a joint format operand in the MCInst encoding. 4729 OperandMatchResultTy 4730 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4731 SMLoc S = Parser.getTok().getLoc(); 4732 int64_t Dfmt = 0, Nfmt = 0; 4733 // dfmt and nfmt can appear in either order, and each is optional. 
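// Either order yields the same joint value, Dfmt | (Nfmt << 4); e.g. dfmt:6
// with nfmt:1 (illustrative values) encodes as 0x16.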
4734 bool GotDfmt = false, GotNfmt = false; 4735 while (!GotDfmt || !GotNfmt) { 4736 if (!GotDfmt) { 4737 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4738 if (Res != MatchOperand_NoMatch) { 4739 if (Res != MatchOperand_Success) 4740 return Res; 4741 if (Dfmt >= 16) { 4742 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4743 return MatchOperand_ParseFail; 4744 } 4745 GotDfmt = true; 4746 Parser.Lex(); 4747 continue; 4748 } 4749 } 4750 if (!GotNfmt) { 4751 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4752 if (Res != MatchOperand_NoMatch) { 4753 if (Res != MatchOperand_Success) 4754 return Res; 4755 if (Nfmt >= 8) { 4756 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4757 return MatchOperand_ParseFail; 4758 } 4759 GotNfmt = true; 4760 Parser.Lex(); 4761 continue; 4762 } 4763 } 4764 break; 4765 } 4766 if (!GotDfmt && !GotNfmt) 4767 return MatchOperand_NoMatch; 4768 auto Format = Dfmt | Nfmt << 4; 4769 Operands.push_back( 4770 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4771 return MatchOperand_Success; 4772 } 4773 4774 //===----------------------------------------------------------------------===// 4775 // ds 4776 //===----------------------------------------------------------------------===// 4777 4778 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4779 const OperandVector &Operands) { 4780 OptionalImmIndexMap OptionalIdx; 4781 4782 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4783 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4784 4785 // Add the register arguments 4786 if (Op.isReg()) { 4787 Op.addRegOperands(Inst, 1); 4788 continue; 4789 } 4790 4791 // Handle optional arguments 4792 OptionalIdx[Op.getImmTy()] = i; 4793 } 4794 4795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4797 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4798 4799 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4800 } 4801 4802 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4803 bool IsGdsHardcoded) { 4804 OptionalImmIndexMap OptionalIdx; 4805 4806 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4807 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4808 4809 // Add the register arguments 4810 if (Op.isReg()) { 4811 Op.addRegOperands(Inst, 1); 4812 continue; 4813 } 4814 4815 if (Op.isToken() && Op.getToken() == "gds") { 4816 IsGdsHardcoded = true; 4817 continue; 4818 } 4819 4820 // Handle optional arguments 4821 OptionalIdx[Op.getImmTy()] = i; 4822 } 4823 4824 AMDGPUOperand::ImmTy OffsetType = 4825 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4826 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4827 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4828 AMDGPUOperand::ImmTyOffset; 4829 4830 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4831 4832 if (!IsGdsHardcoded) { 4833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4834 } 4835 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4836 } 4837 4838 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4839 OptionalImmIndexMap OptionalIdx; 4840 4841 unsigned OperandIdx[4]; 4842 unsigned EnMask = 0; 4843 int SrcIdx = 0; 4844 4845 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4846 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4847 4848 // Add the register arguments 4849 if (Op.isReg()) { 4850 assert(SrcIdx < 4); 4851 OperandIdx[SrcIdx] = Inst.size(); 4852 Op.addRegOperands(Inst, 1); 4853 ++SrcIdx; 4854 continue; 4855 } 4856 4857 if (Op.isOff()) { 4858 assert(SrcIdx < 4); 4859 OperandIdx[SrcIdx] = Inst.size(); 4860 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4861 ++SrcIdx; 4862 continue; 4863 } 4864 4865 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4866 Op.addImmOperands(Inst, 1); 4867 continue; 4868 } 4869 4870 if (Op.isToken() && Op.getToken() == "done") 4871 continue; 4872 4873 // Handle optional arguments 4874 OptionalIdx[Op.getImmTy()] = i; 4875 } 4876 4877 assert(SrcIdx == 4); 4878 4879 bool Compr = false; 4880 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4881 Compr = true; 4882 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4883 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4884 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4885 } 4886 4887 for (auto i = 0; i < SrcIdx; ++i) { 4888 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4889 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4890 } 4891 } 4892 4893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4894 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4895 4896 Inst.addOperand(MCOperand::createImm(EnMask)); 4897 } 4898 4899 //===----------------------------------------------------------------------===// 4900 // s_waitcnt 4901 //===----------------------------------------------------------------------===// 4902 4903 static bool 4904 encodeCnt( 4905 const AMDGPU::IsaVersion ISA, 4906 int64_t &IntVal, 4907 int64_t CntVal, 4908 bool Saturate, 4909 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4910 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4911 { 4912 bool Failed = false; 4913 4914 IntVal = encode(ISA, IntVal, CntVal); 4915 if (CntVal != decode(ISA, IntVal)) { 4916 if (Saturate) { 4917 IntVal = encode(ISA, IntVal, -1); 4918 } else { 4919 Failed = true; 4920 } 4921 } 4922 return Failed; 4923 } 4924 4925 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4926 4927 SMLoc CntLoc = getLoc(); 4928 StringRef CntName = getTokenStr(); 4929 4930 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4931 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4932 return false; 4933 4934 int64_t CntVal; 4935 SMLoc ValLoc = getLoc(); 4936 if (!parseExpr(CntVal)) 4937 return false; 4938 4939 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4940 4941 bool Failed = true; 4942 bool Sat = CntName.endswith("_sat"); 4943 4944 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4945 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4946 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4947 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4948 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4949 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4950 } else { 4951 Error(CntLoc, "invalid counter name " + CntName); 4952 return false; 4953 } 4954 4955 if (Failed) { 4956 Error(ValLoc, "too large value for " + CntName); 4957 return false; 4958 } 4959 4960 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4961 return false; 4962 4963 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4964 if (isToken(AsmToken::EndOfStatement)) { 4965 Error(getLoc(), "expected a counter name"); 4966 return false; 4967 } 4968 } 4969 4970 return true; 4971 } 4972 4973 OperandMatchResultTy 4974 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4975 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4976 int64_t Waitcnt = getWaitcntBitMask(ISA); 4977 SMLoc S = getLoc(); 4978 4979 // If parse failed, do not return error code 4980 // to avoid excessive error messages. 
4981 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4982 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4983 } else { 4984 parseExpr(Waitcnt); 4985 } 4986 4987 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4988 return MatchOperand_Success; 4989 } 4990 4991 bool 4992 AMDGPUOperand::isSWaitCnt() const { 4993 return isImm(); 4994 } 4995 4996 //===----------------------------------------------------------------------===// 4997 // hwreg 4998 //===----------------------------------------------------------------------===// 4999 5000 bool 5001 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5002 int64_t &Offset, 5003 int64_t &Width) { 5004 using namespace llvm::AMDGPU::Hwreg; 5005 5006 // The register may be specified by name or using a numeric code 5007 if (isToken(AsmToken::Identifier) && 5008 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5009 HwReg.IsSymbolic = true; 5010 lex(); // skip message name 5011 } else if (!parseExpr(HwReg.Id)) { 5012 return false; 5013 } 5014 5015 if (trySkipToken(AsmToken::RParen)) 5016 return true; 5017 5018 // parse optional params 5019 return 5020 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5021 parseExpr(Offset) && 5022 skipToken(AsmToken::Comma, "expected a comma") && 5023 parseExpr(Width) && 5024 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5025 } 5026 5027 bool 5028 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5029 const int64_t Offset, 5030 const int64_t Width, 5031 const SMLoc Loc) { 5032 5033 using namespace llvm::AMDGPU::Hwreg; 5034 5035 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5036 Error(Loc, "specified hardware register is not supported on this GPU"); 5037 return false; 5038 } else if (!isValidHwreg(HwReg.Id)) { 5039 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5040 return false; 5041 } else if (!isValidHwregOffset(Offset)) { 5042 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5043 return false; 5044 } else if (!isValidHwregWidth(Width)) { 5045 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5046 return false; 5047 } 5048 return true; 5049 } 5050 5051 OperandMatchResultTy 5052 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5053 using namespace llvm::AMDGPU::Hwreg; 5054 5055 int64_t ImmVal = 0; 5056 SMLoc Loc = getLoc(); 5057 5058 // If parse failed, do not return error code 5059 // to avoid excessive error messages. 
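// Two operand forms are accepted here (illustrative examples): a symbolic
// spec such as "hwreg(HW_REG_MODE, 0, 32)" with optional offset and width,
// or a raw 16-bit immediate.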
5060 if (trySkipId("hwreg", AsmToken::LParen)) { 5061 OperandInfoTy HwReg(ID_UNKNOWN_); 5062 int64_t Offset = OFFSET_DEFAULT_; 5063 int64_t Width = WIDTH_DEFAULT_; 5064 if (parseHwregBody(HwReg, Offset, Width) && 5065 validateHwreg(HwReg, Offset, Width, Loc)) { 5066 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5067 } 5068 } else if (parseExpr(ImmVal)) { 5069 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5070 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5071 } 5072 5073 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5074 return MatchOperand_Success; 5075 } 5076 5077 bool AMDGPUOperand::isHwreg() const { 5078 return isImmTy(ImmTyHwreg); 5079 } 5080 5081 //===----------------------------------------------------------------------===// 5082 // sendmsg 5083 //===----------------------------------------------------------------------===// 5084 5085 bool 5086 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5087 OperandInfoTy &Op, 5088 OperandInfoTy &Stream) { 5089 using namespace llvm::AMDGPU::SendMsg; 5090 5091 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5092 Msg.IsSymbolic = true; 5093 lex(); // skip message name 5094 } else if (!parseExpr(Msg.Id)) { 5095 return false; 5096 } 5097 5098 if (trySkipToken(AsmToken::Comma)) { 5099 Op.IsDefined = true; 5100 if (isToken(AsmToken::Identifier) && 5101 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5102 lex(); // skip operation name 5103 } else if (!parseExpr(Op.Id)) { 5104 return false; 5105 } 5106 5107 if (trySkipToken(AsmToken::Comma)) { 5108 Stream.IsDefined = true; 5109 if (!parseExpr(Stream.Id)) 5110 return false; 5111 } 5112 } 5113 5114 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5115 } 5116 5117 bool 5118 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5119 const OperandInfoTy &Op, 5120 const OperandInfoTy &Stream, 5121 const SMLoc S) { 5122 using namespace llvm::AMDGPU::SendMsg; 5123 5124 // Validation strictness depends on whether message is specified 5125 // in a symbolc or in a numeric form. In the latter case 5126 // only encoding possibility is checked. 5127 bool Strict = Msg.IsSymbolic; 5128 5129 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5130 Error(S, "invalid message id"); 5131 return false; 5132 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5133 Error(S, Op.IsDefined ? 5134 "message does not support operations" : 5135 "missing message operation"); 5136 return false; 5137 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5138 Error(S, "invalid operation id"); 5139 return false; 5140 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5141 Error(S, "message operation does not support streams"); 5142 return false; 5143 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5144 Error(S, "invalid message stream id"); 5145 return false; 5146 } 5147 return true; 5148 } 5149 5150 OperandMatchResultTy 5151 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5152 using namespace llvm::AMDGPU::SendMsg; 5153 5154 int64_t ImmVal = 0; 5155 SMLoc Loc = getLoc(); 5156 5157 // If parse failed, do not return error code 5158 // to avoid excessive error messages. 
5159 if (trySkipId("sendmsg", AsmToken::LParen)) { 5160 OperandInfoTy Msg(ID_UNKNOWN_); 5161 OperandInfoTy Op(OP_NONE_); 5162 OperandInfoTy Stream(STREAM_ID_NONE_); 5163 if (parseSendMsgBody(Msg, Op, Stream) && 5164 validateSendMsg(Msg, Op, Stream, Loc)) { 5165 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5166 } 5167 } else if (parseExpr(ImmVal)) { 5168 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5169 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5170 } 5171 5172 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5173 return MatchOperand_Success; 5174 } 5175 5176 bool AMDGPUOperand::isSendMsg() const { 5177 return isImmTy(ImmTySendMsg); 5178 } 5179 5180 //===----------------------------------------------------------------------===// 5181 // v_interp 5182 //===----------------------------------------------------------------------===// 5183 5184 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5185 if (getLexer().getKind() != AsmToken::Identifier) 5186 return MatchOperand_NoMatch; 5187 5188 StringRef Str = Parser.getTok().getString(); 5189 int Slot = StringSwitch<int>(Str) 5190 .Case("p10", 0) 5191 .Case("p20", 1) 5192 .Case("p0", 2) 5193 .Default(-1); 5194 5195 SMLoc S = Parser.getTok().getLoc(); 5196 if (Slot == -1) 5197 return MatchOperand_ParseFail; 5198 5199 Parser.Lex(); 5200 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5201 AMDGPUOperand::ImmTyInterpSlot)); 5202 return MatchOperand_Success; 5203 } 5204 5205 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5206 if (getLexer().getKind() != AsmToken::Identifier) 5207 return MatchOperand_NoMatch; 5208 5209 StringRef Str = Parser.getTok().getString(); 5210 if (!Str.startswith("attr")) 5211 return MatchOperand_NoMatch; 5212 5213 StringRef Chan = Str.take_back(2); 5214 int AttrChan = StringSwitch<int>(Chan) 5215 .Case(".x", 0) 5216 .Case(".y", 1) 5217 .Case(".z", 2) 5218 .Case(".w", 3) 5219 .Default(-1); 5220 if (AttrChan == -1) 5221 return MatchOperand_ParseFail; 5222 5223 Str = Str.drop_back(2).drop_front(4); 5224 5225 uint8_t Attr; 5226 if (Str.getAsInteger(10, Attr)) 5227 return MatchOperand_ParseFail; 5228 5229 SMLoc S = Parser.getTok().getLoc(); 5230 Parser.Lex(); 5231 if (Attr > 63) { 5232 Error(S, "out of bounds attr"); 5233 return MatchOperand_Success; 5234 } 5235 5236 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5237 5238 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5239 AMDGPUOperand::ImmTyInterpAttr)); 5240 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5241 AMDGPUOperand::ImmTyAttrChan)); 5242 return MatchOperand_Success; 5243 } 5244 5245 //===----------------------------------------------------------------------===// 5246 // exp 5247 //===----------------------------------------------------------------------===// 5248 5249 void AMDGPUAsmParser::errorExpTgt() { 5250 Error(Parser.getTok().getLoc(), "invalid exp target"); 5251 } 5252 5253 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5254 uint8_t &Val) { 5255 if (Str == "null") { 5256 Val = 9; 5257 return MatchOperand_Success; 5258 } 5259 5260 if (Str.startswith("mrt")) { 5261 Str = Str.drop_front(3); 5262 if (Str == "z") { // == mrtz 5263 Val = 8; 5264 return MatchOperand_Success; 5265 } 5266 5267 if (Str.getAsInteger(10, Val)) 5268 return MatchOperand_ParseFail; 5269 5270 if (Val > 7) 5271 errorExpTgt(); 5272 5273 return MatchOperand_Success; 5274 } 5275 5276 if (Str.startswith("pos")) 
) {
5277 Str = Str.drop_front(3);
5278 if (Str.getAsInteger(10, Val))
5279 return MatchOperand_ParseFail;
5280
5281 if (Val > 4 || (Val == 4 && !isGFX10()))
5282 errorExpTgt();
5283
5284 Val += 12;
5285 return MatchOperand_Success;
5286 }
5287
5288 if (isGFX10() && Str == "prim") {
5289 Val = 20;
5290 return MatchOperand_Success;
5291 }
5292
5293 if (Str.startswith("param")) {
5294 Str = Str.drop_front(5);
5295 if (Str.getAsInteger(10, Val))
5296 return MatchOperand_ParseFail;
5297
5298 if (Val >= 32)
5299 errorExpTgt();
5300
5301 Val += 32;
5302 return MatchOperand_Success;
5303 }
5304
5305 if (Str.startswith("invalid_target_")) {
5306 Str = Str.drop_front(15);
5307 if (Str.getAsInteger(10, Val))
5308 return MatchOperand_ParseFail;
5309
5310 errorExpTgt();
5311 return MatchOperand_Success;
5312 }
5313
5314 return MatchOperand_NoMatch;
5315 }
5316
5317 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5318 uint8_t Val;
5319 StringRef Str = Parser.getTok().getString();
5320
5321 auto Res = parseExpTgtImpl(Str, Val);
5322 if (Res != MatchOperand_Success)
5323 return Res;
5324
5325 SMLoc S = Parser.getTok().getLoc();
5326 Parser.Lex();
5327
5328 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5329 AMDGPUOperand::ImmTyExpTgt));
5330 return MatchOperand_Success;
5331 }
5332
5333 //===----------------------------------------------------------------------===//
5334 // parser helpers
5335 //===----------------------------------------------------------------------===//
5336
5337 bool
5338 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5339 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5340 }
5341
5342 bool
5343 AMDGPUAsmParser::isId(const StringRef Id) const {
5344 return isId(getToken(), Id);
5345 }
5346
5347 bool
5348 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5349 return getTokenKind() == Kind;
5350 }
5351
5352 bool
5353 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5354 if (isId(Id)) {
5355 lex();
5356 return true;
5357 }
5358 return false;
5359 }
5360
5361 bool
5362 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5363 if (isId(Id) && peekToken().is(Kind)) {
5364 lex();
5365 lex();
5366 return true;
5367 }
5368 return false;
5369 }
5370
5371 bool
5372 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5373 if (isToken(Kind)) {
5374 lex();
5375 return true;
5376 }
5377 return false;
5378 }
5379
5380 bool
5381 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5382 const StringRef ErrMsg) {
5383 if (!trySkipToken(Kind)) {
5384 Error(getLoc(), ErrMsg);
5385 return false;
5386 }
5387 return true;
5388 }
5389
5390 bool
5391 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5392 return !getParser().parseAbsoluteExpression(Imm);
5393 }
5394
5395 bool
5396 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5397 SMLoc S = getLoc();
5398
5399 const MCExpr *Expr;
5400 if (Parser.parseExpression(Expr))
5401 return false;
5402
5403 int64_t IntVal;
5404 if (Expr->evaluateAsAbsolute(IntVal)) {
5405 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5406 } else {
5407 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5408 }
5409 return true;
5410 }
5411
5412 bool
5413 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5414 if (isToken(AsmToken::String)) {
5415 Val = getToken().getStringContents();
5416 lex();
5417 return true;
5418 } else {
5419 Error(getLoc(), ErrMsg);
5420 return false;
5421 }
5422 }
5423
5424
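// Note: the helpers above and the token-stream accessors below are thin
// wrappers around the generic MC lexer/parser used throughout the custom
// operand parsers in this file. The trySkip* forms consume input only when it
// matches; skipToken()/parseString() additionally emit a diagnostic on
// failure. A purely illustrative usage sketch:
//   if (trySkipId("sendmsg", AsmToken::LParen)) {  // consumed "sendmsg("
//     ... parse the body ...
//     skipToken(AsmToken::RParen, "expected a closing parenthesis");
//   }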
AsmToken 5425 AMDGPUAsmParser::getToken() const { 5426 return Parser.getTok(); 5427 } 5428 5429 AsmToken 5430 AMDGPUAsmParser::peekToken() { 5431 return getLexer().peekTok(); 5432 } 5433 5434 void 5435 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5436 auto TokCount = getLexer().peekTokens(Tokens); 5437 5438 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5439 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5440 } 5441 5442 AsmToken::TokenKind 5443 AMDGPUAsmParser::getTokenKind() const { 5444 return getLexer().getKind(); 5445 } 5446 5447 SMLoc 5448 AMDGPUAsmParser::getLoc() const { 5449 return getToken().getLoc(); 5450 } 5451 5452 StringRef 5453 AMDGPUAsmParser::getTokenStr() const { 5454 return getToken().getString(); 5455 } 5456 5457 void 5458 AMDGPUAsmParser::lex() { 5459 Parser.Lex(); 5460 } 5461 5462 //===----------------------------------------------------------------------===// 5463 // swizzle 5464 //===----------------------------------------------------------------------===// 5465 5466 LLVM_READNONE 5467 static unsigned 5468 encodeBitmaskPerm(const unsigned AndMask, 5469 const unsigned OrMask, 5470 const unsigned XorMask) { 5471 using namespace llvm::AMDGPU::Swizzle; 5472 5473 return BITMASK_PERM_ENC | 5474 (AndMask << BITMASK_AND_SHIFT) | 5475 (OrMask << BITMASK_OR_SHIFT) | 5476 (XorMask << BITMASK_XOR_SHIFT); 5477 } 5478 5479 bool 5480 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5481 const unsigned MinVal, 5482 const unsigned MaxVal, 5483 const StringRef ErrMsg) { 5484 for (unsigned i = 0; i < OpNum; ++i) { 5485 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5486 return false; 5487 } 5488 SMLoc ExprLoc = Parser.getTok().getLoc(); 5489 if (!parseExpr(Op[i])) { 5490 return false; 5491 } 5492 if (Op[i] < MinVal || Op[i] > MaxVal) { 5493 Error(ExprLoc, ErrMsg); 5494 return false; 5495 } 5496 } 5497 5498 return true; 5499 } 5500 5501 bool 5502 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5503 using namespace llvm::AMDGPU::Swizzle; 5504 5505 int64_t Lane[LANE_NUM]; 5506 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5507 "expected a 2-bit lane id")) { 5508 Imm = QUAD_PERM_ENC; 5509 for (unsigned I = 0; I < LANE_NUM; ++I) { 5510 Imm |= Lane[I] << (LANE_SHIFT * I); 5511 } 5512 return true; 5513 } 5514 return false; 5515 } 5516 5517 bool 5518 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5519 using namespace llvm::AMDGPU::Swizzle; 5520 5521 SMLoc S = Parser.getTok().getLoc(); 5522 int64_t GroupSize; 5523 int64_t LaneIdx; 5524 5525 if (!parseSwizzleOperands(1, &GroupSize, 5526 2, 32, 5527 "group size must be in the interval [2,32]")) { 5528 return false; 5529 } 5530 if (!isPowerOf2_64(GroupSize)) { 5531 Error(S, "group size must be a power of two"); 5532 return false; 5533 } 5534 if (parseSwizzleOperands(1, &LaneIdx, 5535 0, GroupSize - 1, 5536 "lane id must be in the interval [0,group size - 1]")) { 5537 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5538 return true; 5539 } 5540 return false; 5541 } 5542 5543 bool 5544 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5545 using namespace llvm::AMDGPU::Swizzle; 5546 5547 SMLoc S = Parser.getTok().getLoc(); 5548 int64_t GroupSize; 5549 5550 if (!parseSwizzleOperands(1, &GroupSize, 5551 2, 32, "group size must be in the interval [2,32]")) { 5552 return false; 5553 } 5554 if (!isPowerOf2_64(GroupSize)) { 5555 Error(S, "group size must be a power of two"); 5556 return false; 5557 } 5558 5559 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1);
5560 return true;
5561 }
5562
5563 bool
5564 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5565 using namespace llvm::AMDGPU::Swizzle;
5566
5567 SMLoc S = Parser.getTok().getLoc();
5568 int64_t GroupSize;
5569
5570 if (!parseSwizzleOperands(1, &GroupSize,
5571 1, 16, "group size must be in the interval [1,16]")) {
5572 return false;
5573 }
5574 if (!isPowerOf2_64(GroupSize)) {
5575 Error(S, "group size must be a power of two");
5576 return false;
5577 }
5578
5579 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5580 return true;
5581 }
5582
5583 bool
5584 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5585 using namespace llvm::AMDGPU::Swizzle;
5586
5587 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5588 return false;
5589 }
5590
5591 StringRef Ctl;
5592 SMLoc StrLoc = Parser.getTok().getLoc();
5593 if (!parseString(Ctl)) {
5594 return false;
5595 }
5596 if (Ctl.size() != BITMASK_WIDTH) {
5597 Error(StrLoc, "expected a 5-character mask");
5598 return false;
5599 }
5600
5601 unsigned AndMask = 0;
5602 unsigned OrMask = 0;
5603 unsigned XorMask = 0;
5604
5605 for (size_t i = 0; i < Ctl.size(); ++i) {
5606 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5607 switch(Ctl[i]) {
5608 default:
5609 Error(StrLoc, "invalid mask");
5610 return false;
5611 case '0':
5612 break;
5613 case '1':
5614 OrMask |= Mask;
5615 break;
5616 case 'p':
5617 AndMask |= Mask;
5618 break;
5619 case 'i':
5620 AndMask |= Mask;
5621 XorMask |= Mask;
5622 break;
5623 }
5624 }
5625
5626 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5627 return true;
5628 }
5629
5630 bool
5631 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5632
5633 SMLoc OffsetLoc = Parser.getTok().getLoc();
5634
5635 if (!parseExpr(Imm)) {
5636 return false;
5637 }
5638 if (!isUInt<16>(Imm)) {
5639 Error(OffsetLoc, "expected a 16-bit offset");
5640 return false;
5641 }
5642 return true;
5643 }
5644
5645 bool
5646 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5647 using namespace llvm::AMDGPU::Swizzle;
5648
5649 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5650
5651 SMLoc ModeLoc = Parser.getTok().getLoc();
5652 bool Ok = false;
5653
5654 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5655 Ok = parseSwizzleQuadPerm(Imm);
5656 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5657 Ok = parseSwizzleBitmaskPerm(Imm);
5658 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5659 Ok = parseSwizzleBroadcast(Imm);
5660 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5661 Ok = parseSwizzleSwap(Imm);
5662 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5663 Ok = parseSwizzleReverse(Imm);
5664 } else {
5665 Error(ModeLoc, "expected a swizzle mode");
5666 }
5667
5668 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5669 }
5670
5671 return false;
5672 }
5673
5674 OperandMatchResultTy
5675 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5676 SMLoc S = Parser.getTok().getLoc();
5677 int64_t Imm = 0;
5678
5679 if (trySkipId("offset")) {
5680
5681 bool Ok = false;
5682 if (skipToken(AsmToken::Colon, "expected a colon")) {
5683 if (trySkipId("swizzle")) {
5684 Ok = parseSwizzleMacro(Imm);
5685 } else {
5686 Ok = parseSwizzleOffset(Imm);
5687 }
5688 }
5689
5690 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5691
5692 return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
5693 } else {
5694 // Swizzle "offset" operand is optional.
5695 // If it is omitted, try parsing other optional operands.
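// When the operand is present, the code above accepts either a raw 16-bit
// literal or one of the swizzle(...) macros, for example (values are
// illustrative only):
//   offset:0xd1
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   offset:swizzle(BITMASK_PERM, "01pi0")
//   offset:swizzle(BROADCAST, 2, 0)
//   offset:swizzle(SWAP, 16)
//   offset:swizzle(REVERSE, 8)
// BROADCAST, SWAP and REVERSE are all emitted through encodeBitmaskPerm(),
// which packs the 5-bit and/or/xor masks applied to the lane id.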
5696 return parseOptionalOpr(Operands); 5697 } 5698 } 5699 5700 bool 5701 AMDGPUOperand::isSwizzle() const { 5702 return isImmTy(ImmTySwizzle); 5703 } 5704 5705 //===----------------------------------------------------------------------===// 5706 // VGPR Index Mode 5707 //===----------------------------------------------------------------------===// 5708 5709 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5710 5711 using namespace llvm::AMDGPU::VGPRIndexMode; 5712 5713 if (trySkipToken(AsmToken::RParen)) { 5714 return OFF; 5715 } 5716 5717 int64_t Imm = 0; 5718 5719 while (true) { 5720 unsigned Mode = 0; 5721 SMLoc S = Parser.getTok().getLoc(); 5722 5723 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5724 if (trySkipId(IdSymbolic[ModeId])) { 5725 Mode = 1 << ModeId; 5726 break; 5727 } 5728 } 5729 5730 if (Mode == 0) { 5731 Error(S, (Imm == 0)? 5732 "expected a VGPR index mode or a closing parenthesis" : 5733 "expected a VGPR index mode"); 5734 break; 5735 } 5736 5737 if (Imm & Mode) { 5738 Error(S, "duplicate VGPR index mode"); 5739 break; 5740 } 5741 Imm |= Mode; 5742 5743 if (trySkipToken(AsmToken::RParen)) 5744 break; 5745 if (!skipToken(AsmToken::Comma, 5746 "expected a comma or a closing parenthesis")) 5747 break; 5748 } 5749 5750 return Imm; 5751 } 5752 5753 OperandMatchResultTy 5754 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5755 5756 int64_t Imm = 0; 5757 SMLoc S = Parser.getTok().getLoc(); 5758 5759 if (getLexer().getKind() == AsmToken::Identifier && 5760 Parser.getTok().getString() == "gpr_idx" && 5761 getLexer().peekTok().is(AsmToken::LParen)) { 5762 5763 Parser.Lex(); 5764 Parser.Lex(); 5765 5766 // If parse failed, trigger an error but do not return error code 5767 // to avoid excessive error messages. 5768 Imm = parseGPRIdxMacro(); 5769 5770 } else { 5771 if (getParser().parseAbsoluteExpression(Imm)) 5772 return MatchOperand_NoMatch; 5773 if (Imm < 0 || !isUInt<4>(Imm)) { 5774 Error(S, "invalid immediate: only 4-bit values are legal"); 5775 } 5776 } 5777 5778 Operands.push_back( 5779 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5780 return MatchOperand_Success; 5781 } 5782 5783 bool AMDGPUOperand::isGPRIdxMode() const { 5784 return isImmTy(ImmTyGprIdxMode); 5785 } 5786 5787 //===----------------------------------------------------------------------===// 5788 // sopp branch targets 5789 //===----------------------------------------------------------------------===// 5790 5791 OperandMatchResultTy 5792 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5793 5794 // Make sure we are not parsing something 5795 // that looks like a label or an expression but is not. 5796 // This will improve error messages. 5797 if (isRegister() || isModifier()) 5798 return MatchOperand_NoMatch; 5799 5800 if (parseExpr(Operands)) { 5801 5802 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5803 assert(Opr.isImm() || Opr.isExpr()); 5804 SMLoc Loc = Opr.getStartLoc(); 5805 5806 // Currently we do not support arbitrary expressions as branch targets. 5807 // Only labels and absolute expressions are accepted. 
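// For example (illustrative operands only), "s_branch loop_end" parses as a
// symbol reference, while a literal target such as "s_cbranch_scc0 4" must
// fit in a signed 16-bit immediate.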
5808 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5809 Error(Loc, "expected an absolute expression or a label"); 5810 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5811 Error(Loc, "expected a 16-bit signed jump offset"); 5812 } 5813 } 5814 5815 return MatchOperand_Success; // avoid excessive error messages 5816 } 5817 5818 //===----------------------------------------------------------------------===// 5819 // Boolean holding registers 5820 //===----------------------------------------------------------------------===// 5821 5822 OperandMatchResultTy 5823 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5824 return parseReg(Operands); 5825 } 5826 5827 //===----------------------------------------------------------------------===// 5828 // mubuf 5829 //===----------------------------------------------------------------------===// 5830 5831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5832 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5833 } 5834 5835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5836 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5837 } 5838 5839 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5840 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5841 } 5842 5843 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5844 const OperandVector &Operands, 5845 bool IsAtomic, 5846 bool IsAtomicReturn, 5847 bool IsLds) { 5848 bool IsLdsOpcode = IsLds; 5849 bool HasLdsModifier = false; 5850 OptionalImmIndexMap OptionalIdx; 5851 assert(IsAtomicReturn ? IsAtomic : true); 5852 unsigned FirstOperandIdx = 1; 5853 5854 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5855 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5856 5857 // Add the register arguments 5858 if (Op.isReg()) { 5859 Op.addRegOperands(Inst, 1); 5860 // Insert a tied src for atomic return dst. 5861 // This cannot be postponed as subsequent calls to 5862 // addImmOperands rely on correct number of MC operands. 5863 if (IsAtomicReturn && i == FirstOperandIdx) 5864 Op.addRegOperands(Inst, 1); 5865 continue; 5866 } 5867 5868 // Handle the case where soffset is an immediate 5869 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5870 Op.addImmOperands(Inst, 1); 5871 continue; 5872 } 5873 5874 HasLdsModifier |= Op.isLDS(); 5875 5876 // Handle tokens like 'offen' which are sometimes hard-coded into the 5877 // asm string. There are no MCInst operands for these. 5878 if (Op.isToken()) { 5879 continue; 5880 } 5881 assert(Op.isImm()); 5882 5883 // Handle optional arguments 5884 OptionalIdx[Op.getImmTy()] = i; 5885 } 5886 5887 // This is a workaround for an llvm quirk which may result in an 5888 // incorrect instruction selection. Lds and non-lds versions of 5889 // MUBUF instructions are identical except that lds versions 5890 // have mandatory 'lds' modifier. However this modifier follows 5891 // optional modifiers and llvm asm matcher regards this 'lds' 5892 // modifier as an optional one. As a result, an lds version 5893 // of opcode may be selected even if it has no 'lds' modifier. 5894 if (IsLdsOpcode && !HasLdsModifier) { 5895 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5896 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5897 Inst.setOpcode(NoLdsOpcode); 5898 IsLdsOpcode = false; 5899 } 5900 } 5901 5902 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5903 if (!IsAtomic) { // glc is hard-coded. 
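// (Explanatory note: for atomics the returning variants are expected to
// spell "glc" directly in their asm string, which is why no optional glc
// operand is added in that case; the exact AsmString layout is defined in
// the .td files.)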
5904 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5905 }
5906 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5907
5908 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5909 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5910 }
5911
5912 if (isGFX10())
5913 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5914 }
5915
5916 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5917 OptionalImmIndexMap OptionalIdx;
5918
5919 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5920 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5921
5922 // Add the register arguments
5923 if (Op.isReg()) {
5924 Op.addRegOperands(Inst, 1);
5925 continue;
5926 }
5927
5928 // Handle the case where soffset is an immediate
5929 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5930 Op.addImmOperands(Inst, 1);
5931 continue;
5932 }
5933
5934 // Handle tokens like 'offen' which are sometimes hard-coded into the
5935 // asm string. There are no MCInst operands for these.
5936 if (Op.isToken()) {
5937 continue;
5938 }
5939 assert(Op.isImm());
5940
5941 // Handle optional arguments
5942 OptionalIdx[Op.getImmTy()] = i;
5943 }
5944
5945 addOptionalImmOperand(Inst, Operands, OptionalIdx,
5946 AMDGPUOperand::ImmTyOffset);
5947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5948 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5949 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5951
5952 if (isGFX10())
5953 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5954 }
5955
5956 //===----------------------------------------------------------------------===//
5957 // mimg
5958 //===----------------------------------------------------------------------===//
5959
5960 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5961 bool IsAtomic) {
5962 unsigned I = 1;
5963 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5964 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5965 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5966 }
5967
5968 if (IsAtomic) {
5969 // Add src, same as dst
5970 assert(Desc.getNumDefs() == 1);
5971 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5972 }
5973
5974 OptionalImmIndexMap OptionalIdx;
5975
5976 for (unsigned E = Operands.size(); I != E; ++I) {
5977 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5978
5979 // Add the register arguments
5980 if (Op.isReg()) {
5981 Op.addRegOperands(Inst, 1);
5982 } else if (Op.isImmModifier()) {
5983 OptionalIdx[Op.getImmTy()] = I;
5984 } else if (!Op.isToken()) {
5985 llvm_unreachable("unexpected operand type");
5986 }
5987 }
5988
5989 bool IsGFX10 = isGFX10();
5990
5991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5992 if (IsGFX10)
5993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5995 if (IsGFX10)
5996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5997 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5998 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5999 addOptionalImmOperand(Inst, Operands,
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6000 if (IsGFX10) 6001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6003 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6004 if (!IsGFX10) 6005 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6007 } 6008 6009 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6010 cvtMIMG(Inst, Operands, true); 6011 } 6012 6013 //===----------------------------------------------------------------------===// 6014 // smrd 6015 //===----------------------------------------------------------------------===// 6016 6017 bool AMDGPUOperand::isSMRDOffset8() const { 6018 return isImm() && isUInt<8>(getImm()); 6019 } 6020 6021 bool AMDGPUOperand::isSMRDOffset20() const { 6022 return isImm() && isUInt<20>(getImm()); 6023 } 6024 6025 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6026 // 32-bit literals are only supported on CI and we only want to use them 6027 // when the offset is > 8-bits. 6028 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6029 } 6030 6031 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6032 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6033 } 6034 6035 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 6036 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6037 } 6038 6039 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6040 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6041 } 6042 6043 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6044 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6045 } 6046 6047 //===----------------------------------------------------------------------===// 6048 // vop3 6049 //===----------------------------------------------------------------------===// 6050 6051 static bool ConvertOmodMul(int64_t &Mul) { 6052 if (Mul != 1 && Mul != 2 && Mul != 4) 6053 return false; 6054 6055 Mul >>= 1; 6056 return true; 6057 } 6058 6059 static bool ConvertOmodDiv(int64_t &Div) { 6060 if (Div == 1) { 6061 Div = 0; 6062 return true; 6063 } 6064 6065 if (Div == 2) { 6066 Div = 3; 6067 return true; 6068 } 6069 6070 return false; 6071 } 6072 6073 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6074 if (BoundCtrl == 0) { 6075 BoundCtrl = 1; 6076 return true; 6077 } 6078 6079 if (BoundCtrl == -1) { 6080 BoundCtrl = 0; 6081 return true; 6082 } 6083 6084 return false; 6085 } 6086 6087 // Note: the order in this table matches the order of operands in AsmString. 
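// Each entry below is {asm name, immediate operand type, is-a-bare-bit,
// optional value converter}. Bit entries such as "glc" are parsed as a lone
// token; the remaining entries are parsed as "name:value" (see
// parseOptionalOpr() further below).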
6088 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6089 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6090 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6091 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6092 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6093 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6094 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6095 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6096 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6097 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6098 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6099 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6100 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6101 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6102 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6103 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6104 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6105 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6106 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6107 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6108 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6109 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6110 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6111 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6112 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6113 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6114 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6115 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6116 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6117 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6118 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6119 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6120 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6121 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6122 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6123 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6124 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6125 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6126 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6127 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6128 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6129 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6130 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6131 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6132 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6133 }; 6134 6135 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6136 6137 OperandMatchResultTy res = parseOptionalOpr(Operands); 6138 6139 // This is a hack to enable hardcoded mandatory operands which follow 6140 // optional operands. 6141 // 6142 // Current design assumes that all operands after the first optional operand 6143 // are also optional. However implementation of some instructions violates 6144 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6145 // 6146 // To alleviate this problem, we have to (implicitly) parse extra operands 6147 // to make sure autogenerated parser of custom operands never hit hardcoded 6148 // mandatory operands. 
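// For example, the returning forms of flat/global atomics place a mandatory
// "glc" after the optional offset, so we keep consuming operands here (up to
// MAX_OPR_LOOKAHEAD) instead of stopping after the first optional match.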
6149 6150 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6151 if (res != MatchOperand_Success || 6152 isToken(AsmToken::EndOfStatement)) 6153 break; 6154 6155 trySkipToken(AsmToken::Comma); 6156 res = parseOptionalOpr(Operands); 6157 } 6158 6159 return res; 6160 } 6161 6162 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6163 OperandMatchResultTy res; 6164 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6165 // try to parse any optional operand here 6166 if (Op.IsBit) { 6167 res = parseNamedBit(Op.Name, Operands, Op.Type); 6168 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6169 res = parseOModOperand(Operands); 6170 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6171 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6172 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6173 res = parseSDWASel(Operands, Op.Name, Op.Type); 6174 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6175 res = parseSDWADstUnused(Operands); 6176 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6177 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6178 Op.Type == AMDGPUOperand::ImmTyNegLo || 6179 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6180 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6181 Op.ConvertResult); 6182 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6183 res = parseDim(Operands); 6184 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6185 res = parseDfmtNfmt(Operands); 6186 } else { 6187 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6188 } 6189 if (res != MatchOperand_NoMatch) { 6190 return res; 6191 } 6192 } 6193 return MatchOperand_NoMatch; 6194 } 6195 6196 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6197 StringRef Name = Parser.getTok().getString(); 6198 if (Name == "mul") { 6199 return parseIntWithPrefix("mul", Operands, 6200 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6201 } 6202 6203 if (Name == "div") { 6204 return parseIntWithPrefix("div", Operands, 6205 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6206 } 6207 6208 return MatchOperand_NoMatch; 6209 } 6210 6211 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6212 cvtVOP3P(Inst, Operands); 6213 6214 int Opc = Inst.getOpcode(); 6215 6216 int SrcNum; 6217 const int Ops[] = { AMDGPU::OpName::src0, 6218 AMDGPU::OpName::src1, 6219 AMDGPU::OpName::src2 }; 6220 for (SrcNum = 0; 6221 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6222 ++SrcNum); 6223 assert(SrcNum > 0); 6224 6225 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6226 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6227 6228 if ((OpSel & (1 << SrcNum)) != 0) { 6229 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6230 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6231 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6232 } 6233 } 6234 6235 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6236 // 1. This operand is input modifiers 6237 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6238 // 2. This is not last operand 6239 && Desc.NumOperands > (OpNum + 1) 6240 // 3. Next operand is register class 6241 && Desc.OpInfo[OpNum + 1].RegClass != -1 6242 // 4. 
Next register is not tied to any other operand 6243 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6244 } 6245 6246 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6247 { 6248 OptionalImmIndexMap OptionalIdx; 6249 unsigned Opc = Inst.getOpcode(); 6250 6251 unsigned I = 1; 6252 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6253 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6254 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6255 } 6256 6257 for (unsigned E = Operands.size(); I != E; ++I) { 6258 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6259 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6260 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6261 } else if (Op.isInterpSlot() || 6262 Op.isInterpAttr() || 6263 Op.isAttrChan()) { 6264 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6265 } else if (Op.isImmModifier()) { 6266 OptionalIdx[Op.getImmTy()] = I; 6267 } else { 6268 llvm_unreachable("unhandled operand type"); 6269 } 6270 } 6271 6272 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6273 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6274 } 6275 6276 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6277 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6278 } 6279 6280 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6281 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6282 } 6283 } 6284 6285 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6286 OptionalImmIndexMap &OptionalIdx) { 6287 unsigned Opc = Inst.getOpcode(); 6288 6289 unsigned I = 1; 6290 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6291 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6292 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6293 } 6294 6295 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6296 // This instruction has src modifiers 6297 for (unsigned E = Operands.size(); I != E; ++I) { 6298 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6299 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6300 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6301 } else if (Op.isImmModifier()) { 6302 OptionalIdx[Op.getImmTy()] = I; 6303 } else if (Op.isRegOrImm()) { 6304 Op.addRegOrImmOperands(Inst, 1); 6305 } else { 6306 llvm_unreachable("unhandled operand type"); 6307 } 6308 } 6309 } else { 6310 // No src modifiers 6311 for (unsigned E = Operands.size(); I != E; ++I) { 6312 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6313 if (Op.isMod()) { 6314 OptionalIdx[Op.getImmTy()] = I; 6315 } else { 6316 Op.addRegOrImmOperands(Inst, 1); 6317 } 6318 } 6319 } 6320 6321 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6323 } 6324 6325 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6326 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6327 } 6328 6329 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6330 // it has src2 register operand that is tied to dst operand 6331 // we don't allow modifiers for this operand in assembler so src2_modifiers 6332 // should be 0. 
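// For instance, for a VOP3-encoded "v_mac_f32_e64 v1, v2, v3" (illustrative
// operands), the code below inserts src2_modifiers = 0 and then src2 = v1,
// i.e. a copy of the dst operand.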
6333 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6334 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6335 Opc == AMDGPU::V_MAC_F32_e64_vi || 6336 Opc == AMDGPU::V_MAC_F16_e64_vi || 6337 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6338 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6339 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6340 auto it = Inst.begin(); 6341 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6342 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6343 ++it; 6344 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6345 } 6346 } 6347 6348 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6349 OptionalImmIndexMap OptionalIdx; 6350 cvtVOP3(Inst, Operands, OptionalIdx); 6351 } 6352 6353 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6354 const OperandVector &Operands) { 6355 OptionalImmIndexMap OptIdx; 6356 const int Opc = Inst.getOpcode(); 6357 const MCInstrDesc &Desc = MII.get(Opc); 6358 6359 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6360 6361 cvtVOP3(Inst, Operands, OptIdx); 6362 6363 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6364 assert(!IsPacked); 6365 Inst.addOperand(Inst.getOperand(0)); 6366 } 6367 6368 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6369 // instruction, and then figure out where to actually put the modifiers 6370 6371 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6372 6373 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6374 if (OpSelHiIdx != -1) { 6375 int DefaultVal = IsPacked ? -1 : 0; 6376 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6377 DefaultVal); 6378 } 6379 6380 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6381 if (NegLoIdx != -1) { 6382 assert(IsPacked); 6383 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6384 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6385 } 6386 6387 const int Ops[] = { AMDGPU::OpName::src0, 6388 AMDGPU::OpName::src1, 6389 AMDGPU::OpName::src2 }; 6390 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6391 AMDGPU::OpName::src1_modifiers, 6392 AMDGPU::OpName::src2_modifiers }; 6393 6394 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6395 6396 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6397 unsigned OpSelHi = 0; 6398 unsigned NegLo = 0; 6399 unsigned NegHi = 0; 6400 6401 if (OpSelHiIdx != -1) { 6402 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6403 } 6404 6405 if (NegLoIdx != -1) { 6406 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6407 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6408 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6409 } 6410 6411 for (int J = 0; J < 3; ++J) { 6412 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6413 if (OpIdx == -1) 6414 break; 6415 6416 uint32_t ModVal = 0; 6417 6418 if ((OpSel & (1 << J)) != 0) 6419 ModVal |= SISrcMods::OP_SEL_0; 6420 6421 if ((OpSelHi & (1 << J)) != 0) 6422 ModVal |= SISrcMods::OP_SEL_1; 6423 6424 if ((NegLo & (1 << J)) != 0) 6425 ModVal |= SISrcMods::NEG; 6426 6427 if ((NegHi & (1 << J)) != 0) 6428 ModVal |= SISrcMods::NEG_HI; 6429 6430 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6431 6432 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6433 } 6434 } 6435 6436 //===----------------------------------------------------------------------===// 6437 // dpp 6438 
//===----------------------------------------------------------------------===// 6439 6440 bool AMDGPUOperand::isDPP8() const { 6441 return isImmTy(ImmTyDPP8); 6442 } 6443 6444 bool AMDGPUOperand::isDPPCtrl() const { 6445 using namespace AMDGPU::DPP; 6446 6447 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6448 if (result) { 6449 int64_t Imm = getImm(); 6450 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6451 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6452 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6453 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6454 (Imm == DppCtrl::WAVE_SHL1) || 6455 (Imm == DppCtrl::WAVE_ROL1) || 6456 (Imm == DppCtrl::WAVE_SHR1) || 6457 (Imm == DppCtrl::WAVE_ROR1) || 6458 (Imm == DppCtrl::ROW_MIRROR) || 6459 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6460 (Imm == DppCtrl::BCAST15) || 6461 (Imm == DppCtrl::BCAST31) || 6462 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6463 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6464 } 6465 return false; 6466 } 6467 6468 //===----------------------------------------------------------------------===// 6469 // mAI 6470 //===----------------------------------------------------------------------===// 6471 6472 bool AMDGPUOperand::isBLGP() const { 6473 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6474 } 6475 6476 bool AMDGPUOperand::isCBSZ() const { 6477 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6478 } 6479 6480 bool AMDGPUOperand::isABID() const { 6481 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6482 } 6483 6484 bool AMDGPUOperand::isS16Imm() const { 6485 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6486 } 6487 6488 bool AMDGPUOperand::isU16Imm() const { 6489 return isImm() && isUInt<16>(getImm()); 6490 } 6491 6492 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6493 if (!isGFX10()) 6494 return MatchOperand_NoMatch; 6495 6496 SMLoc S = Parser.getTok().getLoc(); 6497 6498 if (getLexer().isNot(AsmToken::Identifier)) 6499 return MatchOperand_NoMatch; 6500 if (getLexer().getTok().getString() != "dim") 6501 return MatchOperand_NoMatch; 6502 6503 Parser.Lex(); 6504 if (getLexer().isNot(AsmToken::Colon)) 6505 return MatchOperand_ParseFail; 6506 6507 Parser.Lex(); 6508 6509 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6510 // integer. 
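// For instance, "dim:2D" lexes as Integer(2) followed by Identifier(D); the
// code below splices the two back together, and the end-location check
// rejects forms with intervening whitespace such as "dim:2 D". The long form
// "dim:SQ_RSRC_IMG_2D" is also accepted (the prefix is stripped below).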
6511 std::string Token; 6512 if (getLexer().is(AsmToken::Integer)) { 6513 SMLoc Loc = getLexer().getTok().getEndLoc(); 6514 Token = std::string(getLexer().getTok().getString()); 6515 Parser.Lex(); 6516 if (getLexer().getTok().getLoc() != Loc) 6517 return MatchOperand_ParseFail; 6518 } 6519 if (getLexer().isNot(AsmToken::Identifier)) 6520 return MatchOperand_ParseFail; 6521 Token += getLexer().getTok().getString(); 6522 6523 StringRef DimId = Token; 6524 if (DimId.startswith("SQ_RSRC_IMG_")) 6525 DimId = DimId.substr(12); 6526 6527 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6528 if (!DimInfo) 6529 return MatchOperand_ParseFail; 6530 6531 Parser.Lex(); 6532 6533 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6534 AMDGPUOperand::ImmTyDim)); 6535 return MatchOperand_Success; 6536 } 6537 6538 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6539 SMLoc S = Parser.getTok().getLoc(); 6540 StringRef Prefix; 6541 6542 if (getLexer().getKind() == AsmToken::Identifier) { 6543 Prefix = Parser.getTok().getString(); 6544 } else { 6545 return MatchOperand_NoMatch; 6546 } 6547 6548 if (Prefix != "dpp8") 6549 return parseDPPCtrl(Operands); 6550 if (!isGFX10()) 6551 return MatchOperand_NoMatch; 6552 6553 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6554 6555 int64_t Sels[8]; 6556 6557 Parser.Lex(); 6558 if (getLexer().isNot(AsmToken::Colon)) 6559 return MatchOperand_ParseFail; 6560 6561 Parser.Lex(); 6562 if (getLexer().isNot(AsmToken::LBrac)) 6563 return MatchOperand_ParseFail; 6564 6565 Parser.Lex(); 6566 if (getParser().parseAbsoluteExpression(Sels[0])) 6567 return MatchOperand_ParseFail; 6568 if (0 > Sels[0] || 7 < Sels[0]) 6569 return MatchOperand_ParseFail; 6570 6571 for (size_t i = 1; i < 8; ++i) { 6572 if (getLexer().isNot(AsmToken::Comma)) 6573 return MatchOperand_ParseFail; 6574 6575 Parser.Lex(); 6576 if (getParser().parseAbsoluteExpression(Sels[i])) 6577 return MatchOperand_ParseFail; 6578 if (0 > Sels[i] || 7 < Sels[i]) 6579 return MatchOperand_ParseFail; 6580 } 6581 6582 if (getLexer().isNot(AsmToken::RBrac)) 6583 return MatchOperand_ParseFail; 6584 Parser.Lex(); 6585 6586 unsigned DPP8 = 0; 6587 for (size_t i = 0; i < 8; ++i) 6588 DPP8 |= (Sels[i] << (i * 3)); 6589 6590 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6591 return MatchOperand_Success; 6592 } 6593 6594 OperandMatchResultTy 6595 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6596 using namespace AMDGPU::DPP; 6597 6598 SMLoc S = Parser.getTok().getLoc(); 6599 StringRef Prefix; 6600 int64_t Int; 6601 6602 if (getLexer().getKind() == AsmToken::Identifier) { 6603 Prefix = Parser.getTok().getString(); 6604 } else { 6605 return MatchOperand_NoMatch; 6606 } 6607 6608 if (Prefix == "row_mirror") { 6609 Int = DppCtrl::ROW_MIRROR; 6610 Parser.Lex(); 6611 } else if (Prefix == "row_half_mirror") { 6612 Int = DppCtrl::ROW_HALF_MIRROR; 6613 Parser.Lex(); 6614 } else { 6615 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6616 if (Prefix != "quad_perm" 6617 && Prefix != "row_shl" 6618 && Prefix != "row_shr" 6619 && Prefix != "row_ror" 6620 && Prefix != "wave_shl" 6621 && Prefix != "wave_rol" 6622 && Prefix != "wave_shr" 6623 && Prefix != "wave_ror" 6624 && Prefix != "row_bcast" 6625 && Prefix != "row_share" 6626 && Prefix != "row_xmask") { 6627 return MatchOperand_NoMatch; 6628 } 6629 6630 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6631 return MatchOperand_NoMatch; 6632 6633 if 
(!isVI() && !isGFX9() && 6634 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6635 Prefix == "wave_rol" || Prefix == "wave_ror" || 6636 Prefix == "row_bcast")) 6637 return MatchOperand_NoMatch; 6638 6639 Parser.Lex(); 6640 if (getLexer().isNot(AsmToken::Colon)) 6641 return MatchOperand_ParseFail; 6642 6643 if (Prefix == "quad_perm") { 6644 // quad_perm:[%d,%d,%d,%d] 6645 Parser.Lex(); 6646 if (getLexer().isNot(AsmToken::LBrac)) 6647 return MatchOperand_ParseFail; 6648 Parser.Lex(); 6649 6650 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6651 return MatchOperand_ParseFail; 6652 6653 for (int i = 0; i < 3; ++i) { 6654 if (getLexer().isNot(AsmToken::Comma)) 6655 return MatchOperand_ParseFail; 6656 Parser.Lex(); 6657 6658 int64_t Temp; 6659 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6660 return MatchOperand_ParseFail; 6661 const int shift = i*2 + 2; 6662 Int += (Temp << shift); 6663 } 6664 6665 if (getLexer().isNot(AsmToken::RBrac)) 6666 return MatchOperand_ParseFail; 6667 Parser.Lex(); 6668 } else { 6669 // sel:%d 6670 Parser.Lex(); 6671 if (getParser().parseAbsoluteExpression(Int)) 6672 return MatchOperand_ParseFail; 6673 6674 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6675 Int |= DppCtrl::ROW_SHL0; 6676 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6677 Int |= DppCtrl::ROW_SHR0; 6678 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6679 Int |= DppCtrl::ROW_ROR0; 6680 } else if (Prefix == "wave_shl" && 1 == Int) { 6681 Int = DppCtrl::WAVE_SHL1; 6682 } else if (Prefix == "wave_rol" && 1 == Int) { 6683 Int = DppCtrl::WAVE_ROL1; 6684 } else if (Prefix == "wave_shr" && 1 == Int) { 6685 Int = DppCtrl::WAVE_SHR1; 6686 } else if (Prefix == "wave_ror" && 1 == Int) { 6687 Int = DppCtrl::WAVE_ROR1; 6688 } else if (Prefix == "row_bcast") { 6689 if (Int == 15) { 6690 Int = DppCtrl::BCAST15; 6691 } else if (Int == 31) { 6692 Int = DppCtrl::BCAST31; 6693 } else { 6694 return MatchOperand_ParseFail; 6695 } 6696 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6697 Int |= DppCtrl::ROW_SHARE_FIRST; 6698 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6699 Int |= DppCtrl::ROW_XMASK_FIRST; 6700 } else { 6701 return MatchOperand_ParseFail; 6702 } 6703 } 6704 } 6705 6706 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6707 return MatchOperand_Success; 6708 } 6709 6710 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6711 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6712 } 6713 6714 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6715 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6716 } 6717 6718 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6719 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6720 } 6721 6722 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6723 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6724 } 6725 6726 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6727 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6728 } 6729 6730 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6731 OptionalImmIndexMap OptionalIdx; 6732 6733 unsigned I = 1; 6734 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6735 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6736 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6737 } 6738 6739 int Fi = 0; 6740 for (unsigned E = Operands.size(); I != E; ++I) { 6741 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6742 MCOI::TIED_TO); 6743 if (TiedTo != -1) { 6744 assert((unsigned)TiedTo < Inst.getNumOperands()); 6745 // handle tied old or src2 for MAC instructions 6746 Inst.addOperand(Inst.getOperand(TiedTo)); 6747 } 6748 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6749 // Add the register arguments 6750 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6751 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6752 // Skip it. 6753 continue; 6754 } 6755 6756 if (IsDPP8) { 6757 if (Op.isDPP8()) { 6758 Op.addImmOperands(Inst, 1); 6759 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6760 Op.addRegWithFPInputModsOperands(Inst, 2); 6761 } else if (Op.isFI()) { 6762 Fi = Op.getImm(); 6763 } else if (Op.isReg()) { 6764 Op.addRegOperands(Inst, 1); 6765 } else { 6766 llvm_unreachable("Invalid operand type"); 6767 } 6768 } else { 6769 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6770 Op.addRegWithFPInputModsOperands(Inst, 2); 6771 } else if (Op.isDPPCtrl()) { 6772 Op.addImmOperands(Inst, 1); 6773 } else if (Op.isImm()) { 6774 // Handle optional arguments 6775 OptionalIdx[Op.getImmTy()] = I; 6776 } else { 6777 llvm_unreachable("Invalid operand type"); 6778 } 6779 } 6780 } 6781 6782 if (IsDPP8) { 6783 using namespace llvm::AMDGPU::DPP; 6784 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6785 } else { 6786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6789 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6791 } 6792 } 6793 } 6794 6795 //===----------------------------------------------------------------------===// 6796 // sdwa 6797 //===----------------------------------------------------------------------===// 6798 6799 OperandMatchResultTy 6800 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6801 AMDGPUOperand::ImmTy Type) { 6802 using namespace llvm::AMDGPU::SDWA; 6803 6804 SMLoc S = Parser.getTok().getLoc(); 6805 StringRef Value; 6806 OperandMatchResultTy res; 6807 6808 res = parseStringWithPrefix(Prefix, Value); 6809 if (res != MatchOperand_Success) { 6810 return res; 6811 } 6812 6813 int64_t Int; 6814 Int = StringSwitch<int64_t>(Value) 6815 .Case("BYTE_0", SdwaSel::BYTE_0) 6816 .Case("BYTE_1", SdwaSel::BYTE_1) 6817 .Case("BYTE_2", SdwaSel::BYTE_2) 6818 .Case("BYTE_3", SdwaSel::BYTE_3) 6819 .Case("WORD_0", SdwaSel::WORD_0) 6820 .Case("WORD_1", SdwaSel::WORD_1) 6821 .Case("DWORD", SdwaSel::DWORD) 6822 .Default(0xffffffff); 6823 Parser.Lex(); // eat last token 6824 6825 if (Int == 0xffffffff) { 6826 return MatchOperand_ParseFail; 6827 } 6828 6829 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6830 return MatchOperand_Success; 6831 } 6832 6833 OperandMatchResultTy 6834 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6835 using namespace llvm::AMDGPU::SDWA; 6836 6837 SMLoc S = Parser.getTok().getLoc(); 6838 StringRef Value; 6839 OperandMatchResultTy res; 6840 6841 res = parseStringWithPrefix("dst_unused", Value); 6842 if (res != 
MatchOperand_Success) { 6843 return res; 6844 } 6845 6846 int64_t Int; 6847 Int = StringSwitch<int64_t>(Value) 6848 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6849 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6850 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6851 .Default(0xffffffff); 6852 Parser.Lex(); // eat last token 6853 6854 if (Int == 0xffffffff) { 6855 return MatchOperand_ParseFail; 6856 } 6857 6858 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6859 return MatchOperand_Success; 6860 } 6861 6862 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6863 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6864 } 6865 6866 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6867 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6868 } 6869 6870 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6871 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 6872 } 6873 6874 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 6875 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 6876 } 6877 6878 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6879 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6880 } 6881 6882 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6883 uint64_t BasicInstType, 6884 bool SkipDstVcc, 6885 bool SkipSrcVcc) { 6886 using namespace llvm::AMDGPU::SDWA; 6887 6888 OptionalImmIndexMap OptionalIdx; 6889 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 6890 bool SkippedVcc = false; 6891 6892 unsigned I = 1; 6893 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6894 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6895 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6896 } 6897 6898 for (unsigned E = Operands.size(); I != E; ++I) { 6899 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6900 if (SkipVcc && !SkippedVcc && Op.isReg() && 6901 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6902 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6903 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6904 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6905 // Skip VCC only if we didn't skip it on previous iteration. 6906 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
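// At this point the MCInst holds either just the dst (1 operand) when the
// "vcc" being skipped is the carry-out, or the dst plus two {modifiers, reg}
// pairs for src0/src1 (5 operands) when it is the trailing carry-in.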
6907 if (BasicInstType == SIInstrFlags::VOP2 &&
6908 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6909 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6910 SkippedVcc = true;
6911 continue;
6912 } else if (BasicInstType == SIInstrFlags::VOPC &&
6913 Inst.getNumOperands() == 0) {
6914 SkippedVcc = true;
6915 continue;
6916 }
6917 }
6918 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6919 Op.addRegOrImmWithInputModsOperands(Inst, 2);
6920 } else if (Op.isImm()) {
6921 // Handle optional arguments
6922 OptionalIdx[Op.getImmTy()] = I;
6923 } else {
6924 llvm_unreachable("Invalid operand type");
6925 }
6926 SkippedVcc = false;
6927 }
6928
6929 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6930 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6931 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6932 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
6933 switch (BasicInstType) {
6934 case SIInstrFlags::VOP1:
6935 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6936 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6938 }
6939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6942 break;
6943
6944 case SIInstrFlags::VOP2:
6945 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6946 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6948 }
6949 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6952 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6953 break;
6954
6955 case SIInstrFlags::VOPC:
6956 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6957 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6958 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6959 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6960 break;
6961
6962 default:
6963 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed");
6964 }
6965 }
6966
6967 // Special case v_mac_{f16, f32}:
6968 // it has a src2 register operand that is tied to the dst operand.
6969 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6970 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6971 auto it = Inst.begin();
6972 std::advance(
6973 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6974 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6975 }
6976 }
6977
6978 //===----------------------------------------------------------------------===//
6979 // mAI
6980 //===----------------------------------------------------------------------===//
6981
6982 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6983 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6984 }
6985
6986 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6987 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6988 }
6989
6990 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6991 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6992 }
6993
6994 /// Force static initialization.
6995 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
6996 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6997 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6998 }
6999
7000 #define GET_REGISTER_MATCHER
7001 #define GET_MATCHER_IMPLEMENTATION
7002 #define GET_MNEMONIC_SPELL_CHECKER
7003 #include "AMDGPUGenAsmMatcher.inc"
7004
7005 // This function should be defined after the auto-generated include so that
7006 // the MatchClassKind enum is defined.
7007 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7008 unsigned Kind) {
7009 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7010 // But MatchInstructionImpl() expects to meet a token and fails to validate
7011 // the operand. This method checks if we were given an immediate operand but
7012 // expected the corresponding token.
7013 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7014 switch (Kind) {
7015 case MCK_addr64:
7016 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7017 case MCK_gds:
7018 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7019 case MCK_lds:
7020 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7021 case MCK_glc:
7022 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7023 case MCK_idxen:
7024 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7025 case MCK_offen:
7026 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7027 case MCK_SSrcB32:
7028 // When operands have expression values, they will return true for isToken,
7029 // because it is not possible to distinguish between a token and an
7030 // expression at parse time. MatchInstructionImpl() will always try to
7031 // match an operand as a token when isToken returns true, and when the
7032 // name of the expression is not a valid token, the match will fail,
7033 // so we need to handle it here.
7034 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7035 case MCK_SSrcF32:
7036 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7037 case MCK_SoppBrTarget:
7038 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7039 case MCK_VReg32OrOff:
7040 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7041 case MCK_InterpSlot:
7042 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7043 case MCK_Attr:
7044 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7045 case MCK_AttrChan:
7046 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7047 case MCK_SReg_64:
7048 case MCK_SReg_64_XEXEC:
7049 // Null is defined as a 32-bit register but
7050 // it should also be accepted for 64-bit operands.
7051 // The following code enables it for SReg_64 operands
7052 // used as source and destination. Remaining source
7053 // operands are handled in isInlinableImm.
7054 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7055 default:
7056 return Match_InvalidOperand;
7057 }
7058 }
7059
7060 //===----------------------------------------------------------------------===//
7061 // endpgm
7062 //===----------------------------------------------------------------------===//
7063
7064 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7065 SMLoc S = Parser.getTok().getLoc();
7066 int64_t Imm = 0;
7067
7068 if (!parseExpr(Imm)) {
7069 // The operand is optional; if not present, default to 0.
7070 Imm = 0;
7071 }
7072
7073 if (!isUInt<16>(Imm)) {
7074 Error(S, "expected a 16-bit value");
7075 return MatchOperand_ParseFail;
7076 }
7077
7078 Operands.push_back(
7079 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7080 return MatchOperand_Success;
7081 }
7082
7083 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7084